e0c558fa549b
KMER, SSEQ
author | Steve Losh <steve@stevelosh.com> |
---|---|
date | Fri, 22 Feb 2019 19:00:15 -0500 |
parents | 4962c672f610 |
children | 7052ec0c6e1d |
branches/tags | (none) |
files | src/problems/kmer.lisp src/problems/sseq.lisp |
Changes
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/problems/kmer.lisp Fri Feb 22 19:00:15 2019 -0500 @@ -0,0 +1,47 @@ +(in-package :rosalind) + +(defparameter *input-kmer* ">Rosalind_6431 +CTTCGAAAGTTTGGGCCGAGTCTTACAGTCGGTCTTGAAGCAAAGTAACGAACTCCACGG +CCCTGACTACCGAACCAGTTGTGAGTACTCAACTGGGTGAGAGTGCAGTCCCTATTGAGT +TTCCGAGACTCACCGGGATTTTCGATCCAGCCTCAGTCCAGTCTTGTGGCCAACTCACCA +AATGACGTTGGAATATCCCTGTCTAGCTCACGCAGTACTTAGTAAGAGGTCGCTGCAGCG +GGGCAAGGAGATCGGAAAATGTGCTCTATATGCGACTAAAGCTCCTAACTTACACGTAGA +CTTGCCCGTGTTAAAAACTCGGCTCACATGCTGTCTGCGGCTGGCTGTATACAGTATCTA +CCTAATACCCTTCAGTTCGCCGCACAAAAGCTGGGAGTTACCGCGGAAATCACAG") + +(defparameter *output-kmer* "4 1 4 3 0 1 1 5 1 3 1 2 2 1 2 0 1 1 3 1 2 1 3 1 1 1 1 2 2 5 1 3 0 2 2 1 1 1 1 3 1 0 0 1 5 5 1 5 0 2 0 2 1 2 1 1 1 2 0 1 0 0 1 1 3 2 1 0 3 2 3 0 0 2 0 8 0 0 1 0 2 1 3 0 0 0 1 4 3 2 1 1 3 1 2 1 3 1 2 1 2 1 1 1 2 3 2 1 1 0 1 1 3 2 1 2 6 2 1 1 1 2 3 3 3 2 3 0 3 2 1 1 0 0 1 4 3 0 1 5 0 2 0 1 2 1 3 0 1 2 2 1 1 0 3 0 0 4 5 0 3 0 2 1 1 3 0 3 2 2 1 1 0 2 1 0 2 2 1 2 0 2 2 5 2 2 1 1 2 1 2 2 2 2 1 1 3 4 0 2 1 1 0 1 2 2 1 1 1 5 2 0 3 2 1 1 2 2 3 0 3 0 1 3 1 2 3 0 2 1 2 2 1 2 3 0 1 2 3 1 1 3 1 0 1 1 3 0 2 1 2 2 0 2 1 1") + + +(defun mapc-kmers (function n &key (copy t)) + (let ((kmer (make-array n :element-type 'character))) + (recursively ((i 0)) + (if (= i n) + (funcall function (if copy + (copy-seq kmer) + kmer)) + (flet ((branch (base) + (setf (aref kmer i) base) + (recur (1+ i)))) + (map nil #'branch "ACGT")))))) + +(defun map-kmers (function n) + (gathering (mapc-kmers (compose #'gather function) n))) + +(defun kmers (n) + (map-kmers #'identity n)) + + +(define-problem kmer (data stream) + *input-kmer* + *output-kmer* + (iterate + (with n = 4) + (with seq = (nth-value 1 (read-fasta data))) + (with counts = (make-hash-table :test #'equal)) + (for i :from 0 :to (- (length seq) n)) + (for kmer = (subseq seq i (+ i n))) + (incf (gethash kmer counts 0)) + (finally + (return + (format nil "~{~D~^ ~}" + (map-kmers (lambda (kmer) (gethash kmer counts 0)) n))))))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/problems/sseq.lisp Fri Feb 22 19:00:15 2019 -0500 @@ -0,0 +1,36 @@ +(in-package :rosalind) + +(defparameter *input-sseq* ">Rosalind_14 +ACGTACGTGACG +>Rosalind_18 +GTA +") + +(defparameter *output-sseq* "3 4 5") + +;; todo: make this more efficient for lists +(defun subsequence-positions (needle haystack &key + (test #'eql) + (start-needle 0) + (end-needle (length needle)) + (start-haystack 0) + (end-haystack (length haystack))) + (iterate + (with ni = start-needle) + (with n = (elt needle ni)) + (for h :in-vector haystack :with-index hi :from start-haystack :below end-haystack) + (when (funcall test n h) + (collect hi :into result) + (incf ni) + (if (= ni end-needle) + (return result) + (setf n (elt needle ni)))))) + +(define-problem sseq (data stream) + *input-sseq* + *output-sseq* + (let* ((haystack (nth-value 1 (read-fasta data))) + (needle (nth-value 1 (read-fasta data)))) + (-<> (subsequence-positions needle haystack :test #'char=) + (mapcar #'1+ <>) + (format nil "~{~D~^ ~}" <>))))