e0c558fa549b

KMER, SSEQ
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Fri, 22 Feb 2019 19:00:15 -0500 (2019-02-23)
parents 4962c672f610
children 7052ec0c6e1d
branches/tags (none)
files src/problems/kmer.lisp src/problems/sseq.lisp

Changes

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/kmer.lisp	Fri Feb 22 19:00:15 2019 -0500
@@ -0,0 +1,47 @@
+(in-package :rosalind)
+
+(defparameter *input-kmer* ">Rosalind_6431
+CTTCGAAAGTTTGGGCCGAGTCTTACAGTCGGTCTTGAAGCAAAGTAACGAACTCCACGG
+CCCTGACTACCGAACCAGTTGTGAGTACTCAACTGGGTGAGAGTGCAGTCCCTATTGAGT
+TTCCGAGACTCACCGGGATTTTCGATCCAGCCTCAGTCCAGTCTTGTGGCCAACTCACCA
+AATGACGTTGGAATATCCCTGTCTAGCTCACGCAGTACTTAGTAAGAGGTCGCTGCAGCG
+GGGCAAGGAGATCGGAAAATGTGCTCTATATGCGACTAAAGCTCCTAACTTACACGTAGA
+CTTGCCCGTGTTAAAAACTCGGCTCACATGCTGTCTGCGGCTGGCTGTATACAGTATCTA
+CCTAATACCCTTCAGTTCGCCGCACAAAAGCTGGGAGTTACCGCGGAAATCACAG")
+
+(defparameter *output-kmer* "4 1 4 3 0 1 1 5 1 3 1 2 2 1 2 0 1 1 3 1 2 1 3 1 1 1 1 2 2 5 1 3 0 2 2 1 1 1 1 3 1 0 0 1 5 5 1 5 0 2 0 2 1 2 1 1 1 2 0 1 0 0 1 1 3 2 1 0 3 2 3 0 0 2 0 8 0 0 1 0 2 1 3 0 0 0 1 4 3 2 1 1 3 1 2 1 3 1 2 1 2 1 1 1 2 3 2 1 1 0 1 1 3 2 1 2 6 2 1 1 1 2 3 3 3 2 3 0 3 2 1 1 0 0 1 4 3 0 1 5 0 2 0 1 2 1 3 0 1 2 2 1 1 0 3 0 0 4 5 0 3 0 2 1 1 3 0 3 2 2 1 1 0 2 1 0 2 2 1 2 0 2 2 5 2 2 1 1 2 1 2 2 2 2 1 1 3 4 0 2 1 1 0 1 2 2 1 1 1 5 2 0 3 2 1 1 2 2 3 0 3 0 1 3 1 2 3 0 2 1 2 2 1 2 3 0 1 2 3 1 1 3 1 0 1 1 3 0 2 1 2 2 0 2 1 1")
+
+
+(defun mapc-kmers (function n &key (copy t))
+  (let ((kmer (make-array n :element-type 'character)))
+    (recursively ((i 0))
+      (if (= i n)
+        (funcall function (if copy
+                            (copy-seq kmer)
+                            kmer))
+        (flet ((branch (base)
+                 (setf (aref kmer i) base)
+                 (recur (1+ i))))
+          (map nil #'branch "ACGT"))))))
+
+(defun map-kmers (function n)
+  (gathering (mapc-kmers (compose #'gather function) n)))
+
+(defun kmers (n)
+  (map-kmers #'identity n))
+
+
+(define-problem kmer (data stream)
+    *input-kmer*
+    *output-kmer*
+  (iterate
+    (with n = 4)
+    (with seq = (nth-value 1 (read-fasta data)))
+    (with counts = (make-hash-table :test #'equal))
+    (for i :from 0 :to (- (length seq) n))
+    (for kmer = (subseq seq i (+ i n)))
+    (incf (gethash kmer counts 0))
+    (finally
+      (return
+        (format nil "~{~D~^ ~}"
+                (map-kmers (lambda (kmer) (gethash kmer counts 0)) n))))))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/sseq.lisp	Fri Feb 22 19:00:15 2019 -0500
@@ -0,0 +1,36 @@
+(in-package :rosalind)
+
+(defparameter *input-sseq* ">Rosalind_14
+ACGTACGTGACG
+>Rosalind_18
+GTA
+")
+
+(defparameter *output-sseq* "3 4 5")
+
+;; todo: make this more efficient for lists
+(defun subsequence-positions (needle haystack &key
+                              (test #'eql)
+                              (start-needle 0)
+                              (end-needle (length needle))
+                              (start-haystack 0)
+                              (end-haystack (length haystack)))
+  (iterate
+    (with ni = start-needle)
+    (with n = (elt needle ni))
+    (for h :in-vector haystack :with-index hi :from start-haystack :below end-haystack)
+    (when (funcall test n h)
+      (collect hi :into result)
+      (incf ni)
+      (if (= ni end-needle)
+        (return result)
+        (setf n (elt needle ni))))))
+
+(define-problem sseq (data stream)
+    *input-sseq*
+    *output-sseq*
+  (let* ((haystack (nth-value 1 (read-fasta data)))
+         (needle (nth-value 1 (read-fasta data))))
+    (-<> (subsequence-positions needle haystack :test #'char=)
+      (mapcar #'1+ <>)
+      (format nil "~{~D~^ ~}" <>))))