f65ed39c371d

First passes at MRNA and SPLC
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Mon, 05 Nov 2018 19:49:03 -0500
parents e279056b1a5b
children fc16ddecf261
branches/tags (none)
files rosalind.asd src/problems/mrna.lisp src/problems/splc.lisp src/utils.lisp

Changes

--- a/rosalind.asd	Sat Nov 03 18:20:48 2018 -0400
+++ b/rosalind.asd	Mon Nov 05 19:49:03 2018 -0500
@@ -41,5 +41,7 @@
                                            (:file "fibd")
                                            (:file "cons")
                                            (:file "grph")
-                                           (:file "prtm")))))))
+                                           (:file "prtm")
+                                           (:file "mrna")
+                                           (:file "splc")))))))
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/mrna.lisp	Mon Nov 05 19:49:03 2018 -0500
@@ -0,0 +1,60 @@
+(in-package :rosalind)
+
+;; We're using a real programming language, so we have actual numbers and don't
+;; need to bother with modular arithmetic for the tiny inputs they're giving us
+;; as data sets:
+;;
+;; (solve mrna) =>
+;; 18223466647209680564519396994425171292117842339309783544950259938739364856964963
+;; 66953808268144949737039356476412047824299213884930571696940173622342387041181012
+;; 59342299576631057415056504154013752799852275638571559263796551872383306751629837
+;; 52919433077998281001987083484254030689771952436244061958890571552274110433782197
+;; 92667561733403741083516672972846644834108916554466004897769300303373575679132517
+;; 064863091594635637947593916416
+;;
+;; But let's humor them and do it anyway, for fun.  I added it into my utils
+;; library.
+
+(defun acid-to-codons (acid)
+  "Return a list of the codons that could have encoded this amino acid.
+
+  Neither the list nor the strings in it will be fresh.
+  Use `(mapcar #'copy-seq (acid-to-codon …))` if you need fresh copies.
+
+  "
+  (ecase acid
+    (#\I   '("AUA" "AUC" "AUU"))
+    (#\R   '("AGG" "CGG" "AGA" "CGA" "CGC" "CGU"))
+    (#\D   '("GAC" "GAU"))
+    (#\V   '("GUG" "GUA" "GUC" "GUU"))
+    (#\P   '("CCG" "CCA" "CCC" "CCU"))
+    (#\T   '("ACG" "ACA" "ACC" "ACU"))
+    (#\S   '("AGC" "AGU" "UCG" "UCA" "UCC" "UCU"))
+    (#\F   '("UUC" "UUU"))
+    (#\Y   '("UAC" "UAU"))
+    (#\L   '("CUG" "UUG" "CUA" "UUA" "CUC" "CUU"))
+    (#\Q   '("CAG" "CAA"))
+    (#\H   '("CAC " "CAU"))
+    (#\C   '("UGC" "UGU"))
+    (#\K   '("AAG" "AAA"))
+    (#\N   '("AAC" "AAU"))
+    (#\M   '("AUG"))
+    (#\W   '("UGG"))
+    (#\G   '("GGG" "GGA" "GGC" "GGU"))
+    (#\E   '("GAG" "GAA"))
+    (#\A   '("GCG" "GCA" "GCC" "GCU"))
+    ((nil) '("UGA" "UAG" "UAA"))))
+
+(defun acid-codon-count (acid)
+  (length (acid-to-codons acid)))
+
+
+(define-problem mrna (data string)
+    "MA"
+    "12"
+  (product (delete #\newline data)
+           :modulo 1000000
+           :key #'acid-codon-count
+           :initial-value (acid-codon-count nil)))
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/splc.lisp	Mon Nov 05 19:49:03 2018 -0500
@@ -0,0 +1,53 @@
+(in-package :rosalind)
+
+(defparameter *input-splc* ">Rosalind_10
+ATGGTCTACATAGCTGACAAACAGCACGTAGCAATCGGTCGAATCTCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGTTTGCGCCTAG
+>Rosalind_12
+ATCGGTCGAA
+>Rosalind_15
+ATCGGTCGAGCGTGT")
+
+(defparameter *output-splc* "MVYIADKQHVASREAYGHMFKVCA")
+
+
+(defun prefixp (prefix vector &key (start 0) (test #'eql))
+  (and (<= (length prefix)
+           (+ start (length vector)))
+       (iterate
+         (for x :in-vector prefix)
+         (for y :in-vector vector :from start)
+         (always (funcall test x y)))))
+
+(defun string=* (string1 string2 &key (start1 0) end1 (start2 0) end2)
+  (let ((l1 (length string1))
+        (l2 (length string2)))
+    (string= string1 string2
+             :start1 (min (max 0 start1) l1)
+             :start2 (min (max 0 start2) l2)
+             :end1 (min (max 0 end1) l1)
+             :end2 (min (max 0 end2) l2))))
+
+(defun intron-matches-p (intron dna &key (start 0))
+  (prefixp intron dna :start start))
+
+(defun find-intron (introns dna &key (start 0))
+  (find-if (rcurry #'intron-matches-p dna :start start) introns))
+
+(defun remove-introns (dna introns)
+  (iterate
+    (for base :in-vector dna :with-index i)
+    (if-let ((intron (find-intron introns dna :start i)))
+      (incf i (1- (length intron)))
+      (collect base :result-type string))))
+
+
+
+(define-problem splc (data stream)
+    *input-splc*
+    *output-splc*
+  (destructuring-bind (dna . introns)
+      (mapcar #'cdr (read-fasta-into-alist data))
+    (-<> dna
+      (remove-introns <> introns)
+      transcribe
+      translate)))
--- a/src/utils.lisp	Sat Nov 03 18:20:48 2018 -0400
+++ b/src/utils.lisp	Mon Nov 05 19:49:03 2018 -0500
@@ -219,6 +219,11 @@
   (iterate (for (label data) :in-fasta source)
            (collect-hash (label data) :test #'equal)))
 
+(defun read-fasta-into-alist (source)
+  "Return everything in the FASTA `source` as an alist of labels to data."
+  (iterate (for (label data) :in-fasta source)
+           (collect (cons label data))))
+
 
 ;;;; Testing ------------------------------------------------------------------
 (defmacro define-test (problem input output &optional (test 'string=))