First passes at MRNA and SPLC
author |
Steve Losh <steve@stevelosh.com> |
date |
Mon, 05 Nov 2018 19:49:03 -0500 (2018-11-06) |
parents |
e279056b1a5b
|
children |
fc16ddecf261
|
branches/tags |
(none) |
files |
rosalind.asd src/problems/mrna.lisp src/problems/splc.lisp src/utils.lisp |
Changes
--- a/rosalind.asd Sat Nov 03 18:20:48 2018 -0400
+++ b/rosalind.asd Mon Nov 05 19:49:03 2018 -0500
@@ -41,5 +41,7 @@
(:file "fibd")
(:file "cons")
(:file "grph")
- (:file "prtm")))))))
+ (:file "prtm")
+ (:file "mrna")
+ (:file "splc")))))))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/mrna.lisp Mon Nov 05 19:49:03 2018 -0500
@@ -0,0 +1,60 @@
+(in-package :rosalind)
+
+;; We're using a real programming language, so we have actual numbers and don't
+;; need to bother with modular arithmetic for the tiny inputs they're giving us
+;; as data sets:
+;;
+;; (solve mrna) =>
+;; 18223466647209680564519396994425171292117842339309783544950259938739364856964963
+;; 66953808268144949737039356476412047824299213884930571696940173622342387041181012
+;; 59342299576631057415056504154013752799852275638571559263796551872383306751629837
+;; 52919433077998281001987083484254030689771952436244061958890571552274110433782197
+;; 92667561733403741083516672972846644834108916554466004897769300303373575679132517
+;; 064863091594635637947593916416
+;;
+;; But let's humor them and do it anyway, for fun. I added it into my utils
+;; library.
+
+(defun acid-to-codons (acid)
+ "Return a list of the codons that could have encoded this amino acid.
+
+ Neither the list nor the strings in it will be fresh.
+ Use `(mapcar #'copy-seq (acid-to-codon …))` if you need fresh copies.
+
+ "
+ (ecase acid
+ (#\I '("AUA" "AUC" "AUU"))
+ (#\R '("AGG" "CGG" "AGA" "CGA" "CGC" "CGU"))
+ (#\D '("GAC" "GAU"))
+ (#\V '("GUG" "GUA" "GUC" "GUU"))
+ (#\P '("CCG" "CCA" "CCC" "CCU"))
+ (#\T '("ACG" "ACA" "ACC" "ACU"))
+ (#\S '("AGC" "AGU" "UCG" "UCA" "UCC" "UCU"))
+ (#\F '("UUC" "UUU"))
+ (#\Y '("UAC" "UAU"))
+ (#\L '("CUG" "UUG" "CUA" "UUA" "CUC" "CUU"))
+ (#\Q '("CAG" "CAA"))
+ (#\H '("CAC " "CAU"))
+ (#\C '("UGC" "UGU"))
+ (#\K '("AAG" "AAA"))
+ (#\N '("AAC" "AAU"))
+ (#\M '("AUG"))
+ (#\W '("UGG"))
+ (#\G '("GGG" "GGA" "GGC" "GGU"))
+ (#\E '("GAG" "GAA"))
+ (#\A '("GCG" "GCA" "GCC" "GCU"))
+ ((nil) '("UGA" "UAG" "UAA"))))
+
+(defun acid-codon-count (acid)
+ (length (acid-to-codons acid)))
+
+
+(define-problem mrna (data string)
+ "MA"
+ "12"
+ (product (delete #\newline data)
+ :modulo 1000000
+ :key #'acid-codon-count
+ :initial-value (acid-codon-count nil)))
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/splc.lisp Mon Nov 05 19:49:03 2018 -0500
@@ -0,0 +1,53 @@
+(in-package :rosalind)
+
+(defparameter *input-splc* ">Rosalind_10
+ATGGTCTACATAGCTGACAAACAGCACGTAGCAATCGGTCGAATCTCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGTTTGCGCCTAG
+>Rosalind_12
+ATCGGTCGAA
+>Rosalind_15
+ATCGGTCGAGCGTGT")
+
+(defparameter *output-splc* "MVYIADKQHVASREAYGHMFKVCA")
+
+
+(defun prefixp (prefix vector &key (start 0) (test #'eql))
+ (and (<= (length prefix)
+ (+ start (length vector)))
+ (iterate
+ (for x :in-vector prefix)
+ (for y :in-vector vector :from start)
+ (always (funcall test x y)))))
+
+(defun string=* (string1 string2 &key (start1 0) end1 (start2 0) end2)
+ (let ((l1 (length string1))
+ (l2 (length string2)))
+ (string= string1 string2
+ :start1 (min (max 0 start1) l1)
+ :start2 (min (max 0 start2) l2)
+ :end1 (min (max 0 end1) l1)
+ :end2 (min (max 0 end2) l2))))
+
+(defun intron-matches-p (intron dna &key (start 0))
+ (prefixp intron dna :start start))
+
+(defun find-intron (introns dna &key (start 0))
+ (find-if (rcurry #'intron-matches-p dna :start start) introns))
+
+(defun remove-introns (dna introns)
+ (iterate
+ (for base :in-vector dna :with-index i)
+ (if-let ((intron (find-intron introns dna :start i)))
+ (incf i (1- (length intron)))
+ (collect base :result-type string))))
+
+
+
+(define-problem splc (data stream)
+ *input-splc*
+ *output-splc*
+ (destructuring-bind (dna . introns)
+ (mapcar #'cdr (read-fasta-into-alist data))
+ (-<> dna
+ (remove-introns <> introns)
+ transcribe
+ translate)))
--- a/src/utils.lisp Sat Nov 03 18:20:48 2018 -0400
+++ b/src/utils.lisp Mon Nov 05 19:49:03 2018 -0500
@@ -219,6 +219,11 @@
(iterate (for (label data) :in-fasta source)
(collect-hash (label data) :test #'equal)))
+(defun read-fasta-into-alist (source)
+ "Return everything in the FASTA `source` as an alist of labels to data."
+ (iterate (for (label data) :in-fasta source)
+ (collect (cons label data))))
+
;;;; Testing ------------------------------------------------------------------
(defmacro define-test (problem input output &optional (test 'string=))