# HG changeset patch # User Steve Losh # Date 1541465343 18000 # Node ID f65ed39c371d9381ccbac9f5e49bd783861e6e34 # Parent e279056b1a5b2935f6e16694234803e68b0fa5f1 First passes at MRNA and SPLC diff -r e279056b1a5b -r f65ed39c371d rosalind.asd --- a/rosalind.asd Sat Nov 03 18:20:48 2018 -0400 +++ b/rosalind.asd Mon Nov 05 19:49:03 2018 -0500 @@ -41,5 +41,7 @@ (:file "fibd") (:file "cons") (:file "grph") - (:file "prtm"))))))) + (:file "prtm") + (:file "mrna") + (:file "splc"))))))) diff -r e279056b1a5b -r f65ed39c371d src/problems/mrna.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/problems/mrna.lisp Mon Nov 05 19:49:03 2018 -0500 @@ -0,0 +1,60 @@ +(in-package :rosalind) + +;; We're using a real programming language, so we have actual numbers and don't +;; need to bother with modular arithmetic for the tiny inputs they're giving us +;; as data sets: +;; +;; (solve mrna) => +;; 18223466647209680564519396994425171292117842339309783544950259938739364856964963 +;; 66953808268144949737039356476412047824299213884930571696940173622342387041181012 +;; 59342299576631057415056504154013752799852275638571559263796551872383306751629837 +;; 52919433077998281001987083484254030689771952436244061958890571552274110433782197 +;; 92667561733403741083516672972846644834108916554466004897769300303373575679132517 +;; 064863091594635637947593916416 +;; +;; But let's humor them and do it anyway, for fun. I added it into my utils +;; library. + +(defun acid-to-codons (acid) + "Return a list of the codons that could have encoded this amino acid. + + Neither the list nor the strings in it will be fresh. + Use `(mapcar #'copy-seq (acid-to-codon …))` if you need fresh copies. + + " + (ecase acid + (#\I '("AUA" "AUC" "AUU")) + (#\R '("AGG" "CGG" "AGA" "CGA" "CGC" "CGU")) + (#\D '("GAC" "GAU")) + (#\V '("GUG" "GUA" "GUC" "GUU")) + (#\P '("CCG" "CCA" "CCC" "CCU")) + (#\T '("ACG" "ACA" "ACC" "ACU")) + (#\S '("AGC" "AGU" "UCG" "UCA" "UCC" "UCU")) + (#\F '("UUC" "UUU")) + (#\Y '("UAC" "UAU")) + (#\L '("CUG" "UUG" "CUA" "UUA" "CUC" "CUU")) + (#\Q '("CAG" "CAA")) + (#\H '("CAC " "CAU")) + (#\C '("UGC" "UGU")) + (#\K '("AAG" "AAA")) + (#\N '("AAC" "AAU")) + (#\M '("AUG")) + (#\W '("UGG")) + (#\G '("GGG" "GGA" "GGC" "GGU")) + (#\E '("GAG" "GAA")) + (#\A '("GCG" "GCA" "GCC" "GCU")) + ((nil) '("UGA" "UAG" "UAA")))) + +(defun acid-codon-count (acid) + (length (acid-to-codons acid))) + + +(define-problem mrna (data string) + "MA" + "12" + (product (delete #\newline data) + :modulo 1000000 + :key #'acid-codon-count + :initial-value (acid-codon-count nil))) + + diff -r e279056b1a5b -r f65ed39c371d src/problems/splc.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/problems/splc.lisp Mon Nov 05 19:49:03 2018 -0500 @@ -0,0 +1,53 @@ +(in-package :rosalind) + +(defparameter *input-splc* ">Rosalind_10 +ATGGTCTACATAGCTGACAAACAGCACGTAGCAATCGGTCGAATCTCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGTTTGCGCCTAG +>Rosalind_12 +ATCGGTCGAA +>Rosalind_15 +ATCGGTCGAGCGTGT") + +(defparameter *output-splc* "MVYIADKQHVASREAYGHMFKVCA") + + +(defun prefixp (prefix vector &key (start 0) (test #'eql)) + (and (<= (length prefix) + (+ start (length vector))) + (iterate + (for x :in-vector prefix) + (for y :in-vector vector :from start) + (always (funcall test x y))))) + +(defun string=* (string1 string2 &key (start1 0) end1 (start2 0) end2) + (let ((l1 (length string1)) + (l2 (length string2))) + (string= string1 string2 + :start1 (min (max 0 start1) l1) + :start2 (min (max 0 start2) l2) + :end1 (min (max 0 end1) l1) + :end2 (min (max 0 end2) l2)))) + +(defun intron-matches-p (intron dna &key (start 0)) + (prefixp intron dna :start start)) + +(defun find-intron (introns dna &key (start 0)) + (find-if (rcurry #'intron-matches-p dna :start start) introns)) + +(defun remove-introns (dna introns) + (iterate + (for base :in-vector dna :with-index i) + (if-let ((intron (find-intron introns dna :start i))) + (incf i (1- (length intron))) + (collect base :result-type string)))) + + + +(define-problem splc (data stream) + *input-splc* + *output-splc* + (destructuring-bind (dna . introns) + (mapcar #'cdr (read-fasta-into-alist data)) + (-<> dna + (remove-introns <> introns) + transcribe + translate))) diff -r e279056b1a5b -r f65ed39c371d src/utils.lisp --- a/src/utils.lisp Sat Nov 03 18:20:48 2018 -0400 +++ b/src/utils.lisp Mon Nov 05 19:49:03 2018 -0500 @@ -219,6 +219,11 @@ (iterate (for (label data) :in-fasta source) (collect-hash (label data) :test #'equal))) +(defun read-fasta-into-alist (source) + "Return everything in the FASTA `source` as an alist of labels to data." + (iterate (for (label data) :in-fasta source) + (collect (cons label data)))) + ;;;; Testing ------------------------------------------------------------------ (defmacro define-test (problem input output &optional (test 'string=))