# HG changeset patch
# User Steve Losh
# Date 1541729867 18000
# Node ID b3598dc7ef5c2a29732f4e2ddf66b665c3ce2737
# Parent  b5923704ce428db4cd72dd106fef06a0223f1cc5
ORF

diff -r b5923704ce42 -r b3598dc7ef5c rosalind.asd
--- a/rosalind.asd	Thu Nov 08 20:36:22 2018 -0500
+++ b/rosalind.asd	Thu Nov 08 21:17:47 2018 -0500
@@ -42,7 +42,9 @@
                                            (:file "iprb")
                                            (:file "lcsm")
                                            (:file "lia")
+                                           (:file "mprt")
                                            (:file "mrna")
+                                           (:file "orf")
                                            (:file "perm")
                                            (:file "prot")
                                            (:file "prtm")
diff -r b5923704ce42 -r b3598dc7ef5c src/problems/mprt.lisp
--- a/src/problems/mprt.lisp	Thu Nov 08 20:36:22 2018 -0500
+++ b/src/problems/mprt.lisp	Thu Nov 08 21:17:47 2018 -0500
@@ -1,5 +1,10 @@
 (in-package :rosalind)
 
+;; This was pretty simple, except for discovering that cl-ppcre's all-matches
+;; function skips overlapping matches. Otherwise we just convert the motif to
+;; a regex and handle grabbing the data from Uniprot (which is straightforward
+;; but can be slow).
+
 (defparameter *input-mprt*
   "A2Z669
 B5ZC00
diff -r b5923704ce42 -r b3598dc7ef5c src/problems/orf.lisp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/orf.lisp	Thu Nov 08 21:17:47 2018 -0500
@@ -0,0 +1,44 @@
+(in-package :rosalind)
+
+(defparameter *input-orf*
+  ">Rosalind_99
+AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG")
+
+(defparameter *output-orf*
+  "M
+MTPRLGLESLLE
+MGMTPRLGLESLLE
+MLLGSFRLIPKETLIQVAGSSPCNLS")
+
+
+(defun translate-all (rna)
+  "Return all possible proteins that can be translated from `rna`.
+
+  Every start codon in `rna` is tried in turn. A start codon with no stop
+  codon after it in its reading frame produces no protein and is skipped, but
+  the search continues, because a later start codon can sit in a different
+  reading frame that does contain a stop codon."
+  (iterate
+    (for start :first (search "AUG" rna)
+               :then (search "AUG" rna :start2 (1+ start)))
+    (while start)
+    ;; `translate` returns `nil` when it runs off the end of `rna` without
+    ;; hitting a stop codon, which must not end the scan.
+    (for protein = (translate rna :start start))
+    (when protein
+      (collect protein))))
+
+(define-problem orf (data stream)
+    *input-orf*
+    *output-orf*
+  (let* ((dna (cdr (first (read-fasta-into-alist data))))
+         (rna1 (transcribe dna))
+         (rna2 (transcribe (reverse-complement dna))))
+    (with-output-to-string (s)
+      (-<> (append (translate-all rna1)
+                   (translate-all rna2))
+        (remove-duplicates <> :test #'string=)
+        (sort <> #'< :key #'length)
+        (format s "~{~A~^~%~}" <>)))))
+
+
diff -r b5923704ce42 -r b3598dc7ef5c src/problems/prot.lisp
--- a/src/problems/prot.lisp	Thu Nov 08 20:36:22 2018 -0500
+++ b/src/problems/prot.lisp	Thu Nov 08 21:17:47 2018 -0500
@@ -58,11 +58,29 @@
     ("UGG" #\W) ("CGG" #\R) ("AGG" #\R) ("GGG" #\G)))
 
 (defun translate (rna &key (start 0))
-  "Translate a string of RNA bases into a protein string of amino acids."
-  (iterate (for i :from (search "AUG" rna :start2 start) :by 3)
-           (for protein = (codon-to-protein rna i))
-           (while protein)
-           (collect protein :result-type 'string)))
+  "Translate a string of RNA bases into a protein string of amino acids.
+
+  `rna` will be searched (beginning at `start`) for a start codon and
+  translation will proceed from there. If no start codon occurs after `start`
+  then `nil` will be returned.
+
+  Once a start codon has been found, translation proceeds to the next stop
+  codon. If no stop codon is present, `nil` will be returned.
+
+  Otherwise two values are returned: the protein string and the index into `rna`
+  where it started.
+
+  "
+  (when-let ((start (search "AUG" rna :start2 start)))
+    (values
+      (iterate (with limit = (- (length rna) 3))
+               (for i :from start :by 3)
+               (when (> i limit)
+                 (return-from translate (values nil nil)))
+               (for protein = (codon-to-protein rna i))
+               (while protein)
+               (collect protein :result-type 'string))
+      start)))
 
 
 (define-problem prot (data string)
diff -r b5923704ce42 -r b3598dc7ef5c src/problems/revc.lisp
--- a/src/problems/revc.lisp	Thu Nov 08 20:36:22 2018 -0500
+++ b/src/problems/revc.lisp	Thu Nov 08 21:17:47 2018 -0500
@@ -28,7 +28,8 @@
 ;; polarized ends, with one end being called 3′ and the other being 5′, but I'm
 ;; not 100% sure.
 
-(defun reverse-complement (dna)
+(defun nreverse-complement (dna)
+  "Return the reverse complement of `dna`, destructively modifying `dna`."
   (flet ((dna-complement (base)
            (case base
              (#\A #\T)
@@ -38,9 +39,13 @@
     (map-into dna #'dna-complement dna)
     (nreverse dna)))
 
+(defun reverse-complement (dna)
+  "Return the reverse complement of `dna` as a fresh copy, leaving `dna` intact."
+  (nreverse-complement (copy-seq dna)))
+
 
 (define-problem revc (data string)
     "AAAACCCGGT"
     "ACCGGGTTTT"
   "Return the reverse complement of `data`."
-  (reverse-complement (delete #\newline data)))
+  (nreverse-complement (delete #\newline data)))