--- a/rosalind.asd Thu Nov 08 20:36:22 2018 -0500
+++ b/rosalind.asd Thu Nov 08 21:17:47 2018 -0500
@@ -42,7 +42,9 @@
(:file "iprb")
(:file "lcsm")
(:file "lia")
+ (:file "mprt")
(:file "mrna")
+ (:file "orf")
(:file "perm")
(:file "prot")
(:file "prtm")
--- a/src/problems/mprt.lisp Thu Nov 08 20:36:22 2018 -0500
+++ b/src/problems/mprt.lisp Thu Nov 08 21:17:47 2018 -0500
@@ -1,5 +1,10 @@
(in-package :rosalind)
+;; This was pretty simple, except for discovering that cl-ppcre's all-matches
+;; function skips overlapping matches. Otherwise we just convert the motif to
+;; a regex and handle grabbing the data from UniProt (which is straightforward
+;; but can be slow).
+
(defparameter *input-mprt*
"A2Z669
B5ZC00
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/orf.lisp Thu Nov 08 21:17:47 2018 -0500
@@ -0,0 +1,35 @@
+(in-package :rosalind)
+
+;; NOTE(review): presumably the sample dataset from the ORF problem statement.
+;; It is FASTA text (a `>` label line followed by the DNA sequence) and is the
+;; first string handed to `define-problem orf` below.
+(defparameter *input-orf*
+ ">Rosalind_99
+AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG")
+
+;; NOTE(review): presumably the expected result for `*input-orf*`: one protein
+;; string per line, in the newline-separated shape that `orf` produces.
+(defparameter *output-orf*
+ "M
+MTPRLGLESLLE
+MGMTPRLGLESLLE
+MLLGSFRLIPKETLIQVAGSSPCNLS")
+
+
+(defun translate-all (rna)
+  "Return a list of every protein that can be translated from `rna`.
+
+  Each occurrence of the start codon AUG is tried in turn, in whatever reading
+  frame it happens to fall.  A start codon for which `translate` yields no
+  protein (it returns `nil` when no stop codon follows in frame) is skipped
+  rather than treated as the end of the search: a later start codon in another
+  reading frame may still be followed by a stop codon.
+
+  Proteins are returned in order of their start position; an empty list is
+  returned when `rna` contains no translatable region.
+
+  "
+  (iterate
+    ;; Locate the start codons here instead of relying on the second value of
+    ;; `translate`, which is `nil` both when no start codon is left and when
+    ;; one was found but never terminated.
+    (for start :first (search "AUG" rna)
+               :then (search "AUG" rna :start2 (1+ start)))
+    (while start)
+    (for protein = (translate rna :start start))
+    ;; An unterminated start codon contributes nothing, but the scan goes on.
+    (when protein
+      (collect protein))))
+
+(define-problem orf (data stream)
+    *input-orf*
+    *output-orf*
+  ;; Translate both strands of the first FASTA record, drop repeated proteins,
+  ;; and print what is left shortest-first.
+  (let* ((dna (cdr (first (read-fasta-into-alist data))))
+         (forward (transcribe dna))
+         (backward (transcribe (reverse-complement dna)))
+         (proteins (remove-duplicates (append (translate-all forward)
+                                              (translate-all backward))
+                                      :test #'string=)))
+    ;; One protein per line, with no newline after the last one.
+    (format nil "~{~A~^~%~}" (sort proteins #'< :key #'length))))
+
+
--- a/src/problems/prot.lisp Thu Nov 08 20:36:22 2018 -0500
+++ b/src/problems/prot.lisp Thu Nov 08 21:17:47 2018 -0500
@@ -58,11 +58,29 @@
("UGG" #\W) ("CGG" #\R) ("AGG" #\R) ("GGG" #\G)))
(defun translate (rna &key (start 0))
- "Translate a string of RNA bases into a protein string of amino acids."
- (iterate (for i :from (search "AUG" rna :start2 start) :by 3)
- (for protein = (codon-to-protein rna i))
- (while protein)
- (collect protein :result-type 'string)))
+ "Translate a string of RNA bases into a protein string of amino acids.
+
+ `rna` will be searched (beginning at index `start`, inclusive) for the start
+ codon AUG and translation will proceed from there. If no start codon occurs
+ at or after `start` then `nil` will be returned.
+
+ Once a start codon has been found, translation proceeds codon by codon until
+ `codon-to-protein` returns `nil` (presumably at a stop codon). If the end of
+ `rna` is reached first, two `nil` values will be returned.
+
+ Otherwise two values are returned: the protein string and the index into `rna`
+ where its start codon was found.
+
+ "
+ ;; The inner `start` shadows the keyword argument with the start codon's index.
+ (when-let ((start (search "AUG" rna :start2 start)))
+ (values
+ ;; `limit` is the last index at which a complete three-base codon still fits.
+ (iterate (with limit = (- (length rna) 3))
+ (for i :from start :by 3)
+ ;; Fewer than three bases remain and the loop has not ended: give up on
+ ;; this start codon instead of reading past the end of `rna`.
+ (when (> i limit)
+ (return-from translate (values nil nil)))
+ (for protein = (codon-to-protein rna i))
+ (while protein)
+ (collect protein :result-type 'string))
+ start)))
(define-problem prot (data string)
--- a/src/problems/revc.lisp Thu Nov 08 20:36:22 2018 -0500
+++ b/src/problems/revc.lisp Thu Nov 08 21:17:47 2018 -0500
@@ -28,7 +28,7 @@
;; polarized ends, with one end being called 3′ and the other being 5′, but I'm
;; not 100% sure.
-(defun reverse-complement (dna)
+(defun nreverse-complement (dna)
(flet ((dna-complement (base)
(case base
(#\A #\T)
@@ -38,9 +38,12 @@
(map-into dna #'dna-complement dna)
(nreverse dna)))
+(defun reverse-complement (dna)
+  "Return the reverse complement of `dna` as a new sequence, leaving `dna` unmodified."
+  (let ((scratch (copy-seq dna)))
+    ;; The destructive variant only ever sees the private copy.
+    (nreverse-complement scratch)))
+
(define-problem revc (data string)
"AAAACCCGGT"
"ACCGGGTTTT"
"Return the reverse complement of `data`."
- (reverse-complement (delete #\newline data)))
+ ;; The destructive variant is used here, which skips the `copy-seq` that
+ ;; `reverse-complement` performs.
+ ;; NOTE(review): `delete` is allowed to return `data` itself, in which case
+ ;; `data` is complemented and reversed in place -- confirm that callers never
+ ;; reuse `data` afterwards or pass a literal string.
+ (nreverse-complement (delete #\newline data)))