--- a/rosalind.asd Thu Nov 01 19:46:55 2018 -0400
+++ b/rosalind.asd Thu Nov 01 21:10:34 2018 -0400
@@ -23,5 +23,9 @@
(:file "package")
(:module "src" :serial t
:components ((:file "utils")
- (:file "problems")))))
+ (:module "problems"
+ :components ((:file "dna")
+ (:file "rna")
+ (:file "revc")
+ (:file "gc")))))))
--- a/src/problems.lisp Thu Nov 01 19:46:55 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-(in-package :rosalind)
-
-;;;; Testing ------------------------------------------------------------------
-(defmacro define-test (problem input output &optional (test 'string=))
- `(test ,(symb 'test- problem)
- (is (,test ,output (,problem ,input)))))
-
-(defun run-tests ()
- (1am:run))
-
-
-;;;; Problems -----------------------------------------------------------------
-(defmacro define-problem
- ((number name) args sample-input sample-output &body body)
- (let ((symbol (symb 'problem- number)))
- `(progn
- (defun ,symbol ,args ,@body)
- (setf (get ',symbol 'rosalind-name) ,(string-downcase name))
- (define-test ,symbol ,sample-input ,sample-output)
- ',symbol)))
-
-
-(define-problem (1 dna) (data)
- "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
- "20 12 17 21"
- ;; A string is simply an ordered collection of symbols selected from some
- ;; alphabet and formed into a word; the length of a string is the number of
- ;; symbols that it contains.
- ;;
- ;; An example of a length 21 DNA string (whose alphabet contains the symbols
- ;; 'A', 'C', 'G', and 'T') is "ATGCTTCAGAAAGGTCTTACG."
- ;;
- ;; Given: A DNA string s of length at most 1000 nt.
- ;; Return: Four integers (separated by spaces) counting the respective number
- ;; of times that the symbols 'A', 'C', 'G', and 'T' occur in s.
- (let ((results (frequencies data)))
- (format nil "~D ~D ~D ~D"
- (gethash #\A results)
- (gethash #\C results)
- (gethash #\G results)
- (gethash #\T results))))
-
-(define-problem (2 rna) (data)
- "GATGGAACTTGACTACGTAAATT"
- "GAUGGAACUUGACUACGUAAAUU"
- ;; An RNA string is a string formed from the alphabet containing 'A', 'C',
- ;; 'G', and 'U'.
- ;;
- ;; Given a DNA string t corresponding to a coding strand, its transcribed RNA
- ;; string u is formed by replacing all occurrences of 'T' in t with 'U' in u.
- ;;
- ;; Given: A DNA string t having length at most 1000 nt.
- ;;
- ;; Return: The transcribed RNA string of t.
- (substitute #\U #\T data))
-
-(define-problem (3 revc) (data)
- "AAAACCCGGT"
- "ACCGGGTTTT"
- ;; In DNA strings, symbols 'A' and 'T' are complements of each other, as are
- ;; 'C' and 'G'.
- ;;
- ;; The reverse complement of a DNA string s is the string sc formed by
- ;; reversing the symbols of s, then taking the complement of each symbol
- ;; (e.g., the reverse complement of "GTCA" is "TGAC").
- ;;
- ;; Given: A DNA string s of length at most 1000 bp.
- ;;
- ;; Return: The reverse complement sc of s.
- (copyf data)
- (flet ((dna-complement (base)
- (case base
- (#\A #\T)
- (#\T #\A)
- (#\G #\C)
- (#\C #\G)
- (t base)))) ; newline etc
- (map-into data #'dna-complement data)
- (nreverse data)))
-
-
-;;;; Solutions ----------------------------------------------------------------
-(defun read-problem-data (problem)
- (-<> (get problem 'rosalind-name)
- (format nil "~~/Downloads/rosalind_~A.txt" <>)
- read-file-into-string))
-
-(defun solve% (problem)
- (pbcopy (funcall problem (read-problem-data problem))))
-
-(defmacro solve (problem)
- `(solve% ',problem))
-
-
-;; (problem-3 "AAAACCCGGT")
-
-;; (solve problem-3)
-
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/dna.lisp Thu Nov 01 21:10:34 2018 -0400
@@ -0,0 +1,23 @@
+(in-package :rosalind)
+
+;; A string is simply an ordered collection of symbols selected from some
+;; alphabet and formed into a word; the length of a string is the number of
+;; symbols that it contains.
+;;
+;; An example of a length 21 DNA string (whose alphabet contains the symbols
+;; 'A', 'C', 'G', and 'T') is "ATGCTTCAGAAAGGTCTTACG."
+;;
+;; Given: A DNA string s of length at most 1000 nt. Return: Four integers
+;; (separated by spaces) counting the respective number of times that the
+;; symbols 'A', 'C', 'G', and 'T' occur in s.
+
+(define-problem dna (data)
+ "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
+ "20 12 17 21"
+ (let ((results (frequencies data)))
+ (format nil "~D ~D ~D ~D"
+ (gethash #\A results)
+ (gethash #\C results)
+ (gethash #\G results)
+ (gethash #\T results))))
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/gc.lisp Thu Nov 01 21:10:34 2018 -0400
@@ -0,0 +1,55 @@
+(in-package :rosalind)
+
+;; The GC-content of a DNA string is given by the percentage of symbols in
+;; the string that are 'C' or 'G'. For example, the GC-content of "AGCTATAG"
+;; is 37.5%. Note that the reverse complement of any DNA string has the same
+;; GC-content.
+;;
+;; DNA strings must be labeled when they are consolidated into a database.
+;; A commonly used method of string labeling is called FASTA format. In this
+;; format, the string is introduced by a line that begins with '>', followed
+;; by some labeling information. Subsequent lines contain the string itself;
+;; the first line to begin with '>' indicates the label of the next string.
+;;
+;; In Rosalind's implementation, a string in FASTA format will be labeled by
+;; the ID "Rosalind_xxxx", where "xxxx" denotes a four-digit code between 0000
+;; and 9999.
+;;
+;; Given: At most 10 DNA strings in FASTA format (of length at most 1 kbp each).
+;;
+;; Return: The ID of the string having the highest GC-content, followed by the
+;; GC-content of that string. Rosalind allows for a default error of 0.001 in
+;; all decimal answers unless otherwise stated; please see the note on
+;; absolute error below.
+
+(defparameter *input-gc* ">Rosalind_6404
+CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
+TCCCACTAATAATTCTGAGG
+>Rosalind_5959
+CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
+ATATCCATTTGTCAGCAGACACGC
+>Rosalind_0808
+CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
+TGGGAACCTGCGGGCAGTAGGTGGAAT")
+(defparameter *output-gc* "Rosalind_0808
+60.919540")
+
+
+(define-problem gc (data)
+ *input-gc*
+ *output-gc*
+ (labels ((gcp (base)
+ (or (char= #\G base)
+ (char= #\C base)))
+ (gc-content (string)
+ (/ (count-if #'gcp string)
+ (length string))))
+ (iterate
+ (for (label dna) :in-fasta data)
+ (for gc = (gc-content dna))
+ (finding (format nil "~A~%~,6F" label (* 100 gc))
+ :maximizing gc))))
+
+
+;; (problem-gc *input-gc*)
+;; (solve problem-gc)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/revc.lisp Thu Nov 01 21:10:34 2018 -0400
@@ -0,0 +1,27 @@
+(in-package :rosalind)
+
+;; In DNA strings, symbols 'A' and 'T' are complements of each other, as are 'C'
+;; and 'G'.
+;;
+;; The reverse complement of a DNA string s is the string sc formed by reversing
+;; the symbols of s, then taking the complement of each symbol (e.g., the
+;; reverse complement of "GTCA" is "TGAC").
+;;
+;; Given: A DNA string s of length at most 1000 bp.
+;;
+;; Return: The reverse complement sc of s.
+
+(define-problem revc (data)
+ "AAAACCCGGT"
+ "ACCGGGTTTT"
+ (copyf data)
+ (flet ((dna-complement (base)
+ (case base
+ (#\A #\T)
+ (#\T #\A)
+ (#\G #\C)
+ (#\C #\G)
+ (t base)))) ; newline etc
+ (map-into data #'dna-complement data)
+ (nreverse data)))
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/rna.lisp Thu Nov 01 21:10:34 2018 -0400
@@ -0,0 +1,17 @@
+(in-package :rosalind)
+
+;; An RNA string is a string formed from the alphabet containing 'A', 'C', 'G',
+;; and 'U'.
+;;
+;; Given a DNA string t corresponding to a coding strand, its transcribed RNA
+;; string u is formed by replacing all occurrences of 'T' in t with 'U' in u.
+;;
+;; Given: A DNA string t having length at most 1000 nt.
+;;
+;; Return: The transcribed RNA string of t.
+
+(define-problem rna (data)
+ "GATGGAACTTGACTACGTAAATT"
+ "GAUGGAACUUGACUACGUAAAUU"
+ (substitute #\U #\T data))
+
--- a/src/utils.lisp Thu Nov 01 19:46:55 2018 -0400
+++ b/src/utils.lisp Thu Nov 01 21:10:34 2018 -0400
@@ -1,6 +1,7 @@
(in-package :rosalind)
+;;;; Misc ---------------------------------------------------------------------
(defun sh (command input)
(declare (ignorable command input))
#+sbcl
@@ -25,3 +26,93 @@
(defmacro copyf (sequence)
`(setf ,sequence (copy-seq ,sequence)))
+
+(defun ensure-stream (input)
+ (ctypecase input
+ (stream input)
+ (string (make-string-input-stream input))))
+
+(defun nconcatenate (v1 v2)
+ (let* ((l1 (length v1))
+ (l2 (length v2))
+ (needed (+ l1 l2)))
+ (when (< (array-total-size v1) needed)
+ (adjust-array v1 (max needed (* l1 2))))
+ (setf (fill-pointer v1) needed)
+ (replace v1 v2 :start1 l1)
+ (values)))
+
+(defun make-buffer (&optional (capacity 64))
+ (make-array capacity :element-type 'character :adjustable t :fill-pointer 0))
+
+(defun round-to (number precision)
+ "Round `number` to the given `precision`.
+
+ Examples:
+
+ (round-to 13 10) ; => 10
+ (round-to 15 10) ; => 20
+ (round-to 44 25) ; => 50
+ (round-to 457/87 1/2) ; => 11/2
+
+ "
+ (* precision (round number precision)))
+
+
+;;;; Testing ------------------------------------------------------------------
+(defmacro define-test (problem input output &optional (test 'string=))
+ `(test ,(symb 'test- problem)
+ (is (,test ,output (,problem ,input)))))
+
+(defun run-tests ()
+ (1am:run))
+
+
+;;;; File Formats -------------------------------------------------------------
+
+(defmacro-driver (FOR vars IN-FASTA source)
+ (nest
+ (destructuring-bind (label line) vars)
+ (with-gensyms (stream l))
+ (let ((kwd (if generate 'generate 'for))))
+ `(progn
+ (with ,label = nil)
+ (with ,stream = (ensure-stream ,source))
+ (,kwd ,line :do-next
+ (labels ((labelp (line)
+ (char= #\> (aref line 0)))
+ (parse-next ()
+ (let ((,l (read-line ,stream nil nil nil)))
+ (cond
+ ((null ,l) (terminate))
+ ((zerop (length ,l)) (parse-next))
+ ((labelp ,l) (progn (setf ,label (subseq ,l 1)
+ ,line (make-buffer))
+ (parse-next)))
+ (t (progn (nconcatenate ,line ,l)
+ (unless (char= #\> (peek-char nil ,stream nil #\>)) ; yuck
+ (parse-next))))))))
+ (parse-next))))))
+
+
+
+;;;; Problems -----------------------------------------------------------------
+(defmacro define-problem (name args sample-input sample-output &body body)
+ (let ((symbol (symb 'problem- name)))
+ `(progn
+ (defun ,symbol ,args ,@body)
+ (setf (get ',symbol 'rosalind-name) ,(string-downcase name))
+ (define-test ,symbol ,sample-input ,sample-output)
+ ',symbol)))
+
+(defun read-problem-data (problem)
+ (-<> (get problem 'rosalind-name)
+ (format nil "~~/Downloads/rosalind_~A.txt" <>)
+ read-file-into-string))
+
+(defun solve% (problem)
+ (pbcopy (funcall problem (read-problem-data problem))))
+
+(defmacro solve (problem)
+ `(solve% ',problem))
+
--- a/vendor/quickutils-package.lisp Thu Nov 01 19:46:55 2018 -0400
+++ b/vendor/quickutils-package.lisp Thu Nov 01 21:10:34 2018 -0400
@@ -1,10 +1,10 @@
(eval-when (:compile-toplevel :load-toplevel :execute)
- (unless (find-package "EULER.QUICKUTILS")
- (defpackage "EULER.QUICKUTILS"
+ (unless (find-package "ROSALIND.QUICKUTILS")
+ (defpackage "ROSALIND.QUICKUTILS"
(:documentation "Package that contains Quickutil utility functions.")
(:use #:cl))))
-(in-package "EULER.QUICKUTILS")
+(in-package "ROSALIND.QUICKUTILS")
;; need to define this here so sbcl will shut the hell up about it being
;; undefined when compiling quickutils.lisp. computers are trash.