--- a/src/package.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/package.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,9 +1,49 @@
-(defpackage :rosalind
+(defpackage :rosalind/utils
+ (:nicknames :u)
(:use :cl :iterate :losh)
- (:import-from :1am :is)
(:import-from :alexandria
:curry :rcurry :compose
:ensure-gethash
:with-gensyms :once-only :symbolicate)
- (:shadowing-import-from :1am :test)
- (:export :run-tests))
+ (:export
+ :curry :rcurry :compose
+ :ensure-gethash
+ :with-gensyms :once-only :symbolicate
+
+ :define-problem :solve
+
+ :factorial
+
+ :permutations
+
+ :dna-complement :reverse-complement :nreverse-complement
+ :transcribe :ntranscribe
+ :translate
+
+ :gcp :base-probability :sequence-probability
+
+ :mapcount
+
+ :string-empty-p
+ :first-char
+
+ :Σ :Π :binomial-coefficient
+
+ :returning-final
+
+ :read-lines
+ :read-fasta
+ :read-fasta-into-hash-table
+ :read-fasta-into-alist
+
+ :buffering
+
+ :uniprot
+
+ :float-string
+
+ :run-tests :solve))
+
+(defpackage :rosalind
+ (:import-from :rosalind/utils :run-tests :define-problem :solve)
+ (:export :run-tests :define-problem :solve))
--- a/src/problems/cons.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/cons.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,6 +1,7 @@
-(in-package :rosalind)
+(defpackage :rosalind/cons (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/cons)
-(defparameter *input-cons* ">Rosalind_1
+(defparameter *input* ">Rosalind_1
ATCCAGCT
>Rosalind_2
GGGCAACT
@@ -15,7 +16,7 @@
>Rosalind_7
ATGGCACT")
-(defparameter *output-cons* "ATGCAACT
+(defparameter *output* "ATGCAACT
A: 5 1 0 0 5 5 0 0
C: 0 0 1 4 2 0 6 1
G: 1 1 6 3 0 1 0 0
@@ -114,9 +115,7 @@
(collecting winner :result-type 'string)))
-(define-problem cons (data stream)
- *input-cons*
- *output-cons*
+(define-problem cons (data stream) *input* *output*
(let* ((profile-matrix (profile-matrix-from-fasta data))
(consensus-string (consensus-string profile-matrix))
(length (length consensus-string)))
--- a/src/problems/dna.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/dna.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,4 +1,5 @@
-(in-package :rosalind)
+(defpackage :rosalind/dna (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/dna)
;; Nucleic acids are polymers, which means they're long, repeating chains of
;; smaller molecules called monomers.
--- a/src/problems/eval.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/eval.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,20 +1,19 @@
-(in-package :rosalind)
+(defpackage :rosalind/eval (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/eval)
-(defparameter *input-eval* "10
+(defparameter *input* "10
AG
0.25 0.5 0.75")
-(defparameter *output-eval* "0.422 0.563 0.422")
+(defparameter *output* "0.422 0.563 0.422")
-(define-problem eval (data stream)
- *input-eval*
- *output-eval*
+(define-problem eval (data stream) *input* *output*
(let* ((string-length (read data))
(substring (read-line data))
(gc-contents (read-all data))
(chances (- string-length (1- (length substring)))))
- (float-string (mapcar (lambda (gc-content)
- (* chances (sequence-probability
- (coerce gc-content 'double-float)
- substring)))
- gc-contents))))
+ (u:float-string (mapcar (lambda (gc-content)
+ (* chances (u:sequence-probability
+ (coerce gc-content 'double-float)
+ substring)))
+ gc-contents))))
--- a/src/problems/fib.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/fib.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,4 +1,5 @@
-(in-package :rosalind)
+(defpackage :rosalind/fib (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/fib)
(define-problem fib (data stream)
"5 3"
--- a/src/problems/fibd.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/fibd.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,9 +1,10 @@
-(in-package :rosalind)
+(defpackage :rosalind/fibd (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/fibd)
(define-problem fibd (data stream)
"6 3"
"4"
- (iter
+ (iterate
(with months = (read data))
(with lifespan = (read data))
(for month :from 2 :to months)
@@ -23,12 +24,14 @@
(births month))))
;; We initialize the buffers with NIL in index 0 for the 1-based months,
;; and 1 in index 0 for the initial pair of rabbits.
- (buffering (returning-final (population month))
- :into population
- :initial-contents '(nil 1))
- (buffering (births month)
- :into births
- :initial-contents '(nil 1)))))
+ (u:buffering (u:returning-final (population month))
+ :into population
+ :initial-contents '(nil 1))
+ (u:buffering (births month)
+ :into births
+ :initial-contents '(nil 1)))))
+
+#; Scratch --------------------------------------------------------------------
(problem-fibd "45 6")
;; (solve fibd)
--- a/src/problems/gc.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/gc.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,6 +1,7 @@
-(in-package :rosalind)
+(defpackage :rosalind/gc (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/gc)
-(defparameter *input-gc* ">Rosalind_6404
+(defparameter *input* ">Rosalind_6404
CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
TCCCACTAATAATTCTGAGG
>Rosalind_5959
@@ -10,13 +11,11 @@
CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
TGGGAACCTGCGGGCAGTAGGTGGAAT")
-(defparameter *output-gc* "Rosalind_0808
+(defparameter *output* "Rosalind_0808
60.919540")
-(define-problem gc (data stream)
- *input-gc*
- *output-gc*
+(define-problem gc (data stream) *input* *output*
(labels ((gcp (base)
(or (char= #\G base)
(char= #\C base)))
@@ -28,7 +27,3 @@
(for gc = (gc-content dna))
(finding (format nil "~A~%~,6F" label (* 100 gc))
:maximizing gc))))
-
-
-;; (problem-gc *input-gc*)
-;; (solve gc)
--- a/src/problems/grph.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/grph.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,6 +1,7 @@
-(in-package :rosalind)
+(defpackage :rosalind/grph (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/grph)
-(defparameter *input-grph*
+(defparameter *input*
">Rosalind_0498
AAATAAA
>Rosalind_2391
@@ -12,7 +13,7 @@
>Rosalind_5013
GGGTGGG")
-(defparameter *output-grph*
+(defparameter *output*
"Rosalind_0498 Rosalind_0442
Rosalind_0498 Rosalind_2391
Rosalind_2391 Rosalind_2323
@@ -34,10 +35,8 @@
:end2 k))
-(define-problem grph (data stream)
- *input-grph*
- *output-grph*
- (let* ((data (read-fasta-into-hash-table data))
+(define-problem grph (data stream) *input* *output*
+ (let* ((data (u:read-fasta-into-hash-table data))
(graph (digraph:make-digraph
:test #'equal
:initial-vertices (alexandria:hash-table-keys data))))
--- a/src/problems/hamm.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/hamm.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,12 +1,21 @@
-(in-package :rosalind)
+(defpackage :rosalind/hamm (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/hamm)
+
+(defparameter *input* "GAGCCTACTAACGGGAT
+CATCGTAATGACGGCCT")
-(defparameter *input-hamm* "GAGCCTACTAACGGGAT
-CATCGTAATGACGGCCT")
+(defun hamming (sequence1 sequence2 &key (test #'eql))
+ "Return the Hamming distance between `sequence1` and `sequence2`."
+ ;; todo assert length=?
+ (let ((result 0))
+ (map nil (lambda (x y)
+ (unless (funcall test x y)
+ (incf result)))
+ sequence1
+ sequence2)
+ result))
(define-problem hamm (data stream)
- *input-hamm*
+ *input*
"7"
(hamming (read-line data) (read-line data) :test #'char=))
-
-;; (problem-hamm *input-hamm*)
-;; (solve hamm)
--- a/src/problems/iev.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/iev.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,4 +1,5 @@
-(in-package :rosalind)
+(defpackage :rosalind/iev (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/iev)
(define-problem iev (data stream)
"1 0 0 1 0 1"
@@ -9,7 +10,7 @@
(hh (read data))
(hr (read data))
(rr (read data)))
- (float-string
+ (u:float-string
;; It's just a weighted average…
(* 2 (+ (* dd 1)
(* dh 1)
--- a/src/problems/inod.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/inod.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,4 +1,5 @@
-(in-package :rosalind)
+(defpackage :rosalind/inod (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/inod)
;; This one is trivial once you know the closed-form solution of N-2. The
;; intuition for that can come in two parts.
--- a/src/problems/iprb.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/iprb.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,4 +1,5 @@
-(in-package :rosalind)
+(defpackage :rosalind/iprb (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/iprb)
(define-problem iprb (data stream)
"2 2 2"
@@ -24,7 +25,7 @@
;; = 2XY/N²-N
(/ (* 2 x y)
(- (* n n) n))))
- (float-string
+ (u:float-string
(+ (* (p-same d d) 1) ;; AA AA
(* (p-diff d h) 1) ;; AA Aa
(* (p-diff d r) 1) ;; AA aa
@@ -32,6 +33,3 @@
(* (p-diff h r) 1/2) ;; Aa aa
(* (p-same r r) 0))
5)))) ;; aa aa
-
-;; (problem-iprb)
-;; (solve iprb)
--- a/src/problems/kmer.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/kmer.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,6 +1,7 @@
-(in-package :rosalind)
+(defpackage :rosalind/kmer (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/kmer)
-(defparameter *input-kmer* ">Rosalind_6431
+(defparameter *input* ">Rosalind_6431
CTTCGAAAGTTTGGGCCGAGTCTTACAGTCGGTCTTGAAGCAAAGTAACGAACTCCACGG
CCCTGACTACCGAACCAGTTGTGAGTACTCAACTGGGTGAGAGTGCAGTCCCTATTGAGT
TTCCGAGACTCACCGGGATTTTCGATCCAGCCTCAGTCCAGTCTTGTGGCCAACTCACCA
@@ -9,7 +10,7 @@
CTTGCCCGTGTTAAAAACTCGGCTCACATGCTGTCTGCGGCTGGCTGTATACAGTATCTA
CCTAATACCCTTCAGTTCGCCGCACAAAAGCTGGGAGTTACCGCGGAAATCACAG")
-(defparameter *output-kmer* "4 1 4 3 0 1 1 5 1 3 1 2 2 1 2 0 1 1 3 1 2 1 3 1 1 1 1 2 2 5 1 3 0 2 2 1 1 1 1 3 1 0 0 1 5 5 1 5 0 2 0 2 1 2 1 1 1 2 0 1 0 0 1 1 3 2 1 0 3 2 3 0 0 2 0 8 0 0 1 0 2 1 3 0 0 0 1 4 3 2 1 1 3 1 2 1 3 1 2 1 2 1 1 1 2 3 2 1 1 0 1 1 3 2 1 2 6 2 1 1 1 2 3 3 3 2 3 0 3 2 1 1 0 0 1 4 3 0 1 5 0 2 0 1 2 1 3 0 1 2 2 1 1 0 3 0 0 4 5 0 3 0 2 1 1 3 0 3 2 2 1 1 0 2 1 0 2 2 1 2 0 2 2 5 2 2 1 1 2 1 2 2 2 2 1 1 3 4 0 2 1 1 0 1 2 2 1 1 1 5 2 0 3 2 1 1 2 2 3 0 3 0 1 3 1 2 3 0 2 1 2 2 1 2 3 0 1 2 3 1 1 3 1 0 1 1 3 0 2 1 2 2 0 2 1 1")
+(defparameter *output* "4 1 4 3 0 1 1 5 1 3 1 2 2 1 2 0 1 1 3 1 2 1 3 1 1 1 1 2 2 5 1 3 0 2 2 1 1 1 1 3 1 0 0 1 5 5 1 5 0 2 0 2 1 2 1 1 1 2 0 1 0 0 1 1 3 2 1 0 3 2 3 0 0 2 0 8 0 0 1 0 2 1 3 0 0 0 1 4 3 2 1 1 3 1 2 1 3 1 2 1 2 1 1 1 2 3 2 1 1 0 1 1 3 2 1 2 6 2 1 1 1 2 3 3 3 2 3 0 3 2 1 1 0 0 1 4 3 0 1 5 0 2 0 1 2 1 3 0 1 2 2 1 1 0 3 0 0 4 5 0 3 0 2 1 1 3 0 3 2 2 1 1 0 2 1 0 2 2 1 2 0 2 2 5 2 2 1 1 2 1 2 2 2 2 1 1 3 4 0 2 1 1 0 1 2 2 1 1 1 5 2 0 3 2 1 1 2 2 3 0 3 0 1 3 1 2 3 0 2 1 2 2 1 2 3 0 1 2 3 1 1 3 1 0 1 1 3 0 2 1 2 2 0 2 1 1")
(defun mapc-kmers (function n &key (copy t))
@@ -25,18 +26,16 @@
(map nil #'branch "ACGT"))))))
(defun map-kmers (function n)
- (gathering (mapc-kmers (compose #'gather function) n)))
+ (gathering (mapc-kmers (u:compose #'gather function) n)))
(defun kmers (n)
(map-kmers #'identity n))
-(define-problem kmer (data stream)
- *input-kmer*
- *output-kmer*
+(define-problem kmer (data stream) *input* *output*
(iterate
(with n = 4)
- (with seq = (nth-value 1 (read-fasta data)))
+ (with seq = (nth-value 1 (u:read-fasta data)))
(with counts = (make-hash-table :test #'equal))
(for i :from 0 :to (- (length seq) n))
(for kmer = (subseq seq i (+ i n)))
--- a/src/problems/lcsm.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/lcsm.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,15 +1,18 @@
-(in-package :rosalind)
+(defpackage :rosalind/lcsm (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/lcsm)
;; This one sucked. We should use suffix trees some day but I just hacked this
;; together for now.
-(defparameter *input-lcsm* ">Rosalind_1
+(defparameter *input* ">Rosalind_1
GATTACA
>Rosalind_2
TAGACCA
>Rosalind_3
ATACA")
+(defparameter *output* "AC")
+
(defun compute-substring-table% (string1 string2)
;; Compute the table of substring lengths.
;;
@@ -57,7 +60,7 @@
(defun longest (strings)
"Return a list of the longest strings in `strings`."
(remove-if-not
- (curry #'= (alexandria:extremum (mapcar #'length strings) #'>))
+ (u:curry #'= (alexandria:extremum (mapcar #'length strings) #'>))
strings :key #'length))
(defun longest-common-substrings-of-any (substrings string)
@@ -68,10 +71,8 @@
longest
(remove-duplicates <> :test #'string=)))
-(define-problem lcsm (data stream)
- *input-lcsm*
- "AC"
- (let ((lines (mapcar #'cdr (read-fasta-into-alist data))))
+(define-problem lcsm (data stream) *input* *output*
+ (let ((lines (mapcar #'cdr (u:read-fasta-into-alist data))))
(-<> (reduce #'longest-common-substrings-of-any (rest lines)
:initial-value (list (first lines)))
(sort <> #'string<) ; tests
--- a/src/problems/lexf.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/lexf.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,10 +1,11 @@
-(in-package :rosalind)
+(defpackage :rosalind/lexf (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/lexf)
-(defparameter *input-lexf*
+(defparameter *input*
"A C G T
2")
-(defparameter *output-lexf*
+(defparameter *output*
"AA
AC
AG
@@ -24,9 +25,7 @@
")
-(define-problem lexf (data stream)
- *input-lexf*
- *output-lexf*
+(define-problem lexf (data stream) *input* *output*
(let* ((alphabet (sort (remove #\space (read-line data)) #'char<))
(n (read data))
(string (make-string n)))
--- a/src/problems/lexv.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/lexv.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,10 +1,11 @@
-(in-package :rosalind)
+(defpackage :rosalind/lexv (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/lexv)
-(defparameter *input-lexv*
+(defparameter *input*
"D N A
3")
-(defparameter *output-lexv*
+(defparameter *output*
"D
DD
DDD
@@ -47,9 +48,7 @@
")
-(define-problem lexv (data stream)
- *input-lexv*
- *output-lexv*
+(define-problem lexv (data stream) *input* *output*
(let* ((alphabet (remove #\space (read-line data)))
(n (read data))
(string (make-string n)))
--- a/src/problems/lgis.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/lgis.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,10 +1,11 @@
-(in-package :rosalind)
+(defpackage :rosalind/lgis (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/lgis)
-(defparameter *input-lgis*
+(defparameter *input*
"5
5 1 4 2 3")
-(defparameter *output-lgis*
+(defparameter *output*
"1 2 3
5 4 3")
@@ -104,7 +105,7 @@
(for (values nil tail-index) =
(bisect-right predicate tail-indexes value
:start 1 ; ignore the garbage
- :key (curry #'aref sequence))) ; deref when bisecting
+ :key (u:curry #'aref sequence))) ; deref when bisecting
(if tail-index
(progn
;; Found a more minimal tail for existing subseq
@@ -122,9 +123,7 @@
result-type)))
-(define-problem lgis (data stream)
- *input-lgis*
- *output-lgis*
+(define-problem lgis (data stream) *input* *output*
(let* ((size (read data))
(elements (gimme size (read data))))
(with-output-to-string (s)
--- a/src/problems/lia.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/lia.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,4 +1,5 @@
-(in-package :rosalind)
+(defpackage :rosalind/lia (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/lia)
;; When a heterozygous organism mates, its offspring have a 50% chance to be
;; heterozygous themselves, *regardless of what the other mate happens to be*:
@@ -30,7 +31,7 @@
;; so we just sum up the probabilities of all of them.
(let ((failures (- trials successes))
(failure-probability (- 1 success-probability)))
- (* (binomial-coefficient trials successes)
+ (* (u:binomial-coefficient trials successes)
(expt success-probability successes)
(expt failure-probability failures))))
@@ -42,7 +43,7 @@
"
;; P(≥S) = P(=S) + P(=S+1) + … + P(N)
- (Σ (n successes trials)
+ (u:Σ (n successes trials)
(bernoulli-exactly n trials success-probability)))
(define-problem lia (data stream)
@@ -51,4 +52,4 @@
(let* ((generations (read data))
(target (read data))
(population (expt 2 generations)))
- (float-string (bernoulli-at-least target population 1/4))))
+ (u:float-string (bernoulli-at-least target population 1/4))))
--- a/src/problems/long.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/long.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,6 +1,7 @@
-(in-package :rosalind)
+(defpackage :rosalind/long (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/long)
-(defparameter *input-long*
+(defparameter *input*
">Rosalind_56
ATTAGACCTG
>Rosalind_57
@@ -10,7 +11,7 @@
>Rosalind_59
GCCGGAATAC")
-(defparameter *output-long*
+(defparameter *output*
"ATTAGACCTGCCGGAATAC")
@@ -26,10 +27,8 @@
(concatenate 'string left (subseq right (overlap left right))))
-(define-problem long (data stream)
- *input-long*
- *output-long*
- (let* ((dna (mapcar #'cdr (read-fasta-into-alist data)))
+(define-problem long (data stream) *input* *output*
+ (let* ((dna (mapcar #'cdr (u:read-fasta-into-alist data)))
(graph (digraph:make-digraph :initial-vertices dna :test #'equal)))
(dolist (left dna)
(dolist (right dna)
--- a/src/problems/mprt.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/mprt.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,4 +1,5 @@
-(in-package :rosalind)
+(defpackage :rosalind/mprt (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/mprt)
;; This was pretty simple, except for discovering that cl-ppcre's all-matches
;; function skips overlapping matches. Otherwise we just convert the motif to
@@ -62,7 +63,7 @@
(iterate
(with n-glycosylation = (motif-to-regex *motif-n-glycosylation*))
(for id :in-stream data :using #'read-line)
- (for (nil . protein) = (uniprot id))
+ (for (nil . protein) = (u:uniprot id))
(for matches = (all-matches-dammit n-glycosylation protein))
(when matches
(format s "~A~%~{~D~*~^ ~}~%" id (mapcar #'1+ matches))))))
--- a/src/problems/mrna.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/mrna.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,4 +1,5 @@
-(in-package :rosalind)
+(defpackage :rosalind/mrna (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/mrna)
;; We're using a real programming language, so we have actual numbers and don't
;; need to bother with modular arithmetic for the tiny inputs they're giving us
@@ -49,9 +50,7 @@
(length (acid-to-codons acid)))
-(define-problem mrna (data string)
- "MA"
- "12"
+(define-problem mrna (data string) "MA" "12"
(product (delete #\newline data)
:modulo 1000000
:key #'acid-codon-count
--- a/src/problems/orf.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/orf.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,10 +1,11 @@
-(in-package :rosalind)
+(defpackage :rosalind/orf (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/orf)
-(defparameter *input-orf*
+(defparameter *input*
">Rosalind_99
AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG")
-(defparameter *output-orf*
+(defparameter *output*
"M
MTPRLGLESLLE
MGMTPRLGLESLLE
@@ -15,16 +16,14 @@
"Return all possible proteins that can be translated from `rna`."
(iterate
(for start :first 0 :then (1+ protein-start))
- (for (values protein protein-start) = (translate rna :start start))
+ (for (values protein protein-start) = (u:translate rna :start start))
(while protein)
(collect protein)))
-(define-problem orf (data stream)
- *input-orf*
- *output-orf*
- (let* ((dna (cdr (first (read-fasta-into-alist data))))
- (rna1 (transcribe dna))
- (rna2 (transcribe (reverse-complement dna))))
+(define-problem orf (data stream) *input* *output*
+ (let* ((dna (cdr (first (u:read-fasta-into-alist data))))
+ (rna1 (u:transcribe dna))
+ (rna2 (u:transcribe (u:reverse-complement dna))))
(-<> (append (translate-all rna1)
(translate-all rna2))
(remove-duplicates <> :test #'string=)
--- a/src/problems/perm.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/perm.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,8 +1,9 @@
-(in-package :rosalind)
+(defpackage :rosalind/perm (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/perm)
-(defparameter *input-perm* "3")
+(defparameter *input* "3")
-(defparameter *output-perm* "6
+(defparameter *output* "6
1 2 3
1 3 2
2 1 3
@@ -11,16 +12,11 @@
3 2 1")
-(define-problem perm (data string)
- *input-perm*
- *output-perm*
+(define-problem perm (data string) *input* *output*
(let* ((n (parse-integer data))
- (count (factorial n))
- (perms (permutations (alexandria:iota n :start 1))))
+ (count (u:factorial n))
+ (perms (u:permutations (alexandria:iota n :start 1))))
(format nil "~D~%~{~A~^~%~}"
count
;; sort to ensure consistent output for the unit test
- (sort (mapcar (curry #'str:join " ") perms) #'string<))))
-
-;; (problem-perm "3")
-;; (solve perm)
+ (sort (mapcar (u:curry #'str:join " ") perms) #'string<))))
--- a/src/problems/pmch.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/pmch.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,9 +1,10 @@
-(in-package :rosalind)
+(defpackage :rosalind/pmch (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/pmch)
-(defparameter *input-pmch* ">Rosalind_23
+(defparameter *input* ">Rosalind_23
AGCUAGUCAU")
-(defparameter *output-pmch* "12")
+(defparameter *output* "12")
;; We can make a few observations to make things easier (well, trivial).
;;
@@ -24,9 +25,7 @@
;; adenine we have N-1 uracils. And so on down to the final pair. So the total
;; number of choices we have for each graph is N(N-1)(N-2)…(1) = N!
-(define-problem pmch (data stream)
- *input-pmch*
- *output-pmch*
- (let ((bases (nth-value 1 (read-fasta data))))
- (* (factorial (count #\A bases))
- (factorial (count #\G bases)))))
+(define-problem pmch (data stream) *input* *output*
+ (let ((bases (nth-value 1 (u:read-fasta data))))
+ (* (u:factorial (count #\A bases))
+ (u:factorial (count #\G bases)))))
--- a/src/problems/pper.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/pper.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,15 +1,14 @@
-(in-package :rosalind)
+(defpackage :rosalind/pper (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/pper)
-(defparameter *input-pper*
+(defparameter *input*
"21 7")
-(defparameter *output-pper*
+(defparameter *output*
"51200")
-(define-problem pper (data stream)
- *input-pper*
- *output-pper*
+(define-problem pper (data stream) *input* *output*
(let ((total (read data))
(size (read data)))
;; The number of combinations of k out of n elements is:
--- a/src/problems/prob.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/prob.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,21 +1,19 @@
-(in-package :rosalind)
+(defpackage :rosalind/prob (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/prob)
-(defparameter *input-prob*
+(defparameter *input*
"ACGATACAA
0.129 0.287 0.423 0.476 0.641 0.742 0.783")
-(defparameter *output-prob*
+(defparameter *output*
"-5.737 -5.217 -5.263 -5.360 -5.958 -6.628 -7.009")
-(define-problem prob (data stream)
- *input-prob*
- *output-prob*
+(define-problem prob (data stream) *input* *output*
(let ((dna (read-line data))
(gc-contents (read-all-from-string (read-line data))))
- (flet
- ((prob (gc-content)
- (iterate
- (for base :in-string dna)
- (summing (log (base-probability gc-content base) 10)))))
- (float-string (mapcar #'prob gc-contents)))))
+ (flet ((prob (gc-content)
+ (iterate
+ (for base :in-string dna)
+ (summing (log (u:base-probability gc-content base) 10)))))
+ (u:float-string (mapcar #'prob gc-contents)))))
--- a/src/problems/prot.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/prot.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,91 +1,7 @@
-(in-package :rosalind)
-
-(defmacro codon-case ((vector index) &rest clauses)
- ;; Compiles a giant list of clauses into a tree of ECASEs.
- ;;
- ;; Each codon will have at most 3 ECASEs to pass through. Each ECASE has at
- ;; most four options, so in the worst case we end up with 3 * 4 = 12
- ;; comparisons instead of 64.
- ;;
- ;; If we ever convert bases to vectors of (unsigned-byte 2)s we could
- ;; potentially use a lookup table here, e.g.:
- ;;
- ;; (aref +amino-acids+ (+ x (ash y 2) (ash z 4)))
- (alexandria:once-only (vector index)
- (alexandria:with-gensyms (x y z)
- `(let ((,x (aref ,vector ,index))
- (,y (aref ,vector (+ ,index 1)))
- (,z (aref ,vector (+ ,index 2))))
- ,(labels ((strip (clauses)
- (if (= 1 (length (caar clauses)))
- (cadar clauses)
- (iterate (for (head body) :in clauses)
- (collect (list (subseq head 1) body)))))
- (split (clauses)
- (-<> clauses
- (group-by (rcurry #'aref 0) <> :key #'first)
- (iterate (for (k v) :in-hashtable <>)
- (collect (list k (strip v)))))))
- (recursively ((clauses (split clauses))
- (codons (list x y z))
- (i 0))
- `(ecase ,(first codons)
- ,@(iterate (for (k remaining) :in clauses)
- (collect `(,k ,(if (atom remaining)
- remaining
- (recur (split remaining)
- (rest codons)
- (1+ i)))))))))))))
-
-(defun codon-to-protein (vector index)
- "Return the amino acid encoded by the codon in `vector` at `index`."
- (codon-case (vector index)
- ("UUU" #\F) ("CUU" #\L) ("AUU" #\I) ("GUU" #\V)
- ("UUC" #\F) ("CUC" #\L) ("AUC" #\I) ("GUC" #\V)
- ("UUA" #\L) ("CUA" #\L) ("AUA" #\I) ("GUA" #\V)
- ("UUG" #\L) ("CUG" #\L) ("AUG" #\M) ("GUG" #\V)
- ("UCU" #\S) ("CCU" #\P) ("ACU" #\T) ("GCU" #\A)
- ("UCC" #\S) ("CCC" #\P) ("ACC" #\T) ("GCC" #\A)
- ("UCA" #\S) ("CCA" #\P) ("ACA" #\T) ("GCA" #\A)
- ("UCG" #\S) ("CCG" #\P) ("ACG" #\T) ("GCG" #\A)
- ("UAU" #\Y) ("CAU" #\H) ("AAU" #\N) ("GAU" #\D)
- ("UAC" #\Y) ("CAC" #\H) ("AAC" #\N) ("GAC" #\D)
- ("UAA" nil) ("CAA" #\Q) ("AAA" #\K) ("GAA" #\E)
- ("UAG" nil) ("CAG" #\Q) ("AAG" #\K) ("GAG" #\E)
- ("UGU" #\C) ("CGU" #\R) ("AGU" #\S) ("GGU" #\G)
- ("UGC" #\C) ("CGC" #\R) ("AGC" #\S) ("GGC" #\G)
- ("UGA" nil) ("CGA" #\R) ("AGA" #\R) ("GGA" #\G)
- ("UGG" #\W) ("CGG" #\R) ("AGG" #\R) ("GGG" #\G)))
-
-(defun translate (rna &key (start 0))
- "Translate a string of RNA bases into a protein string of amino acids.
-
- `rna` will be searched (beginning at `start`) for a start codon and
- translation will proceed from there. If no start codon occurs after `start`
- then `nil` will be returned.
-
- Once a start codon has been found, translation proceeds to the next stop
- codon. If no stop codon is present, `nil` will be returned.
-
- Otherwise two values are returned: the protein string and the index into `rna`
- where it started.
-
- "
- (when-let ((start (search "AUG" rna :start2 start)))
- (values
- (iterate (with limit = (- (length rna) 3))
- (for i :from start :by 3)
- (when (> i limit)
- (return-from translate (values nil nil)))
- (for protein = (codon-to-protein rna i))
- (while protein)
- (collect protein :result-type 'string))
- start)))
-
+(defpackage :rosalind/prot (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/prot)
(define-problem prot (data string)
"AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA"
"MAMAPRTEINSTRING"
- (translate data))
-
-;; (solve prot)
+ (u:translate data))
--- a/src/problems/prtm.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/prtm.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,4 +1,5 @@
-(in-package :rosalind)
+(defpackage :rosalind/prtm (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/prtm)
(defconstant +monoisotopic-mass-of-water+ 18.01056d0
"The monoisotopic mass of a single water molecule, in Daltons.")
@@ -33,9 +34,9 @@
(define-problem prtm (data string)
"SKADYEK"
"821.392"
- (-<> data
- (delete #\newline <>)
- (summation <> :key #'monoisotopic-mass)
- float-string))
+ (_ data
+ (delete #\newline _)
+ (summation _ :key #'monoisotopic-mass)
+ u:float-string))
--- a/src/problems/rear.lisp Sun Jan 19 19:51:35 2020 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-(in-package :rosalind)
-
-(defparameter *input-rear* "1 2 3 4 5 6 7 8 9 10
-3 1 5 2 7 4 9 6 10 8
-
-3 10 8 2 5 4 7 1 6 9
-5 2 3 1 7 4 10 8 6 9
-
-8 6 7 9 4 1 3 10 2 5
-8 2 7 6 9 1 5 3 10 4
-
-3 9 10 4 1 8 6 7 5 2
-2 9 8 5 1 7 3 4 6 10
-
-1 2 3 4 5 6 7 8 9 10
-1 2 3 4 5 6 7 8 9 10")
-
-(defparameter *output-rear* "9 4 5 7 0")
-
-(defun bounds-list (start end &key (min-length 0))
- "Return a list of all subseq bounds between `start` and `end`."
- (iterate outer
- (for s :from start :below end)
- (iterate (for e :from (+ s min-length) :to end)
- (in outer (collect (cons s e))))))
-
-(defun nreverse-vector (vector &key start end)
- (iterate (for i :from start)
- (for j :downfrom (1- end))
- (repeat (floor (- end start) 2))
- (rotatef (aref vector i)
- (aref vector j)))
- vector)
-
-(defun reverse-vector (vector &key start end)
- (nreverse-vector (copy-seq vector) :start start :end end))
-
-(defun reversals (vector &key start end)
- (iterate (for (s . e) :in (bounds-list start end :min-length 2))
- (collect (reverse-vector vector :start s :end e))))
-
-(defun reversals-required (from to)
- (iterate
- (with remaining = (make-queue)) ; queue of (score . state)
- (initially (enqueue (cons 0 from) remaining))
- (for (n . v) = (dequeue remaining))
- (for start = (mismatch v to))
- (for end = (mismatch v to :from-end t))
- (when (null start)
- (return n))
- (incf n)
- (dolist (r (reversals v :start start :end end))
- (enqueue (cons n r) remaining))))
-
-;; todo: finish this one
-;; (define-problem rear (data string)
-;; *input-rear*
-;; *output-rear*
-;; (let ((pairs (-<> data
-;; (str:split (format nil "~2%") <>)
-;; (mapcar (curry #'str:split #\newline) <>)
-;; (mapcar (curry #'mapcar #'read-all-from-string) <>)
-;; (mapcar (curry #'mapcar (rcurry #'coerce 'vector)) <>))))
-;; (iterate (for (from to) :in pairs)
-;; (collect (time (reversals-required from to))))))
-
-
--- a/src/problems/revc.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/revc.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,4 +1,5 @@
-(in-package :rosalind)
+(defpackage :rosalind/revc (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/revc)
;; DNA is made up of two strands running in opposite directions, usually twisted
;; into a double helix, with the bases bonded:
@@ -28,16 +29,8 @@
;; polarized ends, with one end being called 3′ and the other being 5′, but I'm
;; not 100% sure.
-(defun nreverse-complement (dna)
- (map-into dna #'dna-complement dna)
- (nreverse dna))
-
-(defun reverse-complement (dna)
- (nreverse-complement (copy-seq dna)))
-
(define-problem revc (data string)
"AAAACCCGGT"
"ACCGGGTTTT"
- "Return the reverse complement of `data`."
- (nreverse-complement (delete #\newline data)))
+ (u:nreverse-complement (delete #\newline data)))
--- a/src/problems/revp.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/revp.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,4 +1,5 @@
-(in-package :rosalind)
+(defpackage :rosalind/revp (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/revp)
;; The problem explanation provided a clever trick: you can cut the comparison
;; size in half by comparing the first half of the string to the reverse
@@ -9,11 +10,11 @@
;; AAC reverse
;; AAC=AAC palindrome!
-(defparameter *input-revp*
+(defparameter *input*
">Rosalind_24
TCAATGCATGCGGGTCTATATGCAT")
-(defparameter *output-revp*
+(defparameter *output*
"4 6
5 4
6 6
@@ -30,7 +31,7 @@
(end (+ start length)))
(unless (> end (length dna))
(string= dna
- (reverse-complement (subseq dna mid end))
+ (u:reverse-complement (subseq dna mid end))
:start1 start
:end1 mid))))
@@ -38,12 +39,10 @@
(iterate (for i :from 12 :downto 4 :by 2)
(finding i :such-that (reverse-palindrome-p dna start i))))
-(define-problem revp (data stream)
- *input-revp*
- *output-revp*
+(define-problem revp (data stream) *input* *output*
(with-output-to-string (s)
(iterate
- (with dna = (cdr (first (read-fasta-into-alist data))))
+ (with dna = (cdr (first (u:read-fasta-into-alist data))))
(for i :index-of-vector dna)
(when-let ((l (reverse-palindrome-length dna i)))
(format s "~D ~D~%" (1+ i) l)))))
--- a/src/problems/rna.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/rna.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,4 +1,5 @@
-(in-package :rosalind)
+(defpackage :rosalind/rna (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/rna)
;; RNA is a another nucleic acid that is similar to DNA, with the following
;; differences:
@@ -16,17 +17,9 @@
;; After that the mRNA exists the nucleus. Then proteins are produced from the
;; mRNA by ribosomes. That process is called "translation".
-(defun transcribe (dna)
- "Transcribe a fresh RNA string from `DNA`."
- (substitute #\U #\T dna))
-
-(defun ntranscribe (dna)
- "Destructively transcribe `DNA` to RNA in-place."
- (nsubstitute #\U #\T dna))
-
(define-problem rna (data string)
"GATGGAACTTGACTACGTAAATT"
"GAUGGAACUUGACUACGUAAAUU"
"Transcribe `data` from DNA into RNA."
- (ntranscribe data))
+ (u:ntranscribe data))
--- a/src/problems/rstr.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/rstr.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,16 +1,15 @@
-(in-package :rosalind)
+(defpackage :rosalind/rstr (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/rstr)
-(defparameter *input-rstr* "90000 0.6
+(defparameter *input* "90000 0.6
ATAGCCGA")
-(defparameter *output-rstr* "0.689")
+(defparameter *output* "0.689")
-(define-problem rstr (data stream)
- *input-rstr*
- *output-rstr*
+(define-problem rstr (data stream) *input* *output*
(let* ((n (read data))
(gc (coerce (read data) 'double-float))
(dna (read-line data))
- (prob (sequence-probability gc dna)))
- (float-string (- 1 (expt (- 1 prob) n)))))
+ (prob (u:sequence-probability gc dna)))
+ (u:float-string (- 1 (expt (- 1 prob) n)))))
--- a/src/problems/seto.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/seto.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,11 +1,12 @@
-(in-package :rosalind)
+(defpackage :rosalind/seto (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/seto)
-(defparameter *input-seto*
+(defparameter *input*
"10
{1, 2, 3, 4, 5}
{2, 8, 5, 10}")
-(defparameter *output-seto*
+(defparameter *output*
"{1, 2, 3, 4, 5, 8, 10}
{2, 5}
{1, 3, 4}
@@ -21,8 +22,8 @@
(mapcar #'parse-integer (ppcre:all-matches-as-strings "\\d+" string)))
(define-problem seto (data stream)
- *input-seto*
- *output-seto*
+ *input*
+ *output*
(let ((u (alexandria:iota (read data) :start 1))
(a (parse-set (read-line data)))
(b (parse-set (read-line data))))
@@ -37,5 +38,3 @@
#; Scratch --------------------------------------------------------------------
-
-(problem-seto)
--- a/src/problems/sign.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/sign.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,8 +1,9 @@
-(in-package :rosalind)
+(defpackage :rosalind/sign (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/sign)
-(defparameter *input-sign* "2")
+(defparameter *input* "2")
-(defparameter *output-sign* "8
+(defparameter *output* "8
-1 -2
-1 2
-2 -1
@@ -17,21 +18,16 @@
(if (null list)
(list '()) ;; there is exactly one permutation of the empty list: ()
(destructuring-bind (n . more) list
- (append (mapcar (curry #'cons n) (sign-permutations more))
- (mapcar (curry #'cons (- n)) (sign-permutations more))))))
+ (append (mapcar (u:curry #'cons n) (sign-permutations more))
+ (mapcar (u:curry #'cons (- n)) (sign-permutations more))))))
-(define-problem sign (data string)
- *input-sign*
- *output-sign*
+(define-problem sign (data string) *input* *output*
(let* ((n (parse-integer data))
(count (* (expt 2 n)
- (factorial n)))
+ (u:factorial n)))
(perms (mapcan #'sign-permutations
- (permutations (alexandria:iota n :start 1)))))
+ (u:permutations (alexandria:iota n :start 1)))))
;; sort to ensure consistent output for the unit test
(format nil "~D~%~{~A~^~%~}"
count
- (sort (mapcar (curry #'str:join " ") perms) #'string<))))
-
-;; (problem-sign "2")
-;; (solve sign)
+ (sort (mapcar (u:curry #'str:join " ") perms) #'string<))))
--- a/src/problems/splc.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/splc.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,13 +1,14 @@
-(in-package :rosalind)
+(defpackage :rosalind/splc (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/splc)
-(defparameter *input-splc* ">Rosalind_10
+(defparameter *input* ">Rosalind_10
ATGGTCTACATAGCTGACAAACAGCACGTAGCAATCGGTCGAATCTCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGTTTGCGCCTAG
>Rosalind_12
ATCGGTCGAA
>Rosalind_15
ATCGGTCGAGCGTGT")
-(defparameter *output-splc* "MVYIADKQHVASREAYGHMFKVCA")
+(defparameter *output* "MVYIADKQHVASREAYGHMFKVCA")
(defun prefixp (prefix vector &key (start 0) (test #'eql))
@@ -31,7 +32,7 @@
(prefixp intron dna :start start))
(defun find-intron (introns dna &key (start 0))
- (find-if (rcurry #'intron-matches-p dna :start start) introns))
+ (find-if (u:rcurry #'intron-matches-p dna :start start) introns))
(defun remove-introns (dna introns)
(iterate
@@ -43,11 +44,11 @@
(define-problem splc (data stream)
- *input-splc*
- *output-splc*
+ *input*
+ *output*
(destructuring-bind (dna . introns)
- (mapcar #'cdr (read-fasta-into-alist data))
- (-<> dna
- (remove-introns <> introns)
- transcribe
- translate)))
+ (mapcar #'cdr (u:read-fasta-into-alist data))
+ (_ dna
+ (remove-introns _ introns)
+ u:transcribe
+ u:translate)))
--- a/src/problems/sseq.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/sseq.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,12 +1,13 @@
-(in-package :rosalind)
+(defpackage :rosalind/sseq (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/sseq)
-(defparameter *input-sseq* ">Rosalind_14
+(defparameter *input* ">Rosalind_14
ACGTACGTGACG
>Rosalind_18
GTA
")
-(defparameter *output-sseq* "3 4 5")
+(defparameter *output* "3 4 5")
;; todo: make this more efficient for lists
(defun subsequence-positions (needle haystack &key
@@ -26,11 +27,9 @@
(return result)
(setf n (elt needle ni))))))
-(define-problem sseq (data stream)
- *input-sseq*
- *output-sseq*
- (let* ((haystack (nth-value 1 (read-fasta data)))
- (needle (nth-value 1 (read-fasta data))))
+(define-problem sseq (data stream) *input* *output*
+ (let* ((haystack (nth-value 1 (u:read-fasta data)))
+ (needle (nth-value 1 (u:read-fasta data))))
(-<> (subsequence-positions needle haystack :test #'char=)
(mapcar #'1+ <>)
(format nil "~{~D~^ ~}" <>))))
--- a/src/problems/sset.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/sset.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,16 +1,12 @@
-(in-package :rosalind)
+(defpackage :rosalind/sset (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/sset)
;; The cardinality of a power set is 2ⁿ, because you can represent an individual
;; set as a binary string where 1 means the element is included and 0 is not,
;; and there are 2ⁿ possible binary strings of length n.
-(define-problem sset (data stream)
- "3"
- "8"
+(define-problem sset (data stream) "3" "8"
(mod (expt 2 (read data)) 1000000))
#; Scratch --------------------------------------------------------------------
-
-(problem-sset)
-(solve sset)
--- a/src/problems/subs.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/subs.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,14 +1,13 @@
-(in-package :rosalind)
+(defpackage :rosalind/subs (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/subs)
-(defparameter *input-subs* "GATATATGCATATACTT
+(defparameter *input* "GATATATGCATATACTT
ATAT")
-(defparameter *output-subs* "2 4 10")
+(defparameter *output* "2 4 10")
-(define-problem subs (data stream)
- *input-subs*
- *output-subs*
+(define-problem subs (data stream) *input* *output*
(let ((haystack (read-line data))
(needle (read-line data)))
(iterate
--- a/src/problems/tran.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/tran.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,13 +1,26 @@
-(in-package :rosalind)
+(defpackage :rosalind/tran (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/tran)
-(defparameter *input-tran* ">Rosalind_0209
+(defparameter *input* ">Rosalind_0209
GCAACGCACAACGAAAACCCTTAGGGACTGGATTATTTCGTGATCGTTGTAGTTATTGGA
AGTACGGGCATCAACCCAGTT
>Rosalind_2200
TTATCTGACAAAGAAAGCCGTCAACGGCTGGATAATTTCGCGATCGTGCTGGTTACTGGC
GGTACGAGTGTTCCTTTGGGT")
-(defparameter *output-tran* "1.21428571429")
+(defparameter *output* "1.21428571429")
+
+(defun rings (base)
+ "Return the number of rings in the structure of `base`.
+
+ Pyrimidines (cytosine, thymine, and uracil) have a single-ring structure.
+
+ Purines (adenine and guanine) have a double-ring structure.
+
+ "
+ (ecase base
+ ((#\A #\G) 2)
+ ((#\C #\T #\U) 1)))
(defun transitionp (x y)
(and (char/= x y)
@@ -17,14 +30,9 @@
(and (char/= x y)
(/= (rings x) (rings y))))
-(define-problem tran (data stream)
- *input-tran*
- *output-tran*
+(define-problem tran (data stream) *input* *output*
(destructuring-bind (x y)
- (mapcar #'cdr (read-fasta-into-alist data))
- (format nil "~,11F" (coerce (/ (mapcount #'transitionp x y)
- (mapcount #'transversionp x y))
+ (mapcar #'cdr (u:read-fasta-into-alist data))
+ (format nil "~,11F" (coerce (/ (u:mapcount #'transitionp x y)
+ (u:mapcount #'transversionp x y))
'double-float))))
-
-;; (problem-tran "2")
-;; (solve tran)
--- a/src/problems/tree.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/tree.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,4 +1,5 @@
-(in-package :rosalind)
+(defpackage :rosalind/tree (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/tree)
;; For every edge we add we can link up two previously-unconnected trees. If we
;; have N separate trees, we just need N-1 edges to connect them. So the
@@ -6,7 +7,7 @@
;; given graph. Some day I really need to add that as a utility function to
;; cl-digraph.
-(defparameter *input-tree*
+(defparameter *input*
"10
1 2
2 8
@@ -15,7 +16,7 @@
6 10
7 9")
-(defparameter *output-tree*
+(defparameter *output*
"3")
(defun subgraph-vertices (graph start)
@@ -29,11 +30,9 @@
(while found)
(for subgraph = (subgraph-vertices graph vertex))
(collect subgraph)
- (map nil (curry #'digraph:remove-vertex graph) subgraph))))
+ (map nil (u:curry #'digraph:remove-vertex graph) subgraph))))
-(define-problem tree (data stream)
- *input-tree*
- *output-tree*
+(define-problem tree (data stream) *input* *output*
(let ((graph (digraph:make-digraph
:initial-vertices (alexandria:iota (read data) :start 1))))
(iterate
@@ -45,5 +44,3 @@
#; Scratch --------------------------------------------------------------------
-
-(problem-tree)
--- a/src/problems/trie.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/problems/trie.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,11 +1,12 @@
-(in-package :rosalind)
+(defpackage :rosalind/trie (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/trie)
-(defparameter *input-trie*
+(defparameter *input*
"ATAGA
ATC
GAT")
-(defparameter *output-trie*
+(defparameter *output*
"1 2 A
2 3 T
3 4 A
@@ -22,12 +23,12 @@
(defun make-trie (strings)
(recursively ((strings strings))
- (let ((terminal (find-if #'string-empty-p strings))
- (strings (remove-if #'string-empty-p strings)))
+ (let ((terminal (find-if #'u:string-empty-p strings))
+ (strings (remove-if #'u:string-empty-p strings)))
(make-trie-node
:terminal (if terminal t nil)
:children (iterate
- (for (ch kids) :in-hashtable (group-by #'first-char strings))
+ (for (ch kids) :in-hashtable (group-by #'u:first-char strings))
(collect-hash (ch (recur (mapcar (lambda (s) (subseq s 1))
kids)))))))))
@@ -78,10 +79,8 @@
(gather (list (n node) (n child) ch))
(recur child)))))))
-(define-problem trie (data stream)
- *input-trie*
- *output-trie*
- (let* ((strings (read-lines data))
+(define-problem trie (data stream) *input* *output*
+ (let* ((strings (u:read-lines data))
(trie (make-trie strings)))
;; (dot-graph 'trie trie :rankdir :tb)
(format nil "~{~{~A~^ ~}~^~%~}" (trie-adjacency-list trie))))
--- a/src/utils.lisp Sun Jan 19 19:51:35 2020 -0500
+++ b/src/utils.lisp Sun Jan 19 20:38:29 2020 -0500
@@ -1,29 +1,6 @@
-(in-package :rosalind)
+(in-package :rosalind/utils)
;;;; Misc ---------------------------------------------------------------------
-(defun sh (command input)
- (declare (ignorable command input))
- #+sbcl
- (sb-ext:run-program (first command) (rest command)
- :search t
- :input (make-string-input-stream input))
- #+ccl
- (ccl:run-program (first command) (rest command)
- :input (make-string-input-stream input))
- #+abcl
- (let ((p (system:run-program (first command) (rest command)
- :input :stream
- :output t
- :wait nil)))
- (write-string input (system:process-input p))
- (close (system:process-input p)))
- #-(or sbcl ccl abcl)
- (error "Not implemented for this Lisp implementation, sorry"))
-
-(defun pbcopy (string)
- (values string (sh '("pbcopy") string)))
-
-
(defun ensure-stream (input)
(ctypecase input
(stream input)
@@ -34,61 +11,11 @@
(stream (alexandria:read-stream-content-into-string input))
(string (copy-seq input))))
-(defun ensure-list (value)
- (if (listp value)
- value
- (list value)))
-
-
-(defun hamming (sequence1 sequence2 &key (test #'eql))
- "Return the Hamming distance between `sequence1` and `sequence2`."
- ;; todo assert length=?
- (let ((result 0))
- (map nil (lambda (x y)
- (unless (funcall test x y)
- (incf result)))
- sequence1
- sequence2)
- result))
-
-
-(defun factorial (x)
- (check-type x (integer 0))
- (iterate (for i :from 1 :to x)
- (multiplying i)))
(defun permutations (items)
(gathering (alexandria:map-permutations #'gather items)))
-(defun dna-complement (base)
- (ecase base
- (#\A #\T)
- (#\T #\A)
- (#\G #\C)
- (#\C #\G)))
-
-(defun rna-complement (base)
- (ecase base
- (#\A #\U)
- (#\U #\A)
- (#\G #\C)
- (#\C #\G)))
-
-
-(defun rings (base)
- "Return the number of rings in the structure of `base`.
-
- Pyrimidines (cytosine, thymine, and uracil) have a single-ring structure.
-
- Purines (adenine and guanine) have a double-ring structure.
-
- "
- (ecase base
- ((#\A #\G) 2)
- ((#\C #\T #\U) 1)))
-
-
(defun-inline gcp (base)
"Return whether `base` is G or C."
(or (char= #\G base)
@@ -116,6 +43,112 @@
result))
+;;;; Dogma --------------------------------------------------------------------
+(defun dna-complement (base)
+ (ecase base
+ (#\A #\T)
+ (#\T #\A)
+ (#\G #\C)
+ (#\C #\G)))
+
+(defun nreverse-complement (dna)
+ (map-into dna #'dna-complement dna)
+ (nreverse dna))
+
+(defun reverse-complement (dna)
+ (nreverse-complement (copy-seq dna)))
+
+(defun transcribe (dna)
+ "Transcribe a fresh RNA string from `DNA`."
+ (substitute #\U #\T dna))
+
+(defun ntranscribe (dna)
+ "Destructively transcribe `DNA` to RNA in-place."
+ (nsubstitute #\U #\T dna))
+
+(defmacro codon-case ((vector index) &rest clauses)
+ ;; Compiles a giant list of clauses into a tree of ECASEs.
+ ;;
+ ;; Each codon will have at most 3 ECASEs to pass through. Each ECASE has at
+ ;; most four options, so in the worst case we end up with 3 * 4 = 12
+ ;; comparisons instead of 64.
+ ;;
+ ;; If we ever convert bases to vectors of (unsigned-byte 2)s we could
+ ;; potentially use a lookup table here, e.g.:
+ ;;
+ ;; (aref +amino-acids+ (+ x (ash y 2) (ash z 4)))
+ (alexandria:once-only (vector index)
+ (alexandria:with-gensyms (x y z)
+ `(let ((,x (aref ,vector ,index))
+ (,y (aref ,vector (+ ,index 1)))
+ (,z (aref ,vector (+ ,index 2))))
+ ,(labels ((strip (clauses)
+ (if (= 1 (length (caar clauses)))
+ (cadar clauses)
+ (iterate (for (head body) :in clauses)
+ (collect (list (subseq head 1) body)))))
+ (split (clauses)
+ (-<> clauses
+ (group-by (rcurry #'aref 0) <> :key #'first)
+ (iterate (for (k v) :in-hashtable <>)
+ (collect (list k (strip v)))))))
+ (recursively ((clauses (split clauses))
+ (codons (list x y z))
+ (i 0))
+ `(ecase ,(first codons)
+ ,@(iterate (for (k remaining) :in clauses)
+ (collect `(,k ,(if (atom remaining)
+ remaining
+ (recur (split remaining)
+ (rest codons)
+ (1+ i)))))))))))))
+
+(defun codon-to-protein (vector index)
+ "Return the amino acid encoded by the codon in `vector` at `index`."
+ (codon-case (vector index)
+ ("UUU" #\F) ("CUU" #\L) ("AUU" #\I) ("GUU" #\V)
+ ("UUC" #\F) ("CUC" #\L) ("AUC" #\I) ("GUC" #\V)
+ ("UUA" #\L) ("CUA" #\L) ("AUA" #\I) ("GUA" #\V)
+ ("UUG" #\L) ("CUG" #\L) ("AUG" #\M) ("GUG" #\V)
+ ("UCU" #\S) ("CCU" #\P) ("ACU" #\T) ("GCU" #\A)
+ ("UCC" #\S) ("CCC" #\P) ("ACC" #\T) ("GCC" #\A)
+ ("UCA" #\S) ("CCA" #\P) ("ACA" #\T) ("GCA" #\A)
+ ("UCG" #\S) ("CCG" #\P) ("ACG" #\T) ("GCG" #\A)
+ ("UAU" #\Y) ("CAU" #\H) ("AAU" #\N) ("GAU" #\D)
+ ("UAC" #\Y) ("CAC" #\H) ("AAC" #\N) ("GAC" #\D)
+ ("UAA" nil) ("CAA" #\Q) ("AAA" #\K) ("GAA" #\E)
+ ("UAG" nil) ("CAG" #\Q) ("AAG" #\K) ("GAG" #\E)
+ ("UGU" #\C) ("CGU" #\R) ("AGU" #\S) ("GGU" #\G)
+ ("UGC" #\C) ("CGC" #\R) ("AGC" #\S) ("GGC" #\G)
+ ("UGA" nil) ("CGA" #\R) ("AGA" #\R) ("GGA" #\G)
+ ("UGG" #\W) ("CGG" #\R) ("AGG" #\R) ("GGG" #\G)))
+
+(defun translate (rna &key (start 0))
+ "Translate a string of RNA bases into a protein string of amino acids.
+
+ `rna` will be searched (beginning at `start`) for a start codon and
+ translation will proceed from there. If no start codon occurs after `start`
+ then `nil` will be returned.
+
+ Once a start codon has been found, translation proceeds to the next stop
+ codon. If no stop codon is present, `nil` will be returned.
+
+ Otherwise two values are returned: the protein string and the index into `rna`
+ where it started.
+
+ "
+ (when-let ((start (search "AUG" rna :start2 start)))
+ (values
+ (iterate (with limit = (- (length rna) 3))
+ (for i :from start :by 3)
+ (when (> i limit)
+ (return-from translate (values nil nil)))
+ (for protein = (codon-to-protein rna i))
+ (while protein)
+ (collect protein :result-type 'string))
+ start)))
+
+
;;;; Strings ------------------------------------------------------------------
(defun string-empty-p (string)
(zerop (length string)))
@@ -127,6 +160,12 @@
;;;; Math ---------------------------------------------------------------------
+(defun factorial (x)
+ (check-type x (integer 0))
+ (iterate (for i :from 1 :to x)
+ (multiplying i)))
+
+
(defmacro do-sum ((var from to) &body body)
"Sum `body` with `var` iterating over `[from, to]`.
@@ -252,13 +291,6 @@
(replace buffer initial-contents))
buffer))
-(defun make-string-buffer
- (&key initial-contents
- (initial-capacity (max 64 (length initial-contents))))
- (make-buffer :initial-contents initial-contents
- :initial-capacity initial-capacity
- :element-type 'character))
-
(defun buffer-push (buffer element)
(vector-push-extend element buffer)
element)
@@ -360,7 +392,7 @@
;;;; Uniprot ------------------------------------------------------------------
-(defparameter *uniprot-cache* (make-hash-table :test #'equal))
+(defvar *uniprot-cache* (make-hash-table :test #'equal))
(defmacro get-cached (key cache expr)
(once-only (key cache)
@@ -374,7 +406,7 @@
(defun uniprot (id)
(get-cached id *uniprot-cache*
- (-<> (uniprot-url id)
+ (_ (uniprot-url id)
drakma:http-request
read-fasta-into-alist
first)))
@@ -383,14 +415,14 @@
;;;; Output -------------------------------------------------------------------
(defun float-string (float-or-floats &optional (precision 3))
(with-output-to-string (s)
- (loop :for (float . more) :on (ensure-list float-or-floats)
+ (loop :for (float . more) :on (alexandria:ensure-list float-or-floats)
:do (format s "~,VF~:[~; ~]" precision float more))))
;;;; Testing ------------------------------------------------------------------
(defmacro define-test (problem input output &optional (test 'string=))
- `(test ,(alexandria:symbolicate 'test- problem)
- (is (,test ,output (aesthetic-string (,problem ,input))))))
+ `(1am:test ,(symbolicate 'test- problem)
+ (1am:is (,test ,output (aesthetic-string (,problem ,input))))))
(defun run-tests ()
(1am:run))
@@ -400,7 +432,7 @@
(defmacro define-problem (name (arg type) sample-input sample-output &body body)
(multiple-value-bind (body declarations docstring)
(alexandria:parse-body body :documentation t)
- (let ((symbol (alexandria:symbolicate 'problem- name)))
+ (let ((symbol (symbolicate 'problem- name)))
`(progn
(defun ,symbol (&optional (,arg ,sample-input))
,@(when docstring (list docstring))
@@ -423,5 +455,5 @@
(defmacro solve (name)
(assert (symbolp name) ()
"Usage: (solve foo)~%foo should not be quoted.")
- `(solve% ',(alexandria:symbolicate 'problem- name)))
+ `(solve% ',(symbolicate 'problem- name)))