# HG changeset patch # User Steve Losh # Date 1579484309 18000 # Node ID 2735aa6aab79ff724b8471983e3c0e574bf74d82 # Parent 2d34585c57044b5b27b049c4c6e3aa5b57b05b39 The great packaging of 2020 diff -r 2d34585c5704 -r 2735aa6aab79 src/package.lisp --- a/src/package.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/package.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,9 +1,49 @@ -(defpackage :rosalind +(defpackage :rosalind/utils + (:nicknames :u) (:use :cl :iterate :losh) - (:import-from :1am :is) (:import-from :alexandria :curry :rcurry :compose :ensure-gethash :with-gensyms :once-only :symbolicate) - (:shadowing-import-from :1am :test) - (:export :run-tests)) + (:export + :curry :rcurry :compose + :ensure-gethash + :with-gensyms :once-only :symbolicate + + :define-problem :solve + + :factorial + + :permutations + + :dna-complement :reverse-complement :nreverse-complement + :transcribe :ntranscribe + :translate + + :gcp :base-probability :sequence-probability + + :mapcount + + :string-empty-p + :first-char + + :Σ :Π :binomial-coefficient + + :returning-final + + :read-lines + :read-fasta + :read-fasta-into-hash-table + :read-fasta-into-alist + + :buffering + + :uniprot + + :float-string + + :run-tests :solve)) + +(defpackage :rosalind + (:import-from :rosalind/utils :run-tests :define-problem :solve) + (:export :run-tests :define-problem :solve)) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/cons.lisp --- a/src/problems/cons.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/cons.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,6 +1,7 @@ -(in-package :rosalind) +(defpackage :rosalind/cons (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/cons) -(defparameter *input-cons* ">Rosalind_1 +(defparameter *input* ">Rosalind_1 ATCCAGCT >Rosalind_2 GGGCAACT @@ -15,7 +16,7 @@ >Rosalind_7 ATGGCACT") -(defparameter *output-cons* "ATGCAACT +(defparameter *output* "ATGCAACT A: 5 1 0 0 5 5 0 0 C: 0 0 1 4 2 0 6 1 G: 1 1 6 3 0 1 0 0 @@ -114,9 +115,7 @@ (collecting winner :result-type 'string))) -(define-problem cons (data stream) - *input-cons* - *output-cons* +(define-problem cons (data stream) *input* *output* (let* ((profile-matrix (profile-matrix-from-fasta data)) (consensus-string (consensus-string profile-matrix)) (length (length consensus-string))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/dna.lisp --- a/src/problems/dna.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/dna.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,4 +1,5 @@ -(in-package :rosalind) +(defpackage :rosalind/dna (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/dna) ;; Nucleic acids are polymers, which means they're long, repeating chains of ;; smaller molecules called monomers. diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/eval.lisp --- a/src/problems/eval.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/eval.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,20 +1,19 @@ -(in-package :rosalind) +(defpackage :rosalind/eval (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/eval) -(defparameter *input-eval* "10 +(defparameter *input* "10 AG 0.25 0.5 0.75") -(defparameter *output-eval* "0.422 0.563 0.422") +(defparameter *output* "0.422 0.563 0.422") -(define-problem eval (data stream) - *input-eval* - *output-eval* +(define-problem eval (data stream) *input* *output* (let* ((string-length (read data)) (substring (read-line data)) (gc-contents (read-all data)) (chances (- string-length (1- (length substring))))) - (float-string (mapcar (lambda (gc-content) - (* chances (sequence-probability - (coerce gc-content 'double-float) - substring))) - gc-contents)))) + (u:float-string (mapcar (lambda (gc-content) + (* chances (u:sequence-probability + (coerce gc-content 'double-float) + substring))) + gc-contents)))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/fib.lisp --- a/src/problems/fib.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/fib.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,4 +1,5 @@ -(in-package :rosalind) +(defpackage :rosalind/fib (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/fib) (define-problem fib (data stream) "5 3" diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/fibd.lisp --- a/src/problems/fibd.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/fibd.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,9 +1,10 @@ -(in-package :rosalind) +(defpackage :rosalind/fibd (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/fibd) (define-problem fibd (data stream) "6 3" "4" - (iter + (iterate (with months = (read data)) (with lifespan = (read data)) (for month :from 2 :to months) @@ -23,12 +24,14 @@ (births month)))) ;; We initialize the buffers with NIL in index 0 for the 1-based months, ;; and 1 in index 0 for the initial pair of rabbits. - (buffering (returning-final (population month)) - :into population - :initial-contents '(nil 1)) - (buffering (births month) - :into births - :initial-contents '(nil 1))))) + (u:buffering (u:returning-final (population month)) + :into population + :initial-contents '(nil 1)) + (u:buffering (births month) + :into births + :initial-contents '(nil 1))))) + +#; Scratch -------------------------------------------------------------------- (problem-fibd "45 6") ;; (solve fibd) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/gc.lisp --- a/src/problems/gc.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/gc.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,6 +1,7 @@ -(in-package :rosalind) +(defpackage :rosalind/gc (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/gc) -(defparameter *input-gc* ">Rosalind_6404 +(defparameter *input* ">Rosalind_6404 CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC TCCCACTAATAATTCTGAGG >Rosalind_5959 @@ -10,13 +11,11 @@ CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC TGGGAACCTGCGGGCAGTAGGTGGAAT") -(defparameter *output-gc* "Rosalind_0808 +(defparameter *output* "Rosalind_0808 60.919540") -(define-problem gc (data stream) - *input-gc* - *output-gc* +(define-problem gc (data stream) *input* *output* (labels ((gcp (base) (or (char= #\G base) (char= #\C base))) @@ -28,7 +27,3 @@ (for gc = (gc-content dna)) (finding (format nil "~A~%~,6F" label (* 100 gc)) :maximizing gc)))) - - -;; (problem-gc *input-gc*) -;; (solve gc) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/grph.lisp --- a/src/problems/grph.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/grph.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,6 +1,7 @@ -(in-package :rosalind) +(defpackage :rosalind/grph (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/grph) -(defparameter *input-grph* +(defparameter *input* ">Rosalind_0498 AAATAAA >Rosalind_2391 @@ -12,7 +13,7 @@ >Rosalind_5013 GGGTGGG") -(defparameter *output-grph* +(defparameter *output* "Rosalind_0498 Rosalind_0442 Rosalind_0498 Rosalind_2391 Rosalind_2391 Rosalind_2323 @@ -34,10 +35,8 @@ :end2 k)) -(define-problem grph (data stream) - *input-grph* - *output-grph* - (let* ((data (read-fasta-into-hash-table data)) +(define-problem grph (data stream) *input* *output* + (let* ((data (u:read-fasta-into-hash-table data)) (graph (digraph:make-digraph :test #'equal :initial-vertices (alexandria:hash-table-keys data)))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/hamm.lisp --- a/src/problems/hamm.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/hamm.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,12 +1,21 @@ -(in-package :rosalind) +(defpackage :rosalind/hamm (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/hamm) + +(defparameter *input* "GAGCCTACTAACGGGAT +CATCGTAATGACGGCCT") -(defparameter *input-hamm* "GAGCCTACTAACGGGAT -CATCGTAATGACGGCCT") +(defun hamming (sequence1 sequence2 &key (test #'eql)) + "Return the Hamming distance between `sequence1` and `sequence2`." + ;; todo assert length=? + (let ((result 0)) + (map nil (lambda (x y) + (unless (funcall test x y) + (incf result))) + sequence1 + sequence2) + result)) (define-problem hamm (data stream) - *input-hamm* + *input* "7" (hamming (read-line data) (read-line data) :test #'char=)) - -;; (problem-hamm *input-hamm*) -;; (solve hamm) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/iev.lisp --- a/src/problems/iev.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/iev.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,4 +1,5 @@ -(in-package :rosalind) +(defpackage :rosalind/iev (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/iev) (define-problem iev (data stream) "1 0 0 1 0 1" @@ -9,7 +10,7 @@ (hh (read data)) (hr (read data)) (rr (read data))) - (float-string + (u:float-string ;; It's just a weighted average… (* 2 (+ (* dd 1) (* dh 1) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/inod.lisp --- a/src/problems/inod.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/inod.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,4 +1,5 @@ -(in-package :rosalind) +(defpackage :rosalind/inod (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/inod) ;; This one is trivial once you know the closed-form solution of N-2. The ;; intuition for that can come in two parts. diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/iprb.lisp --- a/src/problems/iprb.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/iprb.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,4 +1,5 @@ -(in-package :rosalind) +(defpackage :rosalind/iprb (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/iprb) (define-problem iprb (data stream) "2 2 2" @@ -24,7 +25,7 @@ ;; = 2XY/N²-N (/ (* 2 x y) (- (* n n) n)))) - (float-string + (u:float-string (+ (* (p-same d d) 1) ;; AA AA (* (p-diff d h) 1) ;; AA Aa (* (p-diff d r) 1) ;; AA aa @@ -32,6 +33,3 @@ (* (p-diff h r) 1/2) ;; Aa aa (* (p-same r r) 0)) 5)))) ;; aa aa - -;; (problem-iprb) -;; (solve iprb) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/kmer.lisp --- a/src/problems/kmer.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/kmer.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,6 +1,7 @@ -(in-package :rosalind) +(defpackage :rosalind/kmer (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/kmer) -(defparameter *input-kmer* ">Rosalind_6431 +(defparameter *input* ">Rosalind_6431 CTTCGAAAGTTTGGGCCGAGTCTTACAGTCGGTCTTGAAGCAAAGTAACGAACTCCACGG CCCTGACTACCGAACCAGTTGTGAGTACTCAACTGGGTGAGAGTGCAGTCCCTATTGAGT TTCCGAGACTCACCGGGATTTTCGATCCAGCCTCAGTCCAGTCTTGTGGCCAACTCACCA @@ -9,7 +10,7 @@ CTTGCCCGTGTTAAAAACTCGGCTCACATGCTGTCTGCGGCTGGCTGTATACAGTATCTA CCTAATACCCTTCAGTTCGCCGCACAAAAGCTGGGAGTTACCGCGGAAATCACAG") -(defparameter *output-kmer* "4 1 4 3 0 1 1 5 1 3 1 2 2 1 2 0 1 1 3 1 2 1 3 1 1 1 1 2 2 5 1 3 0 2 2 1 1 1 1 3 1 0 0 1 5 5 1 5 0 2 0 2 1 2 1 1 1 2 0 1 0 0 1 1 3 2 1 0 3 2 3 0 0 2 0 8 0 0 1 0 2 1 3 0 0 0 1 4 3 2 1 1 3 1 2 1 3 1 2 1 2 1 1 1 2 3 2 1 1 0 1 1 3 2 1 2 6 2 1 1 1 2 3 3 3 2 3 0 3 2 1 1 0 0 1 4 3 0 1 5 0 2 0 1 2 1 3 0 1 2 2 1 1 0 3 0 0 4 5 0 3 0 2 1 1 3 0 3 2 2 1 1 0 2 1 0 2 2 1 2 0 2 2 5 2 2 1 1 2 1 2 2 2 2 1 1 3 4 0 2 1 1 0 1 2 2 1 1 1 5 2 0 3 2 1 1 2 2 3 0 3 0 1 3 1 2 3 0 2 1 2 2 1 2 3 0 1 2 3 1 1 3 1 0 1 1 3 0 2 1 2 2 0 2 1 1") +(defparameter *output* "4 1 4 3 0 1 1 5 1 3 1 2 2 1 2 0 1 1 3 1 2 1 3 1 1 1 1 2 2 5 1 3 0 2 2 1 1 1 1 3 1 0 0 1 5 5 1 5 0 2 0 2 1 2 1 1 1 2 0 1 0 0 1 1 3 2 1 0 3 2 3 0 0 2 0 8 0 0 1 0 2 1 3 0 0 0 1 4 3 2 1 1 3 1 2 1 3 1 2 1 2 1 1 1 2 3 2 1 1 0 1 1 3 2 1 2 6 2 1 1 1 2 3 3 3 2 3 0 3 2 1 1 0 0 1 4 3 0 1 5 0 2 0 1 2 1 3 0 1 2 2 1 1 0 3 0 0 4 5 0 3 0 2 1 1 3 0 3 2 2 1 1 0 2 1 0 2 2 1 2 0 2 2 5 2 2 1 1 2 1 2 2 2 2 1 1 3 4 0 2 1 1 0 1 2 2 1 1 1 5 2 0 3 2 1 1 2 2 3 0 3 0 1 3 1 2 3 0 2 1 2 2 1 2 3 0 1 2 3 1 1 3 1 0 1 1 3 0 2 1 2 2 0 2 1 1") (defun mapc-kmers (function n &key (copy t)) @@ -25,18 +26,16 @@ (map nil #'branch "ACGT")))))) (defun map-kmers (function n) - (gathering (mapc-kmers (compose #'gather function) n))) + (gathering (mapc-kmers (u:compose #'gather function) n))) (defun kmers (n) (map-kmers #'identity n)) -(define-problem kmer (data stream) - *input-kmer* - *output-kmer* +(define-problem kmer (data stream) *input* *output* (iterate (with n = 4) - (with seq = (nth-value 1 (read-fasta data))) + (with seq = (nth-value 1 (u:read-fasta data))) (with counts = (make-hash-table :test #'equal)) (for i :from 0 :to (- (length seq) n)) (for kmer = (subseq seq i (+ i n))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/lcsm.lisp --- a/src/problems/lcsm.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/lcsm.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,15 +1,18 @@ -(in-package :rosalind) +(defpackage :rosalind/lcsm (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/lcsm) ;; This one sucked. We should use suffix trees some day but I just hacked this ;; together for now. -(defparameter *input-lcsm* ">Rosalind_1 +(defparameter *input* ">Rosalind_1 GATTACA >Rosalind_2 TAGACCA >Rosalind_3 ATACA") +(defparameter *output* "AC") + (defun compute-substring-table% (string1 string2) ;; Compute the table of substring lengths. ;; @@ -57,7 +60,7 @@ (defun longest (strings) "Return a list of the longest strings in `strings`." (remove-if-not - (curry #'= (alexandria:extremum (mapcar #'length strings) #'>)) + (u:curry #'= (alexandria:extremum (mapcar #'length strings) #'>)) strings :key #'length)) (defun longest-common-substrings-of-any (substrings string) @@ -68,10 +71,8 @@ longest (remove-duplicates <> :test #'string=))) -(define-problem lcsm (data stream) - *input-lcsm* - "AC" - (let ((lines (mapcar #'cdr (read-fasta-into-alist data)))) +(define-problem lcsm (data stream) *input* *output* + (let ((lines (mapcar #'cdr (u:read-fasta-into-alist data)))) (-<> (reduce #'longest-common-substrings-of-any (rest lines) :initial-value (list (first lines))) (sort <> #'string<) ; tests diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/lexf.lisp --- a/src/problems/lexf.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/lexf.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,10 +1,11 @@ -(in-package :rosalind) +(defpackage :rosalind/lexf (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/lexf) -(defparameter *input-lexf* +(defparameter *input* "A C G T 2") -(defparameter *output-lexf* +(defparameter *output* "AA AC AG @@ -24,9 +25,7 @@ ") -(define-problem lexf (data stream) - *input-lexf* - *output-lexf* +(define-problem lexf (data stream) *input* *output* (let* ((alphabet (sort (remove #\space (read-line data)) #'char<)) (n (read data)) (string (make-string n))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/lexv.lisp --- a/src/problems/lexv.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/lexv.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,10 +1,11 @@ -(in-package :rosalind) +(defpackage :rosalind/lexv (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/lexv) -(defparameter *input-lexv* +(defparameter *input* "D N A 3") -(defparameter *output-lexv* +(defparameter *output* "D DD DDD @@ -47,9 +48,7 @@ ") -(define-problem lexv (data stream) - *input-lexv* - *output-lexv* +(define-problem lexv (data stream) *input* *output* (let* ((alphabet (remove #\space (read-line data))) (n (read data)) (string (make-string n))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/lgis.lisp --- a/src/problems/lgis.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/lgis.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,10 +1,11 @@ -(in-package :rosalind) +(defpackage :rosalind/lgis (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/lgis) -(defparameter *input-lgis* +(defparameter *input* "5 5 1 4 2 3") -(defparameter *output-lgis* +(defparameter *output* "1 2 3 5 4 3") @@ -104,7 +105,7 @@ (for (values nil tail-index) = (bisect-right predicate tail-indexes value :start 1 ; ignore the garbage - :key (curry #'aref sequence))) ; deref when bisecting + :key (u:curry #'aref sequence))) ; deref when bisecting (if tail-index (progn ;; Found a more minimal tail for existing subseq @@ -122,9 +123,7 @@ result-type))) -(define-problem lgis (data stream) - *input-lgis* - *output-lgis* +(define-problem lgis (data stream) *input* *output* (let* ((size (read data)) (elements (gimme size (read data)))) (with-output-to-string (s) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/lia.lisp --- a/src/problems/lia.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/lia.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,4 +1,5 @@ -(in-package :rosalind) +(defpackage :rosalind/lia (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/lia) ;; When a heterozygous organism mates, its offspring have a 50% chance to be ;; heterozygous themselves, *regardless of what the other mate happens to be*: @@ -30,7 +31,7 @@ ;; so we just sum up the probabilities of all of them. (let ((failures (- trials successes)) (failure-probability (- 1 success-probability))) - (* (binomial-coefficient trials successes) + (* (u:binomial-coefficient trials successes) (expt success-probability successes) (expt failure-probability failures)))) @@ -42,7 +43,7 @@ " ;; P(≥S) = P(=S) + P(=S+1) + … + P(N) - (Σ (n successes trials) + (u:Σ (n successes trials) (bernoulli-exactly n trials success-probability))) (define-problem lia (data stream) @@ -51,4 +52,4 @@ (let* ((generations (read data)) (target (read data)) (population (expt 2 generations))) - (float-string (bernoulli-at-least target population 1/4)))) + (u:float-string (bernoulli-at-least target population 1/4)))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/long.lisp --- a/src/problems/long.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/long.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,6 +1,7 @@ -(in-package :rosalind) +(defpackage :rosalind/long (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/long) -(defparameter *input-long* +(defparameter *input* ">Rosalind_56 ATTAGACCTG >Rosalind_57 @@ -10,7 +11,7 @@ >Rosalind_59 GCCGGAATAC") -(defparameter *output-long* +(defparameter *output* "ATTAGACCTGCCGGAATAC") @@ -26,10 +27,8 @@ (concatenate 'string left (subseq right (overlap left right)))) -(define-problem long (data stream) - *input-long* - *output-long* - (let* ((dna (mapcar #'cdr (read-fasta-into-alist data))) +(define-problem long (data stream) *input* *output* + (let* ((dna (mapcar #'cdr (u:read-fasta-into-alist data))) (graph (digraph:make-digraph :initial-vertices dna :test #'equal))) (dolist (left dna) (dolist (right dna) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/mprt.lisp --- a/src/problems/mprt.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/mprt.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,4 +1,5 @@ -(in-package :rosalind) +(defpackage :rosalind/mprt (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/mprt) ;; This was pretty simple, except for discovering that cl-ppcre's all-matches ;; function skips overlapping matches. Otherwise we just convert the motif to @@ -62,7 +63,7 @@ (iterate (with n-glycosylation = (motif-to-regex *motif-n-glycosylation*)) (for id :in-stream data :using #'read-line) - (for (nil . protein) = (uniprot id)) + (for (nil . protein) = (u:uniprot id)) (for matches = (all-matches-dammit n-glycosylation protein)) (when matches (format s "~A~%~{~D~*~^ ~}~%" id (mapcar #'1+ matches)))))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/mrna.lisp --- a/src/problems/mrna.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/mrna.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,4 +1,5 @@ -(in-package :rosalind) +(defpackage :rosalind/mrna (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/mrna) ;; We're using a real programming language, so we have actual numbers and don't ;; need to bother with modular arithmetic for the tiny inputs they're giving us @@ -49,9 +50,7 @@ (length (acid-to-codons acid))) -(define-problem mrna (data string) - "MA" - "12" +(define-problem mrna (data string) "MA" "12" (product (delete #\newline data) :modulo 1000000 :key #'acid-codon-count diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/orf.lisp --- a/src/problems/orf.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/orf.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,10 +1,11 @@ -(in-package :rosalind) +(defpackage :rosalind/orf (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/orf) -(defparameter *input-orf* +(defparameter *input* ">Rosalind_99 AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG") -(defparameter *output-orf* +(defparameter *output* "M MTPRLGLESLLE MGMTPRLGLESLLE @@ -15,16 +16,14 @@ "Return all possible proteins that can be translated from `rna`." (iterate (for start :first 0 :then (1+ protein-start)) - (for (values protein protein-start) = (translate rna :start start)) + (for (values protein protein-start) = (u:translate rna :start start)) (while protein) (collect protein))) -(define-problem orf (data stream) - *input-orf* - *output-orf* - (let* ((dna (cdr (first (read-fasta-into-alist data)))) - (rna1 (transcribe dna)) - (rna2 (transcribe (reverse-complement dna)))) +(define-problem orf (data stream) *input* *output* + (let* ((dna (cdr (first (u:read-fasta-into-alist data)))) + (rna1 (u:transcribe dna)) + (rna2 (u:transcribe (u:reverse-complement dna)))) (-<> (append (translate-all rna1) (translate-all rna2)) (remove-duplicates <> :test #'string=) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/perm.lisp --- a/src/problems/perm.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/perm.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,8 +1,9 @@ -(in-package :rosalind) +(defpackage :rosalind/perm (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/perm) -(defparameter *input-perm* "3") +(defparameter *input* "3") -(defparameter *output-perm* "6 +(defparameter *output* "6 1 2 3 1 3 2 2 1 3 @@ -11,16 +12,11 @@ 3 2 1") -(define-problem perm (data string) - *input-perm* - *output-perm* +(define-problem perm (data string) *input* *output* (let* ((n (parse-integer data)) - (count (factorial n)) - (perms (permutations (alexandria:iota n :start 1)))) + (count (u:factorial n)) + (perms (u:permutations (alexandria:iota n :start 1)))) (format nil "~D~%~{~A~^~%~}" count ;; sort to ensure consistent output for the unit test - (sort (mapcar (curry #'str:join " ") perms) #'string<)))) - -;; (problem-perm "3") -;; (solve perm) + (sort (mapcar (u:curry #'str:join " ") perms) #'string<)))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/pmch.lisp --- a/src/problems/pmch.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/pmch.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,9 +1,10 @@ -(in-package :rosalind) +(defpackage :rosalind/pmch (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/pmch) -(defparameter *input-pmch* ">Rosalind_23 +(defparameter *input* ">Rosalind_23 AGCUAGUCAU") -(defparameter *output-pmch* "12") +(defparameter *output* "12") ;; We can make a few observations to make things easier (well, trivial). ;; @@ -24,9 +25,7 @@ ;; adenine we have N-1 uracils. And so on down to the final pair. So the total ;; number of choices we have for each graph is N(N-1)(N-2)…(1) = N! -(define-problem pmch (data stream) - *input-pmch* - *output-pmch* - (let ((bases (nth-value 1 (read-fasta data)))) - (* (factorial (count #\A bases)) - (factorial (count #\G bases))))) +(define-problem pmch (data stream) *input* *output* + (let ((bases (nth-value 1 (u:read-fasta data)))) + (* (u:factorial (count #\A bases)) + (u:factorial (count #\G bases))))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/pper.lisp --- a/src/problems/pper.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/pper.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,15 +1,14 @@ -(in-package :rosalind) +(defpackage :rosalind/pper (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/pper) -(defparameter *input-pper* +(defparameter *input* "21 7") -(defparameter *output-pper* +(defparameter *output* "51200") -(define-problem pper (data stream) - *input-pper* - *output-pper* +(define-problem pper (data stream) *input* *output* (let ((total (read data)) (size (read data))) ;; The number of combinations of k out of n elements is: diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/prob.lisp --- a/src/problems/prob.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/prob.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,21 +1,19 @@ -(in-package :rosalind) +(defpackage :rosalind/prob (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/prob) -(defparameter *input-prob* +(defparameter *input* "ACGATACAA 0.129 0.287 0.423 0.476 0.641 0.742 0.783") -(defparameter *output-prob* +(defparameter *output* "-5.737 -5.217 -5.263 -5.360 -5.958 -6.628 -7.009") -(define-problem prob (data stream) - *input-prob* - *output-prob* +(define-problem prob (data stream) *input* *output* (let ((dna (read-line data)) (gc-contents (read-all-from-string (read-line data)))) - (flet - ((prob (gc-content) - (iterate - (for base :in-string dna) - (summing (log (base-probability gc-content base) 10))))) - (float-string (mapcar #'prob gc-contents))))) + (flet ((prob (gc-content) + (iterate + (for base :in-string dna) + (summing (log (u:base-probability gc-content base) 10))))) + (u:float-string (mapcar #'prob gc-contents))))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/prot.lisp --- a/src/problems/prot.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/prot.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,91 +1,7 @@ -(in-package :rosalind) - -(defmacro codon-case ((vector index) &rest clauses) - ;; Compiles a giant list of clauses into a tree of ECASEs. - ;; - ;; Each codon will have at most 3 ECASEs to pass through. Each ECASE has at - ;; most four options, so in the worst case we end up with 3 * 4 = 12 - ;; comparisons instead of 64. - ;; - ;; If we ever convert bases to vectors of (unsigned-byte 2)s we could - ;; potentially use a lookup table here, e.g.: - ;; - ;; (aref +amino-acids+ (+ x (ash y 2) (ash z 4))) - (alexandria:once-only (vector index) - (alexandria:with-gensyms (x y z) - `(let ((,x (aref ,vector ,index)) - (,y (aref ,vector (+ ,index 1))) - (,z (aref ,vector (+ ,index 2)))) - ,(labels ((strip (clauses) - (if (= 1 (length (caar clauses))) - (cadar clauses) - (iterate (for (head body) :in clauses) - (collect (list (subseq head 1) body))))) - (split (clauses) - (-<> clauses - (group-by (rcurry #'aref 0) <> :key #'first) - (iterate (for (k v) :in-hashtable <>) - (collect (list k (strip v))))))) - (recursively ((clauses (split clauses)) - (codons (list x y z)) - (i 0)) - `(ecase ,(first codons) - ,@(iterate (for (k remaining) :in clauses) - (collect `(,k ,(if (atom remaining) - remaining - (recur (split remaining) - (rest codons) - (1+ i))))))))))))) - -(defun codon-to-protein (vector index) - "Return the amino acid encoded by the codon in `vector` at `index`." - (codon-case (vector index) - ("UUU" #\F) ("CUU" #\L) ("AUU" #\I) ("GUU" #\V) - ("UUC" #\F) ("CUC" #\L) ("AUC" #\I) ("GUC" #\V) - ("UUA" #\L) ("CUA" #\L) ("AUA" #\I) ("GUA" #\V) - ("UUG" #\L) ("CUG" #\L) ("AUG" #\M) ("GUG" #\V) - ("UCU" #\S) ("CCU" #\P) ("ACU" #\T) ("GCU" #\A) - ("UCC" #\S) ("CCC" #\P) ("ACC" #\T) ("GCC" #\A) - ("UCA" #\S) ("CCA" #\P) ("ACA" #\T) ("GCA" #\A) - ("UCG" #\S) ("CCG" #\P) ("ACG" #\T) ("GCG" #\A) - ("UAU" #\Y) ("CAU" #\H) ("AAU" #\N) ("GAU" #\D) - ("UAC" #\Y) ("CAC" #\H) ("AAC" #\N) ("GAC" #\D) - ("UAA" nil) ("CAA" #\Q) ("AAA" #\K) ("GAA" #\E) - ("UAG" nil) ("CAG" #\Q) ("AAG" #\K) ("GAG" #\E) - ("UGU" #\C) ("CGU" #\R) ("AGU" #\S) ("GGU" #\G) - ("UGC" #\C) ("CGC" #\R) ("AGC" #\S) ("GGC" #\G) - ("UGA" nil) ("CGA" #\R) ("AGA" #\R) ("GGA" #\G) - ("UGG" #\W) ("CGG" #\R) ("AGG" #\R) ("GGG" #\G))) - -(defun translate (rna &key (start 0)) - "Translate a string of RNA bases into a protein string of amino acids. - - `rna` will be searched (beginning at `start`) for a start codon and - translation will proceed from there. If no start codon occurs after `start` - then `nil` will be returned. - - Once a start codon has been found, translation proceeds to the next stop - codon. If no stop codon is present, `nil` will be returned. - - Otherwise two values are returned: the protein string and the index into `rna` - where it started. - - " - (when-let ((start (search "AUG" rna :start2 start))) - (values - (iterate (with limit = (- (length rna) 3)) - (for i :from start :by 3) - (when (> i limit) - (return-from translate (values nil nil))) - (for protein = (codon-to-protein rna i)) - (while protein) - (collect protein :result-type 'string)) - start))) - +(defpackage :rosalind/prot (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/prot) (define-problem prot (data string) "AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA" "MAMAPRTEINSTRING" - (translate data)) - -;; (solve prot) + (u:translate data)) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/prtm.lisp --- a/src/problems/prtm.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/prtm.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,4 +1,5 @@ -(in-package :rosalind) +(defpackage :rosalind/prtm (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/prtm) (defconstant +monoisotopic-mass-of-water+ 18.01056d0 "The monoisotopic mass of a single water molecule, in Daltons.") @@ -33,9 +34,9 @@ (define-problem prtm (data string) "SKADYEK" "821.392" - (-<> data - (delete #\newline <>) - (summation <> :key #'monoisotopic-mass) - float-string)) + (_ data + (delete #\newline _) + (summation _ :key #'monoisotopic-mass) + u:float-string)) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/rear.lisp --- a/src/problems/rear.lisp Sun Jan 19 19:51:35 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,67 +0,0 @@ -(in-package :rosalind) - -(defparameter *input-rear* "1 2 3 4 5 6 7 8 9 10 -3 1 5 2 7 4 9 6 10 8 - -3 10 8 2 5 4 7 1 6 9 -5 2 3 1 7 4 10 8 6 9 - -8 6 7 9 4 1 3 10 2 5 -8 2 7 6 9 1 5 3 10 4 - -3 9 10 4 1 8 6 7 5 2 -2 9 8 5 1 7 3 4 6 10 - -1 2 3 4 5 6 7 8 9 10 -1 2 3 4 5 6 7 8 9 10") - -(defparameter *output-rear* "9 4 5 7 0") - -(defun bounds-list (start end &key (min-length 0)) - "Return a list of all subseq bounds between `start` and `end`." - (iterate outer - (for s :from start :below end) - (iterate (for e :from (+ s min-length) :to end) - (in outer (collect (cons s e)))))) - -(defun nreverse-vector (vector &key start end) - (iterate (for i :from start) - (for j :downfrom (1- end)) - (repeat (floor (- end start) 2)) - (rotatef (aref vector i) - (aref vector j))) - vector) - -(defun reverse-vector (vector &key start end) - (nreverse-vector (copy-seq vector) :start start :end end)) - -(defun reversals (vector &key start end) - (iterate (for (s . e) :in (bounds-list start end :min-length 2)) - (collect (reverse-vector vector :start s :end e)))) - -(defun reversals-required (from to) - (iterate - (with remaining = (make-queue)) ; queue of (score . state) - (initially (enqueue (cons 0 from) remaining)) - (for (n . v) = (dequeue remaining)) - (for start = (mismatch v to)) - (for end = (mismatch v to :from-end t)) - (when (null start) - (return n)) - (incf n) - (dolist (r (reversals v :start start :end end)) - (enqueue (cons n r) remaining)))) - -;; todo: finish this one -;; (define-problem rear (data string) -;; *input-rear* -;; *output-rear* -;; (let ((pairs (-<> data -;; (str:split (format nil "~2%") <>) -;; (mapcar (curry #'str:split #\newline) <>) -;; (mapcar (curry #'mapcar #'read-all-from-string) <>) -;; (mapcar (curry #'mapcar (rcurry #'coerce 'vector)) <>)))) -;; (iterate (for (from to) :in pairs) -;; (collect (time (reversals-required from to)))))) - - diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/revc.lisp --- a/src/problems/revc.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/revc.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,4 +1,5 @@ -(in-package :rosalind) +(defpackage :rosalind/revc (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/revc) ;; DNA is made up of two strands running in opposite directions, usually twisted ;; into a double helix, with the bases bonded: @@ -28,16 +29,8 @@ ;; polarized ends, with one end being called 3′ and the other being 5′, but I'm ;; not 100% sure. -(defun nreverse-complement (dna) - (map-into dna #'dna-complement dna) - (nreverse dna)) - -(defun reverse-complement (dna) - (nreverse-complement (copy-seq dna))) - (define-problem revc (data string) "AAAACCCGGT" "ACCGGGTTTT" - "Return the reverse complement of `data`." - (nreverse-complement (delete #\newline data))) + (u:nreverse-complement (delete #\newline data))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/revp.lisp --- a/src/problems/revp.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/revp.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,4 +1,5 @@ -(in-package :rosalind) +(defpackage :rosalind/revp (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/revp) ;; The problem explanation provided a clever trick: you can cut the comparison ;; size in half by comparing the first half of the string to the reverse @@ -9,11 +10,11 @@ ;; AAC reverse ;; AAC=AAC palindrome! -(defparameter *input-revp* +(defparameter *input* ">Rosalind_24 TCAATGCATGCGGGTCTATATGCAT") -(defparameter *output-revp* +(defparameter *output* "4 6 5 4 6 6 @@ -30,7 +31,7 @@ (end (+ start length))) (unless (> end (length dna)) (string= dna - (reverse-complement (subseq dna mid end)) + (u:reverse-complement (subseq dna mid end)) :start1 start :end1 mid)))) @@ -38,12 +39,10 @@ (iterate (for i :from 12 :downto 4 :by 2) (finding i :such-that (reverse-palindrome-p dna start i)))) -(define-problem revp (data stream) - *input-revp* - *output-revp* +(define-problem revp (data stream) *input* *output* (with-output-to-string (s) (iterate - (with dna = (cdr (first (read-fasta-into-alist data)))) + (with dna = (cdr (first (u:read-fasta-into-alist data)))) (for i :index-of-vector dna) (when-let ((l (reverse-palindrome-length dna i))) (format s "~D ~D~%" (1+ i) l))))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/rna.lisp --- a/src/problems/rna.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/rna.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,4 +1,5 @@ -(in-package :rosalind) +(defpackage :rosalind/rna (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/rna) ;; RNA is a another nucleic acid that is similar to DNA, with the following ;; differences: @@ -16,17 +17,9 @@ ;; After that the mRNA exists the nucleus. Then proteins are produced from the ;; mRNA by ribosomes. That process is called "translation". -(defun transcribe (dna) - "Transcribe a fresh RNA string from `DNA`." - (substitute #\U #\T dna)) - -(defun ntranscribe (dna) - "Destructively transcribe `DNA` to RNA in-place." - (nsubstitute #\U #\T dna)) - (define-problem rna (data string) "GATGGAACTTGACTACGTAAATT" "GAUGGAACUUGACUACGUAAAUU" "Transcribe `data` from DNA into RNA." - (ntranscribe data)) + (u:ntranscribe data)) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/rstr.lisp --- a/src/problems/rstr.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/rstr.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,16 +1,15 @@ -(in-package :rosalind) +(defpackage :rosalind/rstr (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/rstr) -(defparameter *input-rstr* "90000 0.6 +(defparameter *input* "90000 0.6 ATAGCCGA") -(defparameter *output-rstr* "0.689") +(defparameter *output* "0.689") -(define-problem rstr (data stream) - *input-rstr* - *output-rstr* +(define-problem rstr (data stream) *input* *output* (let* ((n (read data)) (gc (coerce (read data) 'double-float)) (dna (read-line data)) - (prob (sequence-probability gc dna))) - (float-string (- 1 (expt (- 1 prob) n))))) + (prob (u:sequence-probability gc dna))) + (u:float-string (- 1 (expt (- 1 prob) n))))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/seto.lisp --- a/src/problems/seto.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/seto.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,11 +1,12 @@ -(in-package :rosalind) +(defpackage :rosalind/seto (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/seto) -(defparameter *input-seto* +(defparameter *input* "10 {1, 2, 3, 4, 5} {2, 8, 5, 10}") -(defparameter *output-seto* +(defparameter *output* "{1, 2, 3, 4, 5, 8, 10} {2, 5} {1, 3, 4} @@ -21,8 +22,8 @@ (mapcar #'parse-integer (ppcre:all-matches-as-strings "\\d+" string))) (define-problem seto (data stream) - *input-seto* - *output-seto* + *input* + *output* (let ((u (alexandria:iota (read data) :start 1)) (a (parse-set (read-line data))) (b (parse-set (read-line data)))) @@ -37,5 +38,3 @@ #; Scratch -------------------------------------------------------------------- - -(problem-seto) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/sign.lisp --- a/src/problems/sign.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/sign.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,8 +1,9 @@ -(in-package :rosalind) +(defpackage :rosalind/sign (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/sign) -(defparameter *input-sign* "2") +(defparameter *input* "2") -(defparameter *output-sign* "8 +(defparameter *output* "8 -1 -2 -1 2 -2 -1 @@ -17,21 +18,16 @@ (if (null list) (list '()) ;; there is exactly one permutation of the empty list: () (destructuring-bind (n . more) list - (append (mapcar (curry #'cons n) (sign-permutations more)) - (mapcar (curry #'cons (- n)) (sign-permutations more)))))) + (append (mapcar (u:curry #'cons n) (sign-permutations more)) + (mapcar (u:curry #'cons (- n)) (sign-permutations more)))))) -(define-problem sign (data string) - *input-sign* - *output-sign* +(define-problem sign (data string) *input* *output* (let* ((n (parse-integer data)) (count (* (expt 2 n) - (factorial n))) + (u:factorial n))) (perms (mapcan #'sign-permutations - (permutations (alexandria:iota n :start 1))))) + (u:permutations (alexandria:iota n :start 1))))) ;; sort to ensure consistent output for the unit test (format nil "~D~%~{~A~^~%~}" count - (sort (mapcar (curry #'str:join " ") perms) #'string<)))) - -;; (problem-sign "2") -;; (solve sign) + (sort (mapcar (u:curry #'str:join " ") perms) #'string<)))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/splc.lisp --- a/src/problems/splc.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/splc.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,13 +1,14 @@ -(in-package :rosalind) +(defpackage :rosalind/splc (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/splc) -(defparameter *input-splc* ">Rosalind_10 +(defparameter *input* ">Rosalind_10 ATGGTCTACATAGCTGACAAACAGCACGTAGCAATCGGTCGAATCTCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGTTTGCGCCTAG >Rosalind_12 ATCGGTCGAA >Rosalind_15 ATCGGTCGAGCGTGT") -(defparameter *output-splc* "MVYIADKQHVASREAYGHMFKVCA") +(defparameter *output* "MVYIADKQHVASREAYGHMFKVCA") (defun prefixp (prefix vector &key (start 0) (test #'eql)) @@ -31,7 +32,7 @@ (prefixp intron dna :start start)) (defun find-intron (introns dna &key (start 0)) - (find-if (rcurry #'intron-matches-p dna :start start) introns)) + (find-if (u:rcurry #'intron-matches-p dna :start start) introns)) (defun remove-introns (dna introns) (iterate @@ -43,11 +44,11 @@ (define-problem splc (data stream) - *input-splc* - *output-splc* + *input* + *output* (destructuring-bind (dna . introns) - (mapcar #'cdr (read-fasta-into-alist data)) - (-<> dna - (remove-introns <> introns) - transcribe - translate))) + (mapcar #'cdr (u:read-fasta-into-alist data)) + (_ dna + (remove-introns _ introns) + u:transcribe + u:translate))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/sseq.lisp --- a/src/problems/sseq.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/sseq.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,12 +1,13 @@ -(in-package :rosalind) +(defpackage :rosalind/sseq (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/sseq) -(defparameter *input-sseq* ">Rosalind_14 +(defparameter *input* ">Rosalind_14 ACGTACGTGACG >Rosalind_18 GTA ") -(defparameter *output-sseq* "3 4 5") +(defparameter *output* "3 4 5") ;; todo: make this more efficient for lists (defun subsequence-positions (needle haystack &key @@ -26,11 +27,9 @@ (return result) (setf n (elt needle ni)))))) -(define-problem sseq (data stream) - *input-sseq* - *output-sseq* - (let* ((haystack (nth-value 1 (read-fasta data))) - (needle (nth-value 1 (read-fasta data)))) +(define-problem sseq (data stream) *input* *output* + (let* ((haystack (nth-value 1 (u:read-fasta data))) + (needle (nth-value 1 (u:read-fasta data)))) (-<> (subsequence-positions needle haystack :test #'char=) (mapcar #'1+ <>) (format nil "~{~D~^ ~}" <>)))) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/sset.lisp --- a/src/problems/sset.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/sset.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,16 +1,12 @@ -(in-package :rosalind) +(defpackage :rosalind/sset (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/sset) ;; The cardinality of a power set is 2ⁿ, because you can represent an individual ;; set as a binary string where 1 means the element is included and 0 is not, ;; and there are 2ⁿ possible binary strings of length n. -(define-problem sset (data stream) - "3" - "8" +(define-problem sset (data stream) "3" "8" (mod (expt 2 (read data)) 1000000)) #; Scratch -------------------------------------------------------------------- - -(problem-sset) -(solve sset) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/subs.lisp --- a/src/problems/subs.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/subs.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,14 +1,13 @@ -(in-package :rosalind) +(defpackage :rosalind/subs (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/subs) -(defparameter *input-subs* "GATATATGCATATACTT +(defparameter *input* "GATATATGCATATACTT ATAT") -(defparameter *output-subs* "2 4 10") +(defparameter *output* "2 4 10") -(define-problem subs (data stream) - *input-subs* - *output-subs* +(define-problem subs (data stream) *input* *output* (let ((haystack (read-line data)) (needle (read-line data))) (iterate diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/tran.lisp --- a/src/problems/tran.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/tran.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,13 +1,26 @@ -(in-package :rosalind) +(defpackage :rosalind/tran (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/tran) -(defparameter *input-tran* ">Rosalind_0209 +(defparameter *input* ">Rosalind_0209 GCAACGCACAACGAAAACCCTTAGGGACTGGATTATTTCGTGATCGTTGTAGTTATTGGA AGTACGGGCATCAACCCAGTT >Rosalind_2200 TTATCTGACAAAGAAAGCCGTCAACGGCTGGATAATTTCGCGATCGTGCTGGTTACTGGC GGTACGAGTGTTCCTTTGGGT") -(defparameter *output-tran* "1.21428571429") +(defparameter *output* "1.21428571429") + +(defun rings (base) + "Return the number of rings in the structure of `base`. + + Pyrimidines (cytosine, thymine, and uracil) have a single-ring structure. + + Purines (adenine and guanine) have a double-ring structure. + + " + (ecase base + ((#\A #\G) 2) + ((#\C #\T #\U) 1))) (defun transitionp (x y) (and (char/= x y) @@ -17,14 +30,9 @@ (and (char/= x y) (/= (rings x) (rings y)))) -(define-problem tran (data stream) - *input-tran* - *output-tran* +(define-problem tran (data stream) *input* *output* (destructuring-bind (x y) - (mapcar #'cdr (read-fasta-into-alist data)) - (format nil "~,11F" (coerce (/ (mapcount #'transitionp x y) - (mapcount #'transversionp x y)) + (mapcar #'cdr (u:read-fasta-into-alist data)) + (format nil "~,11F" (coerce (/ (u:mapcount #'transitionp x y) + (u:mapcount #'transversionp x y)) 'double-float)))) - -;; (problem-tran "2") -;; (solve tran) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/tree.lisp --- a/src/problems/tree.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/tree.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,4 +1,5 @@ -(in-package :rosalind) +(defpackage :rosalind/tree (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/tree) ;; For every edge we add we can link up two previously-unconnected trees. If we ;; have N separate trees, we just need N-1 edges to connect them. So the @@ -6,7 +7,7 @@ ;; given graph. Some day I really need to add that as a utility function to ;; cl-digraph. -(defparameter *input-tree* +(defparameter *input* "10 1 2 2 8 @@ -15,7 +16,7 @@ 6 10 7 9") -(defparameter *output-tree* +(defparameter *output* "3") (defun subgraph-vertices (graph start) @@ -29,11 +30,9 @@ (while found) (for subgraph = (subgraph-vertices graph vertex)) (collect subgraph) - (map nil (curry #'digraph:remove-vertex graph) subgraph)))) + (map nil (u:curry #'digraph:remove-vertex graph) subgraph)))) -(define-problem tree (data stream) - *input-tree* - *output-tree* +(define-problem tree (data stream) *input* *output* (let ((graph (digraph:make-digraph :initial-vertices (alexandria:iota (read data) :start 1)))) (iterate @@ -45,5 +44,3 @@ #; Scratch -------------------------------------------------------------------- - -(problem-tree) diff -r 2d34585c5704 -r 2735aa6aab79 src/problems/trie.lisp --- a/src/problems/trie.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/problems/trie.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,11 +1,12 @@ -(in-package :rosalind) +(defpackage :rosalind/trie (:use :cl :rosalind :losh :iterate)) +(in-package :rosalind/trie) -(defparameter *input-trie* +(defparameter *input* "ATAGA ATC GAT") -(defparameter *output-trie* +(defparameter *output* "1 2 A 2 3 T 3 4 A @@ -22,12 +23,12 @@ (defun make-trie (strings) (recursively ((strings strings)) - (let ((terminal (find-if #'string-empty-p strings)) - (strings (remove-if #'string-empty-p strings))) + (let ((terminal (find-if #'u:string-empty-p strings)) + (strings (remove-if #'u:string-empty-p strings))) (make-trie-node :terminal (if terminal t nil) :children (iterate - (for (ch kids) :in-hashtable (group-by #'first-char strings)) + (for (ch kids) :in-hashtable (group-by #'u:first-char strings)) (collect-hash (ch (recur (mapcar (lambda (s) (subseq s 1)) kids))))))))) @@ -78,10 +79,8 @@ (gather (list (n node) (n child) ch)) (recur child))))))) -(define-problem trie (data stream) - *input-trie* - *output-trie* - (let* ((strings (read-lines data)) +(define-problem trie (data stream) *input* *output* + (let* ((strings (u:read-lines data)) (trie (make-trie strings))) ;; (dot-graph 'trie trie :rankdir :tb) (format nil "~{~{~A~^ ~}~^~%~}" (trie-adjacency-list trie)))) diff -r 2d34585c5704 -r 2735aa6aab79 src/utils.lisp --- a/src/utils.lisp Sun Jan 19 19:51:35 2020 -0500 +++ b/src/utils.lisp Sun Jan 19 20:38:29 2020 -0500 @@ -1,29 +1,6 @@ -(in-package :rosalind) +(in-package :rosalind/utils) ;;;; Misc --------------------------------------------------------------------- -(defun sh (command input) - (declare (ignorable command input)) - #+sbcl - (sb-ext:run-program (first command) (rest command) - :search t - :input (make-string-input-stream input)) - #+ccl - (ccl:run-program (first command) (rest command) - :input (make-string-input-stream input)) - #+abcl - (let ((p (system:run-program (first command) (rest command) - :input :stream - :output t - :wait nil))) - (write-string input (system:process-input p)) - (close (system:process-input p))) - #-(or sbcl ccl abcl) - (error "Not implemented for this Lisp implementation, sorry")) - -(defun pbcopy (string) - (values string (sh '("pbcopy") string))) - - (defun ensure-stream (input) (ctypecase input (stream input) @@ -34,61 +11,11 @@ (stream (alexandria:read-stream-content-into-string input)) (string (copy-seq input)))) -(defun ensure-list (value) - (if (listp value) - value - (list value))) - - -(defun hamming (sequence1 sequence2 &key (test #'eql)) - "Return the Hamming distance between `sequence1` and `sequence2`." - ;; todo assert length=? - (let ((result 0)) - (map nil (lambda (x y) - (unless (funcall test x y) - (incf result))) - sequence1 - sequence2) - result)) - - -(defun factorial (x) - (check-type x (integer 0)) - (iterate (for i :from 1 :to x) - (multiplying i))) (defun permutations (items) (gathering (alexandria:map-permutations #'gather items))) -(defun dna-complement (base) - (ecase base - (#\A #\T) - (#\T #\A) - (#\G #\C) - (#\C #\G))) - -(defun rna-complement (base) - (ecase base - (#\A #\U) - (#\U #\A) - (#\G #\C) - (#\C #\G))) - - -(defun rings (base) - "Return the number of rings in the structure of `base`. - - Pyrimidines (cytosine, thymine, and uracil) have a single-ring structure. - - Purines (adenine and guanine) have a double-ring structure. - - " - (ecase base - ((#\A #\G) 2) - ((#\C #\T #\U) 1))) - - (defun-inline gcp (base) "Return whether `base` is G or C." (or (char= #\G base) @@ -116,6 +43,112 @@ result)) +;;;; Dogma -------------------------------------------------------------------- +(defun dna-complement (base) + (ecase base + (#\A #\T) + (#\T #\A) + (#\G #\C) + (#\C #\G))) + +(defun nreverse-complement (dna) + (map-into dna #'dna-complement dna) + (nreverse dna)) + +(defun reverse-complement (dna) + (nreverse-complement (copy-seq dna))) + +(defun transcribe (dna) + "Transcribe a fresh RNA string from `DNA`." + (substitute #\U #\T dna)) + +(defun ntranscribe (dna) + "Destructively transcribe `DNA` to RNA in-place." + (nsubstitute #\U #\T dna)) + +(defmacro codon-case ((vector index) &rest clauses) + ;; Compiles a giant list of clauses into a tree of ECASEs. + ;; + ;; Each codon will have at most 3 ECASEs to pass through. Each ECASE has at + ;; most four options, so in the worst case we end up with 3 * 4 = 12 + ;; comparisons instead of 64. + ;; + ;; If we ever convert bases to vectors of (unsigned-byte 2)s we could + ;; potentially use a lookup table here, e.g.: + ;; + ;; (aref +amino-acids+ (+ x (ash y 2) (ash z 4))) + (alexandria:once-only (vector index) + (alexandria:with-gensyms (x y z) + `(let ((,x (aref ,vector ,index)) + (,y (aref ,vector (+ ,index 1))) + (,z (aref ,vector (+ ,index 2)))) + ,(labels ((strip (clauses) + (if (= 1 (length (caar clauses))) + (cadar clauses) + (iterate (for (head body) :in clauses) + (collect (list (subseq head 1) body))))) + (split (clauses) + (-<> clauses + (group-by (rcurry #'aref 0) <> :key #'first) + (iterate (for (k v) :in-hashtable <>) + (collect (list k (strip v))))))) + (recursively ((clauses (split clauses)) + (codons (list x y z)) + (i 0)) + `(ecase ,(first codons) + ,@(iterate (for (k remaining) :in clauses) + (collect `(,k ,(if (atom remaining) + remaining + (recur (split remaining) + (rest codons) + (1+ i))))))))))))) + +(defun codon-to-protein (vector index) + "Return the amino acid encoded by the codon in `vector` at `index`." + (codon-case (vector index) + ("UUU" #\F) ("CUU" #\L) ("AUU" #\I) ("GUU" #\V) + ("UUC" #\F) ("CUC" #\L) ("AUC" #\I) ("GUC" #\V) + ("UUA" #\L) ("CUA" #\L) ("AUA" #\I) ("GUA" #\V) + ("UUG" #\L) ("CUG" #\L) ("AUG" #\M) ("GUG" #\V) + ("UCU" #\S) ("CCU" #\P) ("ACU" #\T) ("GCU" #\A) + ("UCC" #\S) ("CCC" #\P) ("ACC" #\T) ("GCC" #\A) + ("UCA" #\S) ("CCA" #\P) ("ACA" #\T) ("GCA" #\A) + ("UCG" #\S) ("CCG" #\P) ("ACG" #\T) ("GCG" #\A) + ("UAU" #\Y) ("CAU" #\H) ("AAU" #\N) ("GAU" #\D) + ("UAC" #\Y) ("CAC" #\H) ("AAC" #\N) ("GAC" #\D) + ("UAA" nil) ("CAA" #\Q) ("AAA" #\K) ("GAA" #\E) + ("UAG" nil) ("CAG" #\Q) ("AAG" #\K) ("GAG" #\E) + ("UGU" #\C) ("CGU" #\R) ("AGU" #\S) ("GGU" #\G) + ("UGC" #\C) ("CGC" #\R) ("AGC" #\S) ("GGC" #\G) + ("UGA" nil) ("CGA" #\R) ("AGA" #\R) ("GGA" #\G) + ("UGG" #\W) ("CGG" #\R) ("AGG" #\R) ("GGG" #\G))) + +(defun translate (rna &key (start 0)) + "Translate a string of RNA bases into a protein string of amino acids. + + `rna` will be searched (beginning at `start`) for a start codon and + translation will proceed from there. If no start codon occurs after `start` + then `nil` will be returned. + + Once a start codon has been found, translation proceeds to the next stop + codon. If no stop codon is present, `nil` will be returned. + + Otherwise two values are returned: the protein string and the index into `rna` + where it started. + + " + (when-let ((start (search "AUG" rna :start2 start))) + (values + (iterate (with limit = (- (length rna) 3)) + (for i :from start :by 3) + (when (> i limit) + (return-from translate (values nil nil))) + (for protein = (codon-to-protein rna i)) + (while protein) + (collect protein :result-type 'string)) + start))) + + ;;;; Strings ------------------------------------------------------------------ (defun string-empty-p (string) (zerop (length string))) @@ -127,6 +160,12 @@ ;;;; Math --------------------------------------------------------------------- +(defun factorial (x) + (check-type x (integer 0)) + (iterate (for i :from 1 :to x) + (multiplying i))) + + (defmacro do-sum ((var from to) &body body) "Sum `body` with `var` iterating over `[from, to]`. @@ -252,13 +291,6 @@ (replace buffer initial-contents)) buffer)) -(defun make-string-buffer - (&key initial-contents - (initial-capacity (max 64 (length initial-contents)))) - (make-buffer :initial-contents initial-contents - :initial-capacity initial-capacity - :element-type 'character)) - (defun buffer-push (buffer element) (vector-push-extend element buffer) element) @@ -360,7 +392,7 @@ ;;;; Uniprot ------------------------------------------------------------------ -(defparameter *uniprot-cache* (make-hash-table :test #'equal)) +(defvar *uniprot-cache* (make-hash-table :test #'equal)) (defmacro get-cached (key cache expr) (once-only (key cache) @@ -374,7 +406,7 @@ (defun uniprot (id) (get-cached id *uniprot-cache* - (-<> (uniprot-url id) + (_ (uniprot-url id) drakma:http-request read-fasta-into-alist first))) @@ -383,14 +415,14 @@ ;;;; Output ------------------------------------------------------------------- (defun float-string (float-or-floats &optional (precision 3)) (with-output-to-string (s) - (loop :for (float . more) :on (ensure-list float-or-floats) + (loop :for (float . more) :on (alexandria:ensure-list float-or-floats) :do (format s "~,VF~:[~; ~]" precision float more)))) ;;;; Testing ------------------------------------------------------------------ (defmacro define-test (problem input output &optional (test 'string=)) - `(test ,(alexandria:symbolicate 'test- problem) - (is (,test ,output (aesthetic-string (,problem ,input)))))) + `(1am:test ,(symbolicate 'test- problem) + (1am:is (,test ,output (aesthetic-string (,problem ,input)))))) (defun run-tests () (1am:run)) @@ -400,7 +432,7 @@ (defmacro define-problem (name (arg type) sample-input sample-output &body body) (multiple-value-bind (body declarations docstring) (alexandria:parse-body body :documentation t) - (let ((symbol (alexandria:symbolicate 'problem- name))) + (let ((symbol (symbolicate 'problem- name))) `(progn (defun ,symbol (&optional (,arg ,sample-input)) ,@(when docstring (list docstring)) @@ -423,5 +455,5 @@ (defmacro solve (name) (assert (symbolp name) () "Usage: (solve foo)~%foo should not be quoted.") - `(solve% ',(alexandria:symbolicate 'problem- name))) + `(solve% ',(symbolicate 'problem- name)))