--- a/src/package.lisp Fri Aug 05 00:16:11 2022 -0400
+++ b/src/package.lisp Mon Aug 08 19:00:58 2022 -0400
@@ -22,6 +22,7 @@
:+monoisotopic-mass-of-water+
:monoisotopic-mass
+ :residue-with-mass
:*monoisotopic-masses*
:gcp :base-probability :sequence-probability
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/lcsq.lisp Mon Aug 08 19:00:58 2022 -0400
@@ -0,0 +1,72 @@
+(defpackage :rosalind/lcsq (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/lcsq)
+
+(defparameter *input* ">Rosalind_23
+AACCTTGG
+>Rosalind_64
+ACACTGTGA")
+
+(defparameter *output* "AACTTG")
+
+(defun print-2d-array (array)
+ (check-type array (array * (* *)))
+ ;; This will cons a lot.
+ (iterate
+ (with strings = (make-array (array-dimensions array)))
+ (for el :across-flat-array array :with-index i)
+ (for s = (princ-to-string el))
+ (setf (row-major-aref strings i) (princ-to-string el))
+ (maximizing (length s) :into len)
+ (finally (dotimes (row (array-dimension strings 0))
+ (dotimes (col (array-dimension strings 1))
+ (format t " ~V@A" len (aref strings row col)))
+ (terpri))))
+ array)
+
+(defun build-table (a b)
+ ;; From CLRS 15.4
+ (iterate
+ (with-result table = (make-array (list (1+ (length a))
+ (1+ (length b)))
+ :initial-element 0))
+ (for ca :in-string a)
+ (for i :from 1)
+ (iterate
+ (for cb :in-string b)
+ (for j :from 1)
+ (for up = (aref table (1- i) j))
+ (for left = (aref table i (1- j)))
+ (for upleft = (aref table (1- i) (1- j)))
+ (setf (aref table i j)
+ (cond ((char= ca cb) (1+ upleft))
+ ((>= up left) up)
+ (t left))))))
+
+(defun reconstruct-lcs-from-table (a b table)
+ (nreverse
+ (iterate
+ (with i = (length a))
+ (with j = (length b))
+ (until (or (zerop i) (zerop j)))
+ (for ca = (aref a (1- i)))
+ (for cb = (aref b (1- j)))
+ (for up = (aref table (1- i) j))
+ (for left = (aref table i (1- j)))
+ (cond ((char= ca cb) (progn (collect ca :result-type string)
+ (decf i)
+ (decf j)))
+ ((>= up left) (decf i))
+ (t (decf j))))))
+
+(defun least-common-subsequence (a b)
+ (check-type a string)
+ (check-type b string)
+ (reconstruct-lcs-from-table a b (build-table a b)))
+
+
+(define-problem lcsq (data stream) *input* *output*
+ (destructuring-bind (a b) (mapcar #'cdr (u:read-fasta-into-alist data))
+ (least-common-subsequence a b)))
+
+#; Scratch --------------------------------------------------------------------
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/prsm.lisp Mon Aug 08 19:00:58 2022 -0400
@@ -0,0 +1,47 @@
+(defpackage :rosalind/prsm (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/prsm)
+
+(defparameter *input* "4
+GSDMQS
+VWICN
+IASWMQS
+PVSMGAD
+445.17838
+115.02694
+186.07931
+314.13789
+317.1198
+215.09061")
+
+(defparameter *output* "3
+GSDMQS")
+
+
+(defun complete-spectrum (protein)
+ (let ((result (make-hash-table)))
+ (incf (gethash (u:monoisotopic-mass protein) result 0))
+ (iterate (for i :from 1 :below (length protein))
+ (incf (gethash (u:monoisotopic-mass protein :start i) result 0)))
+ (iterate (for i :from (1- (length protein)) :above 0)
+ (incf (gethash (u:monoisotopic-mass protein :end i) result 0)))
+ result))
+
+(defun multiplicity (spectrum string)
+ ;; TODO extract these into utils?
+ (cdr (rosalind/conv::msmax
+ (rosalind/conv::minkowski-difference spectrum
+ (complete-spectrum string)))))
+
+
+(define-problem prsm (data stream) *input* *output*
+ (let* ((*read-default-float-format* 'rational)
+ (n (parse-integer (read-line data)))
+ (proteins (gimme n (read-line data)))
+ (spectrum (frequencies (u:read-lines data :key #'read-from-string))))
+ (iterate
+ (for protein :in proteins)
+ (for m = (multiplicity spectrum protein))
+ (finding (cons m protein) :maximizing m :into result)
+ (finally (return (format nil "~D~%~A" (car result) (cdr result)))))))
+
+
--- a/src/problems/prtm.lisp Fri Aug 05 00:16:11 2022 -0400
+++ b/src/problems/prtm.lisp Mon Aug 08 19:00:58 2022 -0400
@@ -7,7 +7,7 @@
"821.392"
(_ data
(delete #\newline _)
- (summation _ :key #'u:monoisotopic-mass)
+ u:monoisotopic-mass
u:float-string))
--- a/src/problems/spec.lisp Fri Aug 05 00:16:11 2022 -0400
+++ b/src/problems/spec.lisp Mon Aug 08 19:00:58 2022 -0400
@@ -10,15 +10,9 @@
(defparameter *output* "WMQS")
-(defun roughly= (a b &optional (epsilon (* 0.000001d0 (min a b))))
- (< (abs (- a b)) epsilon))
-
-(defun find-amino-acid (weight)
- (rassocar weight u:*monoisotopic-masses* :test #'roughly=))
-
(define-problem spec (data stream) *input* *output*
- (let* ((*read-default-float-format* 'double-float)
- (prefix-weights (u:read-lines data :key #'parse-float:parse-float))
+ (let* ((*read-default-float-format* 'rational)
+ (prefix-weights (u:read-lines data :key #'read-from-string))
(weights (mapcar #'- (rest prefix-weights) prefix-weights))
- (result (mapcar #'find-amino-acid weights)))
+ (result (mapcar #'u:residue-with-mass weights)))
(string-join "" result)))
--- a/src/utils.lisp Fri Aug 05 00:16:11 2022 -0400
+++ b/src/utils.lisp Mon Aug 08 19:00:58 2022 -0400
@@ -166,52 +166,64 @@
"The monoisotopic mass of a single water molecule, in Daltons.")
(defparameter *monoisotopic-masses*
- '((#\A . 71.03711d0)
- (#\C . 103.00919d0)
- (#\D . 115.02694d0)
- (#\E . 129.04259d0)
- (#\F . 147.06841d0)
- (#\G . 57.02146d0)
- (#\H . 137.05891d0)
- (#\I . 113.08406d0)
- (#\K . 128.09496d0)
- (#\L . 113.08406d0)
- (#\M . 131.04049d0)
- (#\N . 114.04293d0)
- (#\P . 97.05276d0)
- (#\Q . 128.05858d0)
- (#\R . 156.10111d0)
- (#\S . 87.03203d0)
- (#\T . 101.04768d0)
- (#\V . 99.06841d0)
- (#\W . 186.07931d0)
- (#\Y . 163.06333d0)))
+ '((#\A . 71.03711r0)
+ (#\C . 103.00919r0)
+ (#\D . 115.02694r0)
+ (#\E . 129.04259r0)
+ (#\F . 147.06841r0)
+ (#\G . 57.02146r0)
+ (#\H . 137.05891r0)
+ (#\I . 113.08406r0)
+ (#\K . 128.09496r0)
+ (#\L . 113.08406r0)
+ (#\M . 131.04049r0)
+ (#\N . 114.04293r0)
+ (#\P . 97.05276r0)
+ (#\Q . 128.05858r0)
+ (#\R . 156.10111r0)
+ (#\S . 87.03203r0)
+ (#\T . 101.04768r0)
+ (#\V . 99.06841r0)
+ (#\W . 186.07931r0)
+ (#\Y . 163.06333r0)))
-(defun monoisotopic-mass (residue)
+(defun-inline residue-mass (residue)
;; todo is a hash table faster here? we could also do an array
;; starting at (char-code #\A) if we really wanted
(ecase residue
- ;; These have to be doubles or we get too much rounding error. It's fine.
- (#\A 71.03711d0)
- (#\C 103.00919d0)
- (#\D 115.02694d0)
- (#\E 129.04259d0)
- (#\F 147.06841d0)
- (#\G 57.02146d0)
- (#\H 137.05891d0)
- (#\I 113.08406d0)
- (#\K 128.09496d0)
- (#\L 113.08406d0)
- (#\M 131.04049d0)
- (#\N 114.04293d0)
- (#\P 97.05276d0)
- (#\Q 128.05858d0)
- (#\R 156.10111d0)
- (#\S 87.03203d0)
- (#\T 101.04768d0)
- (#\V 99.06841d0)
- (#\W 186.07931d0)
- (#\Y 163.06333d0)))
+ ;; Just use rationals to avoid rounding errors and make them usable in hash
+ ;; tables. This doesn't have to be particularly fast.
+ (#\A 71.03711r0)
+ (#\C 103.00919r0)
+ (#\D 115.02694r0)
+ (#\E 129.04259r0)
+ (#\F 147.06841r0)
+ (#\G 57.02146r0)
+ (#\H 137.05891r0)
+ (#\I 113.08406r0)
+ (#\K 128.09496r0)
+ (#\L 113.08406r0)
+ (#\M 131.04049r0)
+ (#\N 114.04293r0)
+ (#\P 97.05276r0)
+ (#\Q 128.05858r0)
+ (#\R 156.10111r0)
+ (#\S 87.03203r0)
+ (#\T 101.04768r0)
+ (#\V 99.06841r0)
+ (#\W 186.07931r0)
+ (#\Y 163.06333r0)))
+
+(defun roughly= (a b &optional (epsilon (* 0.000001d0 (min a b))))
+ (< (abs (- a b)) epsilon))
+
+(defun residue-with-mass (mass)
+ (rassocar mass u:*monoisotopic-masses* :test #'roughly=))
+
+(defun monoisotopic-mass (protein &key (start 0) (end (length protein)))
+ (check-type protein string)
+ (loop :for i :from start :below end
+ :summing (residue-mass (char protein i))))
;;;; Strings ------------------------------------------------------------------