cd3fc11e3298

CONV, SPEC
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Thu, 04 Aug 2022 22:30:18 -0400
parents 0c68769a8788
children a95ed046cc2c
branches/tags (none)
files rosalind.asd src/package.lisp src/problems/conv.lisp src/problems/prtm.lisp src/problems/spec.lisp src/utils.lisp

Changes

--- a/rosalind.asd	Thu Aug 04 21:48:26 2022 -0400
+++ b/rosalind.asd	Thu Aug 04 22:30:18 2022 -0400
@@ -30,6 +30,7 @@
                :iterate
                :losh
                :str
+               :parse-float
 
                )
 
--- a/src/package.lisp	Thu Aug 04 21:48:26 2022 -0400
+++ b/src/package.lisp	Thu Aug 04 22:30:18 2022 -0400
@@ -20,6 +20,10 @@
     :transcribe :ntranscribe
     :translate
 
+    :+monoisotopic-mass-of-water+
+    :monoisotopic-mass
+    :*monoisotopic-masses*
+
     :gcp :base-probability :sequence-probability
 
     :mapcount
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/conv.lisp	Thu Aug 04 22:30:18 2022 -0400
@@ -0,0 +1,33 @@
+(defpackage :rosalind/conv (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/conv)
+
+;; Sample dataset handed to `define-problem conv` as its example input: two
+;; lines of space-separated decimal numbers.
+(defparameter *input* "186.07931 287.12699 548.20532 580.18077 681.22845 706.27446 782.27613 968.35544 968.35544
+101.04768 158.06914 202.09536 318.09979 419.14747 463.17369")
+
+;; Expected answer for `*input*`, in the same two-line layout that the
+;; problem body's `format` call produces.
+(defparameter *output* "3
+85.03163")
+
+(defun parse (stream)
+  "Read one line from `stream` and return the `frequencies` of its numbers.
+
+  The line is split on single spaces and every token is handed to
+  `read-from-string`."
+  ;; `_` appears to thread each form's value into the next form's `_`
+  ;; placeholder -- TODO confirm against the losh documentation.
+  (_ (read-line stream)
+    (str:split " " _)
+    ;; NOTE(review): `rational` is not one of the standard float type names for
+    ;; this variable; presumably an implementation extension that makes the
+    ;; reader produce exact rationals for decimal tokens -- confirm.
+    (let ((*read-default-float-format* 'rational))
+      (mapcar #'read-from-string _))
+    frequencies))
+
+(defun minkowski-difference (s1 s2)
+  "Combine hash tables `s1` and `s2` into a table keyed by pairwise differences.
+
+  For every entry k1 -> v1 of `s1` and every entry k2 -> v2 of `s2`, the value
+  stored under k1 - k2 is incremented by v1 * v2, starting from 0."
+  (iterate
+    ;; The size hint is the number of key pairs, which is the largest number
+    ;; of distinct differences there can be.
+    ;; NOTE(review): presumably `with-result` also makes `result` the value
+    ;; returned by the loop -- confirm.
+    (with-result result = (make-hash-table :size (* (hash-table-count s1)
+                                                    (hash-table-count s2))))
+    (for (k1 v1) :in-hashtable s1)
+    (iterate (for (k2 v2) :in-hashtable s2)
+             (incf (gethash (- k1 k2) result 0)
+                   (* v1 v2)))))
+
+(defun msmax (multiset)
+  "Return a cons of key and value for the entry of hash table `multiset`
+  whose value is largest."
+  ;; NOTE(review): which entry wins a tie, and what comes back for an empty
+  ;; table, depend on iterate's `finding ... maximizing` -- confirm.
+  (iterate (for (k v) :in-hashtable multiset)
+           (finding (cons k v) :maximizing v)))
+
+(define-problem conv (data stream) *input* *output*
+  ;; Function arguments are evaluated left to right, so the first
+  ;; `(parse data)` reads one line from `data` and the second reads the line
+  ;; after it.
+  (destructuring-bind (weight . n)
+      (msmax (minkowski-difference (parse data) (parse data)))
+    ;; The count goes on the first line and the difference on the second.
+    (format nil "~D~%~F" n weight)))
--- a/src/problems/prtm.lisp	Thu Aug 04 21:48:26 2022 -0400
+++ b/src/problems/prtm.lisp	Thu Aug 04 22:30:18 2022 -0400
@@ -1,42 +1,13 @@
 (defpackage :rosalind/prtm (:use :cl :rosalind :losh :iterate))
 (in-package :rosalind/prtm)
 
-(defconstant +monoisotopic-mass-of-water+ 18.01056d0
-  "The monoisotopic mass of a single water molecule, in Daltons.")
-
-(defun monoisotopic-mass (residue)
-  ;; todo is a hash table faster here?  we could also do an array
-  ;; starting at (char-code #\A) if we really wanted
-  (ecase residue
-    ;; These have to be doubles or we get too much rounding error.  It's fine.
-    (#\A  71.03711d0)
-    (#\C 103.00919d0)
-    (#\D 115.02694d0)
-    (#\E 129.04259d0)
-    (#\F 147.06841d0)
-    (#\G  57.02146d0)
-    (#\H 137.05891d0)
-    (#\I 113.08406d0)
-    (#\K 128.09496d0)
-    (#\L 113.08406d0)
-    (#\M 131.04049d0)
-    (#\N 114.04293d0)
-    (#\P  97.05276d0)
-    (#\Q 128.05858d0)
-    (#\R 156.10111d0)
-    (#\S  87.03203d0)
-    (#\T 101.04768d0)
-    (#\V  99.06841d0)
-    (#\W 186.07931d0)
-    (#\Y 163.06333d0)))
-
 
 (define-problem prtm (data string)
     "SKADYEK"
     "821.392"
+  ;; Drop newlines from the input, combine the per-residue masses with
+  ;; `summation` (presumably a sum -- confirm), then pass the total to
+  ;; `u:float-string`.
   (_ data
     (delete #\newline _)
-    (summation _ :key #'monoisotopic-mass)
+    (summation _ :key #'u:monoisotopic-mass)
     u:float-string))
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/spec.lisp	Thu Aug 04 22:30:18 2022 -0400
@@ -0,0 +1,24 @@
+(defpackage :rosalind/spec (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/spec)
+
+(defparameter *input* "3524.8542
+3710.9335
+3841.974
+3970.0326
+4057.0646")
+
+(defparameter *output* "WMQS")
+
+
+(defun roughly= (a b &optional (epsilon (* 0.000001d0 (min (abs a) (abs b)))))
+  "Return true when `a` and `b` differ by no more than `epsilon`.
+
+  `epsilon` defaults to one millionth of the smaller magnitude of the two
+  arguments.  Magnitudes are used so that the default tolerance is never
+  negative, and the comparison is inclusive so that equal arguments always
+  compare as roughly equal, even when the default tolerance is zero."
+  (<= (abs (- a b)) epsilon))
+
+(defun find-amino-acid (weight)
+  "Look `weight` up among the masses in `u:*monoisotopic-masses*`, comparing
+  with `roughly=`."
+  ;; NOTE(review): presumably `rassocar` returns the car (the residue
+  ;; character) of the first matching entry, or NIL when nothing matches --
+  ;; confirm against the losh documentation.
+  (rassocar weight u:*monoisotopic-masses* :test #'roughly=))
+
+(define-problem spec (data stream) *input* *output*
+  ;; Bound so the weights come back as double floats -- presumably
+  ;; `parse-float` consults this variable when no type is given; confirm.
+  (let* ((*read-default-float-format* 'double-float)
+         ;; One parsed number per input line.
+         (prefix-weights (u:read-lines data :key #'parse-float:parse-float))
+         ;; Each weight minus the weight on the line before it.
+         (weights (mapcar #'- (rest prefix-weights) prefix-weights))
+         ;; One lookup result per difference.
+         (result (mapcar #'find-amino-acid weights)))
+    (string-join "" result)))
--- a/src/utils.lisp	Thu Aug 04 21:48:26 2022 -0400
+++ b/src/utils.lisp	Thu Aug 04 22:30:18 2022 -0400
@@ -161,6 +161,59 @@
       start)))
 
 
+;;;; Protein ------------------------------------------------------------------
+(defconstant +monoisotopic-mass-of-water+ 18.01056d0
+  "The monoisotopic mass of a single water molecule, in Daltons.")
+
+;; Alist mapping a residue character to its monoisotopic mass as a double
+;; float (presumably in Daltons, like `+monoisotopic-mass-of-water+`).
+;; `I` and `L` carry the same value, so a reverse lookup by mass cannot tell
+;; them apart.
+(defparameter *monoisotopic-masses*
+  '((#\A .  71.03711d0)
+    (#\C . 103.00919d0)
+    (#\D . 115.02694d0)
+    (#\E . 129.04259d0)
+    (#\F . 147.06841d0)
+    (#\G .  57.02146d0)
+    (#\H . 137.05891d0)
+    (#\I . 113.08406d0)
+    (#\K . 128.09496d0)
+    (#\L . 113.08406d0)
+    (#\M . 131.04049d0)
+    (#\N . 114.04293d0)
+    (#\P .  97.05276d0)
+    (#\Q . 128.05858d0)
+    (#\R . 156.10111d0)
+    (#\S .  87.03203d0)
+    (#\T . 101.04768d0)
+    (#\V .  99.06841d0)
+    (#\W . 186.07931d0)
+    (#\Y . 163.06333d0)))
+
+(defun monoisotopic-mass (residue)
+  "Return the monoisotopic mass of the amino acid `residue`, a character.
+
+  The value is looked up in `*monoisotopic-masses*`, so that table is the only
+  place the masses are written down.  The table stores double floats; single
+  floats would introduce too much rounding error.
+
+  Signals a `type-error` when `residue` has no entry in the table."
+  ;; A linear scan over the table is cheap at this size; switch to a hash
+  ;; table or an array indexed by character code only if profiling asks for it.
+  (let ((entry (assoc residue *monoisotopic-masses*)))
+    (if entry
+        (cdr entry)
+        (error 'type-error
+               :datum residue
+               :expected-type `(member ,@(mapcar #'car *monoisotopic-masses*))))))
+
+
 ;;;; Strings ------------------------------------------------------------------
 (defun string-empty-p (string)
   (zerop (length string)))
@@ -319,10 +372,12 @@
 
 
 ;;;; Readers ------------------------------------------------------------------
-(defun read-lines (stream)
+(defun read-lines (stream &key key)
   "Read all lines from `stream` into a fresh list, applying `key` (when non-nil) to each line."
   (iterate (for line :in-stream stream :using #'read-line)
-           (collect line)))
+           (collect (if key
+                      (funcall key line)
+                      line))))
 
 
 ;;;; Buffers ------------------------------------------------------------------