870270771fde

PDST
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Sun, 19 Jan 2020 21:14:53 -0500
parents 2735aa6aab79
children 8f6ef53eac55
branches/tags (none)
files src/package.lisp src/problems/hamm.lisp src/problems/pdst.lisp src/utils.lisp

Changes

--- a/src/package.lisp	Sun Jan 19 20:38:29 2020 -0500
+++ b/src/package.lisp	Sun Jan 19 21:14:53 2020 -0500
@@ -24,6 +24,8 @@
 
     :mapcount
 
+    :hamming
+
     :string-empty-p
     :first-char
 
--- a/src/problems/hamm.lisp	Sun Jan 19 20:38:29 2020 -0500
+++ b/src/problems/hamm.lisp	Sun Jan 19 21:14:53 2020 -0500
@@ -4,18 +4,7 @@
 (defparameter *input* "GAGCCTACTAACGGGAT
 CATCGTAATGACGGCCT")
 
-(defun hamming (sequence1 sequence2 &key (test #'eql))
-  "Return the Hamming distance between `sequence1` and `sequence2`."
-  ;; todo assert length=?
-  (let ((result 0))
-    (map nil (lambda (x y)
-               (unless (funcall test x y)
-                 (incf result)))
-         sequence1
-         sequence2)
-    result))
-
 (define-problem hamm (data stream)
     *input*
     "7"
-  (hamming (read-line data) (read-line data) :test #'char=))
+  (u:hamming (read-line data) (read-line data) :test #'char=))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/pdst.lisp	Sun Jan 19 21:14:53 2020 -0500
@@ -0,0 +1,42 @@
+(defpackage :rosalind/pdst (:use :cl :rosalind :losh :iterate))
+(in-package :rosalind/pdst)
+
+(defparameter *input* ">Rosalind_9499
+TTTCCATTTA
+>Rosalind_0942
+GATTCATTTC
+>Rosalind_6568
+TTTCCATTTT
+>Rosalind_1833
+GTTCCATTTA")
+
+(defparameter *output* "0.00000 0.40000 0.10000 0.10000
+0.40000 0.00000 0.40000 0.30000
+0.10000 0.40000 0.00000 0.20000
+0.10000 0.30000 0.20000 0.00000")
+
+(defun p-distance (a b)
+  (assert (= (length a) (length b)))
+  (/ (u:hamming a b) (length a)))
+
+(defun make-p-distance-matrix (seqs)
+  (let* ((n (length seqs))
+         (matrix (make-array (list n n) :initial-element 0)))
+    (iterate
+      (for row :from 0 :below n)
+      (for start :from 1) ;; Don't bother comparing seqs with themselves.
+      (for (a . remaining) :on seqs)
+      (iterate
+        (for b :in remaining)
+        (for col :from start :below n)
+        (for pd = (p-distance a b))
+        (setf (aref matrix row col) pd ;; The matrix is symmetric along the diagonal.
+              (aref matrix col row) pd)))
+    matrix))
+
+(define-problem pdst (data stream) *input* *output*
+  (_ data
+    u:read-fasta-into-alist
+    (mapcar #'cdr _)
+    make-p-distance-matrix
+    (u:float-string _ 5)))
--- a/src/utils.lisp	Sun Jan 19 20:38:29 2020 -0500
+++ b/src/utils.lisp	Sun Jan 19 21:14:53 2020 -0500
@@ -43,6 +43,18 @@
     result))
 
 
+(defun hamming (sequence1 sequence2 &key (test #'eql))
+  "Return the Hamming distance between `sequence1` and `sequence2`."
+  ;; todo assert length=?
+  (let ((result 0))
+    (map nil (lambda (x y)
+               (unless (funcall test x y)
+                 (incf result)))
+         sequence1
+         sequence2)
+    result))
+
+
 ;;;; Dogma --------------------------------------------------------------------
 (defun dna-complement (base)
   (ecase base
@@ -414,9 +426,36 @@
 
 ;;;; Output -------------------------------------------------------------------
 (defun float-string (float-or-floats &optional (precision 3))
+  "Return a string of `float-or-floats` in the format Rosalind wants.
+
+  `float-or-floats` can be a number, list, vector, or 2D array.
+
+  Each float will be printed with `precision` digits after the decimal point.
+
+  If a list or vector is given, the floats will be separated by a space.
+
+  If a two-dimensional array is given, the rows will be separated by newlines.
+
+  "
   (with-output-to-string (s)
-    (loop :for (float . more) :on (alexandria:ensure-list float-or-floats)
-          :do (format s "~,VF~:[~; ~]" precision float more))))
+    (flet ((p (float &optional space)
+             (format s "~,VF~:[~; ~]" precision float space)))
+      (etypecase float-or-floats
+        ((or number list)
+         (loop :for (float . more) :on (alexandria:ensure-list float-or-floats)
+               :do (p float more)))
+        ((array * (*))
+         (loop :with last = (1- (length float-or-floats))
+               :for i :from 0
+               :for f :across float-or-floats
+               :do (p f (< i last))))
+        ((array * (* *))
+         (destructuring-bind (rows cols) (array-dimensions float-or-floats)
+           (dotimes (r rows)
+             (dotimes (c cols)
+               (p (aref float-or-floats r c) (< c (1- cols))))
+             (when (< r (1- rows))
+               (terpri s)))))))))
 
 
 ;;;; Testing ------------------------------------------------------------------