b8745a9fccd4

LCSM
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Wed, 07 Nov 2018 20:37:21 -0500 (2018-11-08)
parents e279056b1a5b
children fc16ddecf261
branches/tags (none)
files rosalind.asd src/problems/lcsm.lisp src/utils.lisp

Changes

--- a/rosalind.asd	Sat Nov 03 18:20:48 2018 -0400
+++ b/rosalind.asd	Wed Nov 07 20:37:21 2018 -0500
@@ -41,5 +41,6 @@
                                            (:file "fibd")
                                            (:file "cons")
                                            (:file "grph")
-                                           (:file "prtm")))))))
+                                           (:file "prtm")
+                                           (:file "lcsm")))))))
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/lcsm.lisp	Wed Nov 07 20:37:21 2018 -0500
@@ -0,0 +1,79 @@
+(in-package :rosalind)
+
+;; This one sucked.  We should use suffix trees some day but I just hacked this
+;; together for now.
+
+(defparameter *input-lcsm* ">Rosalind_1
+GATTACA
+>Rosalind_2
+TAGACCA
+>Rosalind_3
+ATACA")
+
+(defun compute-substring-table% (string1 string2)
+  ;; Compute the table of substring lengths.
+  ;;
+  ;;   a b c d e c
+  ;; x 0 0 0 0 0 0
+  ;; a 1 0 0 0 0 0
+  ;; b 0 2 0 0 0 0
+  ;; c 0 0 3 0 0 1
+  ;; e 0 0 0 0 0 1
+  (check-type string1 string)
+  (check-type string2 string)
+  (let ((table (make-array (list (length string1) (length string2)))))
+    (dotimes (i1 (length string1))
+      (dotimes (i2 (length string2))
+        (setf (aref table i1 i2)
+              (if (char= (aref string1 i1) (aref string2 i2))
+                (if (or (zerop i1) (zerop i2))
+                  1
+                  (1+ (aref table (1- i1) (1- i2))))
+                0))))
+    table))
+
+(defun-inline substring-at% (string index length)
+  ;; Given a matching string/table-index and a length, return the substring.
+  ;; The table effectively stores (start, end] but subseq wants [start, end).
+  (let ((end (1+ index)))
+    (subseq string (- end length) end)))
+
+(defun-inline find-substrings-of-length% (table string1 length)
+  ;; Find all the substrings in the table with the given length.
+  (iterate (for (l i1 nil) :in-array table)
+           (when (= length l)
+             (collect (substring-at% string1 i1 length)))))
+
+(defun-inline find-maximum-length% (table)
+  ;; Find the highest length in the table.
+  (iterate (for l :across-flat-array table)
+           (maximizing l)))
+
+(defun longest-common-substrings (string1 string2)
+  "Return a list of the longest common substrings of `string1` and `string2`."
+  (let ((table (compute-substring-table% string1 string2)))
+    (find-substrings-of-length% table string1 (find-maximum-length% table))))
+
+(defun longest (strings)
+  "Return a list of the longest strings in `strings`."
+  (remove-if-not
+    (curry #'= (alexandria:extremum (mapcar #'length strings) #'>))
+    strings :key #'length))
+
+(defun longest-common-substrings-of-any (substrings string)
+  "Return the longest common substrings of `string` and any one of `substrings`."
+  (-<> (iterate
+         (for substring :in substrings)
+         (appending (longest-common-substrings substring string)))
+    longest
+    (remove-duplicates <> :test #'string=)))
+
+(define-problem lcsm (data stream)
+    *input-lcsm*
+    "AC"
+  (let ((lines (iterate (for (nil line) :in-fasta data)
+                        (collect line))))
+    (-<> (reduce #'longest-common-substrings-of-any (rest lines)
+                 :initial-value (list (first lines)))
+      (sort <> #'string<) ; tests
+      first)))
--- a/src/utils.lisp	Sat Nov 03 18:20:48 2018 -0400
+++ b/src/utils.lisp	Wed Nov 07 20:37:21 2018 -0500
@@ -223,7 +223,7 @@
 ;;;; Testing ------------------------------------------------------------------
 (defmacro define-test (problem input output &optional (test 'string=))
   `(test ,(symb 'test- problem)
-     (is (,test ,output (,problem ,input)))))
+     (is (,test ,output (aesthetic-string (,problem ,input))))))
 
 (defun run-tests ()
   (1am:run))
@@ -241,7 +241,7 @@
            (setf ,arg ,(ecase type
                          (string `(ensure-string ,arg))
                          (stream `(ensure-stream ,arg))))
-           (aesthetic-string (progn ,@body)))
+           (progn ,@body))
          (setf (get ',symbol 'rosalind-name) ,(string-downcase name))
          (define-test ,symbol ,sample-input ,sample-output)
          ',symbol))))
@@ -251,7 +251,7 @@
 
 (defun solve% (problem)
   (with-open-file (input (problem-data-path problem))
-    (pbcopy (funcall problem input))))
+    (pbcopy (aesthetic-string (funcall problem input)))))
 
 (defmacro solve (name)
   `(solve% ',(symb 'problem- name)))