f94bcb404676

LONG
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Fri, 01 Mar 2019 15:31:38 -0500
parents 64aa58880d55
children 814719fdaa0d
branches/tags (none)
files rosalind.asd src/problems/long.lisp src/utils.lisp

Changes

--- a/rosalind.asd	Sat Feb 23 12:42:13 2019 -0500
+++ b/rosalind.asd	Fri Mar 01 15:31:38 2019 -0500
@@ -24,6 +24,7 @@
                :1am
                :alexandria
                :cl-digraph
+               :cl-digraph.dot
                :cl-ppcre
                :drakma
                :iterate
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/long.lisp	Fri Mar 01 15:31:38 2019 -0500
@@ -0,0 +1,39 @@
+(in-package :rosalind)
+
+(defparameter *input-long*
+  ">Rosalind_56
+ATTAGACCTG
+>Rosalind_57
+CCTGCCGGAA
+>Rosalind_58
+AGACCTGCCG
+>Rosalind_59
+GCCGGAATAC")
+
+(defparameter *output-long*
+  "ATTAGACCTGCCGGAATAC")
+
+
+(defun overlap (left right)
+  "Return the number of overlapping characters in `left` and `right`."
+  (iterate
+    (for size :from (min (length left) (length right)) :downto 0)
+    (for i = (- (length left) size))
+    (finding size :such-that (string= left right :start1 i :end2 size))))
+
+(defun join-overlapping (left right)
+  "Join `left` and `right` together, overlapping as much as possible."
+  (concatenate 'string left (subseq right (overlap left right))))
+
+
+(define-problem long (data stream)
+    *input-long*
+    *output-long*
+  (let* ((dna (mapcar #'cdr (read-fasta-into-alist data)))
+         (graph (digraph:make-digraph :initial-vertices dna :test #'equal)))
+    (dolist (left dna)
+      (dolist (right dna)
+        (when (and (not (equal left right))
+                   (> (overlap left right) (floor (length left) 2)))
+          (digraph:insert-edge graph right left)))) ; reversed edges for toposort
+    (reduce #'join-overlapping (digraph:topological-sort graph))))
--- a/src/utils.lisp	Sat Feb 23 12:42:13 2019 -0500
+++ b/src/utils.lisp	Fri Mar 01 15:31:38 2019 -0500
@@ -298,7 +298,7 @@
   `vars` must be a list of two symbols that will be bound to the label and data,
   respectively, on each iteration.
 
-  `stream` can be either a string or a character input stream.
+  `source` can be either a string or a character input stream.
 
   `generate` is supported.