72cd66a36853

Merge.
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Sun, 22 Dec 2019 13:21:30 -0500
parents 93ba483d9bc9 (current diff) dbd7237ece35 (diff)
children 892a16d8007e
branches/tags (none)
files src/utils.lisp

Changes

--- a/rosalind.asd	Sun Aug 25 22:59:19 2019 -0400
+++ b/rosalind.asd	Sun Dec 22 13:21:30 2019 -0500
@@ -24,6 +24,7 @@
                :1am
                :alexandria
                :cl-digraph
+               :cl-digraph.dot
                :cl-ppcre
                :drakma
                :iterate
--- a/src/problems/dna.lisp	Sun Aug 25 22:59:19 2019 -0400
+++ b/src/problems/dna.lisp	Sun Dec 22 13:21:30 2019 -0500
@@ -38,4 +38,7 @@
             (gethash #\G results 0)
             (gethash #\T results 0))))
 
-;; (problem-dna "AT")
+
+#; Scratch --------------------------------------------------------------------
+
+(problem-dna "AT")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/inod.lisp	Sun Dec 22 13:21:30 2019 -0500
@@ -0,0 +1,24 @@
+(in-package :rosalind)
+
+;; This one is trivial once you know the closed-form solution of N-2.  The
+;; intuition for that can come in two parts.
+;;
+;; First, a rooted binary tree has N-1 internal nodes.  This is because at any
+;; given point as you're building the tree, you select 2 of the remaining nodes
+;; and join them together with an internal node, which reduces the total
+;; remaining by 1.  You end when there is only one remaining node (the root) and
+;; so you did N-1 subtractions.
+;;
+;; To convert this to an unrooted tree, you replace the root node with an edge,
+;; which subtracts one more internal node from the graph.  So you're left with
+;; N-2 internal nodes.
+
+(define-problem inod (data stream)
+    "4"
+    "2"
+  (- (read data) 2))
+
+
+#; Scratch --------------------------------------------------------------------
+
+(problem-inod)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/long.lisp	Sun Dec 22 13:21:30 2019 -0500
@@ -0,0 +1,39 @@
+(in-package :rosalind)
+
+(defparameter *input-long*
+  ">Rosalind_56
+ATTAGACCTG
+>Rosalind_57
+CCTGCCGGAA
+>Rosalind_58
+AGACCTGCCG
+>Rosalind_59
+GCCGGAATAC")
+
+(defparameter *output-long*
+  "ATTAGACCTGCCGGAATAC")
+
+
+(defun overlap (left right)
+  "Return the number of overlapping characters in `left` and `right`."
+  (iterate
+    (for size :from (min (length left) (length right)) :downto 0)
+    (for i = (- (length left) size))
+    (finding size :such-that (string= left right :start1 i :end2 size))))
+
+(defun join-overlapping (left right)
+  "Join `left` and `right` together, overlapping as much as possible."
+  (concatenate 'string left (subseq right (overlap left right))))
+
+
+(define-problem long (data stream)
+    *input-long*
+    *output-long*
+  (let* ((dna (mapcar #'cdr (read-fasta-into-alist data)))
+         (graph (digraph:make-digraph :initial-vertices dna :test #'equal)))
+    (dolist (left dna)
+      (dolist (right dna)
+        (when (and (not (equal left right))
+                   (> (overlap left right) (floor (length left) 2)))
+          (digraph:insert-edge graph right left)))) ; reversed edges for toposort
+    (reduce #'join-overlapping (digraph:topological-sort graph))))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/rear.lisp	Sun Dec 22 13:21:30 2019 -0500
@@ -0,0 +1,66 @@
+(in-package :rosalind)
+
+(defparameter *input-rear* "1 2 3 4 5 6 7 8 9 10
+3 1 5 2 7 4 9 6 10 8
+
+3 10 8 2 5 4 7 1 6 9
+5 2 3 1 7 4 10 8 6 9
+
+8 6 7 9 4 1 3 10 2 5
+8 2 7 6 9 1 5 3 10 4
+
+3 9 10 4 1 8 6 7 5 2
+2 9 8 5 1 7 3 4 6 10
+
+1 2 3 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10")
+
+(defparameter *output-rear* "9 4 5 7 0")
+
+(defun bounds-list (start end &key (min-length 0))
+  "Return a list of all subseq bounds between `start` and `end`."
+  (iterate outer
+    (for s :from start :below end)
+    (iterate (for e :from (+ s min-length) :to end)
+      (in outer (collect (cons s e))))))
+
+(defun nreverse-vector (vector &key start end)
+  (iterate (for i :from start)
+           (for j :downfrom (1- end))
+           (repeat (floor (- end start) 2))
+           (rotatef (aref vector i)
+                    (aref vector j)))
+  vector)
+
+(defun reverse-vector (vector &key start end)
+  (nreverse-vector (copy-seq vector) :start start :end end))
+
+(defun reversals (vector &key start end)
+  (iterate (for (s . e) :in (bounds-list start end :min-length 2))
+           (collect (reverse-vector vector :start s :end e))))
+
+(defun reversals-required (from to)
+  (iterate
+    (with remaining = (make-queue)) ; queue of (score . state)
+    (initially (enqueue (cons 0 from) remaining))
+    (for (n . v) = (dequeue remaining))
+    (for start = (mismatch v to))
+    (for end = (mismatch v to :from-end t))
+    (when (null start)
+      (return n))
+    (incf n)
+    (dolist (r (reversals v :start start :end end))
+      (enqueue (cons n r) remaining))))
+
+(define-problem rear (data string)
+    *input-rear*
+    *output-rear*
+  (let ((pairs (-<> data
+                 (str:split (format nil "~2%") <>)
+                 (mapcar (curry #'str:split #\newline) <>)
+                 (mapcar (curry #'mapcar #'read-all-from-string) <>)
+                 (mapcar (curry #'mapcar (rcurry #'coerce 'vector)) <>))))
+    (iterate (for (from to) :in pairs)
+             (collect (time (reversals-required from to))))))
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/seto.lisp	Sun Dec 22 13:21:30 2019 -0500
@@ -0,0 +1,42 @@
+(in-package :rosalind)
+
+(defparameter *input-seto*
+  "10
+{1, 2, 3, 4, 5}
+{2, 8, 5, 10}")
+
+(defparameter *output-seto*
+  "{1, 2, 3, 4, 5, 8, 10}
+{2, 5}
+{1, 3, 4}
+{8, 10}
+{8, 9, 10, 6, 7}
+{1, 3, 4, 6, 7, 9}")
+
+(defun set-string (set)
+  ;; Sort for consistent unit test output.
+  (format nil "{~{~D~^, ~}}" (sort (copy-seq set) #'<)))
+
+(defun parse-set (string)
+  (mapcar #'parse-integer (ppcre:all-matches-as-strings "\\d+" string)))
+
+(define-problem seto (data stream)
+    *input-seto*
+    *output-seto*
+  (let ((u (alexandria:iota (read data) :start 1))
+        (a (parse-set (read-line data)))
+        (b (parse-set (read-line data))))
+    (_ (list (union a b) ; to hell with it, we'll just use CL's built-in stuff
+             (intersection a b)
+             (set-difference a b)
+             (set-difference b a)
+             (set-difference u a)
+             (set-difference u b))
+      (mapcar #'set-string _)
+      (str:join (string #\newline) _))))
+
+
+#; Scratch --------------------------------------------------------------------
+
+(problem-seto)
+;; (solve sset)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/sset.lisp	Sun Dec 22 13:21:30 2019 -0500
@@ -0,0 +1,16 @@
+(in-package :rosalind)
+
+;; The cardinality of a power set is 2ⁿ, because you can represent an individual
+;; set as a binary string where 1 means the element is included and 0 is not,
+;; and there are 2ⁿ possible binary strings of length n.
+
+(define-problem sset (data stream)
+    "3"
+    "8"
+  (mod (expt 2 (read data)) 1000000))
+
+
+#; Scratch --------------------------------------------------------------------
+
+(problem-sset)
+(solve sset)
--- a/src/problems/subs.lisp	Sun Aug 25 22:59:19 2019 -0400
+++ b/src/problems/subs.lisp	Sun Dec 22 13:21:30 2019 -0500
@@ -17,6 +17,3 @@
       (collect (1+ pos) :into result)
       (finally (return (str:join " " result))))))
 
-;; (problem-subs)
-;; (solve subs)
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/tree.lisp	Sun Dec 22 13:21:30 2019 -0500
@@ -0,0 +1,49 @@
+(in-package :rosalind)
+
+;; For every edge we add we can link up two previously-unconnected trees.  If we
+;; have N separate trees, we just need N-1 edges to connect them.  So the
+;; problem reduces itself down to just counting the distinct subgraphs of the
+;; given graph.  Some day I really need to add that as a utility function to
+;; cl-digraph.
+
+(defparameter *input-tree*
+  "10
+1 2
+2 8
+4 10
+5 9
+6 10
+7 9")
+
+(defparameter *output-tree*
+  "3")
+
+(defun subgraph-vertices (graph start)
+  (gathering (digraph:map-depth-first #'gather graph start)))
+
+(defun subgraphs (graph)
+  "Return a list of lists of vertices of the distinct subgraphs of `graph`."
+  (let ((graph (digraph:copy-digraph graph)))
+    (iterate
+      (for (values vertex found) = (digraph:arbitrary-vertex graph))
+      (while found)
+      (for subgraph = (subgraph-vertices graph vertex))
+      (collect subgraph)
+      (map nil (curry #'digraph:remove-vertex graph) subgraph))))
+
+(define-problem tree (data stream)
+    *input-tree*
+    *output-tree*
+  (let ((graph (digraph:make-digraph
+                 :initial-vertices (alexandria:iota (read data) :start 1))))
+    (iterate
+      (for line :in-stream data :using #'read-line)
+      (for (a b) = (read-all-from-string line))
+      (digraph:insert-edge graph a b)
+      (digraph:insert-edge graph b a))
+    (1- (length (subgraphs graph)))))
+
+
+#; Scratch --------------------------------------------------------------------
+
+(problem-tree)
--- a/src/utils.lisp	Sun Aug 25 22:59:19 2019 -0400
+++ b/src/utils.lisp	Sun Dec 22 13:21:30 2019 -0500
@@ -298,7 +298,7 @@
   `vars` must be a list of two symbols that will be bound to the label and data,
   respectively, on each iteration.
 
-  `stream` can be either a string or a character input stream.
+  `source` can be either a string or a character input stream.
 
   `generate` is supported.
 
@@ -405,5 +405,7 @@
     (pbcopy (aesthetic-string (funcall problem input)))))
 
 (defmacro solve (name)
+  (assert (symbolp name) ()
+    "Usage: (solve foo)~%foo should not be quoted.")
   `(solve% ',(symb 'problem- name)))