# HG changeset patch # User Steve Losh # Date 1577038890 18000 # Node ID 72cd66a36853d24c4e73c73be9de9cbd3e4d3845 # Parent 93ba483d9bc9266f33400de2518e828769aa6cfc# Parent dbd7237ece35d4833d1e1f17030b56a84db99f23 Merge. diff -r 93ba483d9bc9 -r 72cd66a36853 rosalind.asd --- a/rosalind.asd Sun Aug 25 22:59:19 2019 -0400 +++ b/rosalind.asd Sun Dec 22 13:21:30 2019 -0500 @@ -24,6 +24,7 @@ :1am :alexandria :cl-digraph + :cl-digraph.dot :cl-ppcre :drakma :iterate diff -r 93ba483d9bc9 -r 72cd66a36853 src/problems/dna.lisp --- a/src/problems/dna.lisp Sun Aug 25 22:59:19 2019 -0400 +++ b/src/problems/dna.lisp Sun Dec 22 13:21:30 2019 -0500 @@ -38,4 +38,7 @@ (gethash #\G results 0) (gethash #\T results 0)))) -;; (problem-dna "AT") + +#; Scratch -------------------------------------------------------------------- + +(problem-dna "AT") diff -r 93ba483d9bc9 -r 72cd66a36853 src/problems/inod.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/problems/inod.lisp Sun Dec 22 13:21:30 2019 -0500 @@ -0,0 +1,24 @@ +(in-package :rosalind) + +;; This one is trivial once you know the closed-form solution of N-2. The +;; intuition for that can come in two parts. +;; +;; First, a rooted binary tree has N-1 internal nodes. This is because at any +;; given point as you're building the tree, you select 2 of the remaining nodes +;; and join them together with an internal node, which reduces the total +;; remaining by 1. You end when there is only one remaining node (the root) and +;; so you did N-1 subtractions. +;; +;; To convert this to an unrooted tree, you replace the root node with an edge, +;; which subtracts one more internal node from the graph. So you're left with +;; N-2 internal nodes. + +(define-problem inod (data stream) + "4" + "2" + (- (read data) 2)) + + +#; Scratch -------------------------------------------------------------------- + +(problem-inod) diff -r 93ba483d9bc9 -r 72cd66a36853 src/problems/long.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/problems/long.lisp Sun Dec 22 13:21:30 2019 -0500 @@ -0,0 +1,39 @@ +(in-package :rosalind) + +(defparameter *input-long* + ">Rosalind_56 +ATTAGACCTG +>Rosalind_57 +CCTGCCGGAA +>Rosalind_58 +AGACCTGCCG +>Rosalind_59 +GCCGGAATAC") + +(defparameter *output-long* + "ATTAGACCTGCCGGAATAC") + + +(defun overlap (left right) + "Return the number of overlapping characters in `left` and `right`." + (iterate + (for size :from (min (length left) (length right)) :downto 0) + (for i = (- (length left) size)) + (finding size :such-that (string= left right :start1 i :end2 size)))) + +(defun join-overlapping (left right) + "Join `left` and `right` together, overlapping as much as possible." + (concatenate 'string left (subseq right (overlap left right)))) + + +(define-problem long (data stream) + *input-long* + *output-long* + (let* ((dna (mapcar #'cdr (read-fasta-into-alist data))) + (graph (digraph:make-digraph :initial-vertices dna :test #'equal))) + (dolist (left dna) + (dolist (right dna) + (when (and (not (equal left right)) + (> (overlap left right) (floor (length left) 2))) + (digraph:insert-edge graph right left)))) ; reversed edges for toposort + (reduce #'join-overlapping (digraph:topological-sort graph)))) diff -r 93ba483d9bc9 -r 72cd66a36853 src/problems/rear.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/problems/rear.lisp Sun Dec 22 13:21:30 2019 -0500 @@ -0,0 +1,66 @@ +(in-package :rosalind) + +(defparameter *input-rear* "1 2 3 4 5 6 7 8 9 10 +3 1 5 2 7 4 9 6 10 8 + +3 10 8 2 5 4 7 1 6 9 +5 2 3 1 7 4 10 8 6 9 + +8 6 7 9 4 1 3 10 2 5 +8 2 7 6 9 1 5 3 10 4 + +3 9 10 4 1 8 6 7 5 2 +2 9 8 5 1 7 3 4 6 10 + +1 2 3 4 5 6 7 8 9 10 +1 2 3 4 5 6 7 8 9 10") + +(defparameter *output-rear* "9 4 5 7 0") + +(defun bounds-list (start end &key (min-length 0)) + "Return a list of all subseq bounds between `start` and `end`." + (iterate outer + (for s :from start :below end) + (iterate (for e :from (+ s min-length) :to end) + (in outer (collect (cons s e)))))) + +(defun nreverse-vector (vector &key start end) + (iterate (for i :from start) + (for j :downfrom (1- end)) + (repeat (floor (- end start) 2)) + (rotatef (aref vector i) + (aref vector j))) + vector) + +(defun reverse-vector (vector &key start end) + (nreverse-vector (copy-seq vector) :start start :end end)) + +(defun reversals (vector &key start end) + (iterate (for (s . e) :in (bounds-list start end :min-length 2)) + (collect (reverse-vector vector :start s :end e)))) + +(defun reversals-required (from to) + (iterate + (with remaining = (make-queue)) ; queue of (score . state) + (initially (enqueue (cons 0 from) remaining)) + (for (n . v) = (dequeue remaining)) + (for start = (mismatch v to)) + (for end = (mismatch v to :from-end t)) + (when (null start) + (return n)) + (incf n) + (dolist (r (reversals v :start start :end end)) + (enqueue (cons n r) remaining)))) + +(define-problem rear (data string) + *input-rear* + *output-rear* + (let ((pairs (-<> data + (str:split (format nil "~2%") <>) + (mapcar (curry #'str:split #\newline) <>) + (mapcar (curry #'mapcar #'read-all-from-string) <>) + (mapcar (curry #'mapcar (rcurry #'coerce 'vector)) <>)))) + (iterate (for (from to) :in pairs) + (collect (time (reversals-required from to)))))) + + diff -r 93ba483d9bc9 -r 72cd66a36853 src/problems/seto.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/problems/seto.lisp Sun Dec 22 13:21:30 2019 -0500 @@ -0,0 +1,42 @@ +(in-package :rosalind) + +(defparameter *input-seto* + "10 +{1, 2, 3, 4, 5} +{2, 8, 5, 10}") + +(defparameter *output-seto* + "{1, 2, 3, 4, 5, 8, 10} +{2, 5} +{1, 3, 4} +{8, 10} +{8, 9, 10, 6, 7} +{1, 3, 4, 6, 7, 9}") + +(defun set-string (set) + ;; Sort for consistent unit test output. + (format nil "{~{~D~^, ~}}" (sort (copy-seq set) #'<))) + +(defun parse-set (string) + (mapcar #'parse-integer (ppcre:all-matches-as-strings "\\d+" string))) + +(define-problem seto (data stream) + *input-seto* + *output-seto* + (let ((u (alexandria:iota (read data) :start 1)) + (a (parse-set (read-line data))) + (b (parse-set (read-line data)))) + (_ (list (union a b) ; to hell with it, we'll just use CL's built-in stuff + (intersection a b) + (set-difference a b) + (set-difference b a) + (set-difference u a) + (set-difference u b)) + (mapcar #'set-string _) + (str:join (string #\newline) _)))) + + +#; Scratch -------------------------------------------------------------------- + +(problem-seto) +;; (solve sset) diff -r 93ba483d9bc9 -r 72cd66a36853 src/problems/sset.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/problems/sset.lisp Sun Dec 22 13:21:30 2019 -0500 @@ -0,0 +1,16 @@ +(in-package :rosalind) + +;; The cardinality of a power set is 2ⁿ, because you can represent an individual +;; set as a binary string where 1 means the element is included and 0 is not, +;; and there are 2ⁿ possible binary strings of length n. + +(define-problem sset (data stream) + "3" + "8" + (mod (expt 2 (read data)) 1000000)) + + +#; Scratch -------------------------------------------------------------------- + +(problem-sset) +(solve sset) diff -r 93ba483d9bc9 -r 72cd66a36853 src/problems/subs.lisp --- a/src/problems/subs.lisp Sun Aug 25 22:59:19 2019 -0400 +++ b/src/problems/subs.lisp Sun Dec 22 13:21:30 2019 -0500 @@ -17,6 +17,3 @@ (collect (1+ pos) :into result) (finally (return (str:join " " result)))))) -;; (problem-subs) -;; (solve subs) - diff -r 93ba483d9bc9 -r 72cd66a36853 src/problems/tree.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/problems/tree.lisp Sun Dec 22 13:21:30 2019 -0500 @@ -0,0 +1,49 @@ +(in-package :rosalind) + +;; For every edge we add we can link up two previously-unconnected trees. If we +;; have N separate trees, we just need N-1 edges to connect them. So the +;; problem reduces itself down to just counting the distinct subgraphs of the +;; given graph. Some day I really need to add that as a utility function to +;; cl-digraph. + +(defparameter *input-tree* + "10 +1 2 +2 8 +4 10 +5 9 +6 10 +7 9") + +(defparameter *output-tree* + "3") + +(defun subgraph-vertices (graph start) + (gathering (digraph:map-depth-first #'gather graph start))) + +(defun subgraphs (graph) + "Return a list of lists of vertices of the distinct subgraphs of `graph`." + (let ((graph (digraph:copy-digraph graph))) + (iterate + (for (values vertex found) = (digraph:arbitrary-vertex graph)) + (while found) + (for subgraph = (subgraph-vertices graph vertex)) + (collect subgraph) + (map nil (curry #'digraph:remove-vertex graph) subgraph)))) + +(define-problem tree (data stream) + *input-tree* + *output-tree* + (let ((graph (digraph:make-digraph + :initial-vertices (alexandria:iota (read data) :start 1)))) + (iterate + (for line :in-stream data :using #'read-line) + (for (a b) = (read-all-from-string line)) + (digraph:insert-edge graph a b) + (digraph:insert-edge graph b a)) + (1- (length (subgraphs graph))))) + + +#; Scratch -------------------------------------------------------------------- + +(problem-tree) diff -r 93ba483d9bc9 -r 72cd66a36853 src/utils.lisp --- a/src/utils.lisp Sun Aug 25 22:59:19 2019 -0400 +++ b/src/utils.lisp Sun Dec 22 13:21:30 2019 -0500 @@ -298,7 +298,7 @@ `vars` must be a list of two symbols that will be bound to the label and data, respectively, on each iteration. - `stream` can be either a string or a character input stream. + `source` can be either a string or a character input stream. `generate` is supported. @@ -405,5 +405,7 @@ (pbcopy (aesthetic-string (funcall problem input))))) (defmacro solve (name) + (assert (symbolp name) () + "Usage: (solve foo)~%foo should not be quoted.") `(solve% ',(symb 'problem- name)))