# HG changeset patch # User Steve Losh # Date 1541641041 18000 # Node ID b8745a9fccd422270c07b9100f4fcd094590c695 # Parent e279056b1a5b2935f6e16694234803e68b0fa5f1 LCSM diff -r e279056b1a5b -r b8745a9fccd4 rosalind.asd --- a/rosalind.asd Sat Nov 03 18:20:48 2018 -0400 +++ b/rosalind.asd Wed Nov 07 20:37:21 2018 -0500 @@ -41,5 +41,6 @@ (:file "fibd") (:file "cons") (:file "grph") - (:file "prtm"))))))) + (:file "prtm") + (:file "lcsm"))))))) diff -r e279056b1a5b -r b8745a9fccd4 src/problems/lcsm.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/problems/lcsm.lisp Wed Nov 07 20:37:21 2018 -0500 @@ -0,0 +1,79 @@ +(in-package :rosalind) + +;; This one sucked. We should use suffix trees some day but I just hacked this +;; together for now. + +(defparameter *input-lcsm* ">Rosalind_1 +GATTACA +>Rosalind_2 +TAGACCA +>Rosalind_3 +ATACA") + +(defun compute-substring-table% (string1 string2) + ;; Compute the table of substring lengths. + ;; + ;; a b c d e c + ;; x 0 0 0 0 0 0 + ;; a 1 0 0 0 0 0 + ;; b 0 2 0 0 0 0 + ;; c 0 0 3 0 0 1 + ;; e 0 0 0 0 0 1 + (check-type string1 string) + (check-type string2 string) + (let ((table (make-array (list (length string1) (length string2))))) + (dotimes (i1 (length string1)) + (dotimes (i2 (length string2)) + (setf (aref table i1 i2) + (if (char= (aref string1 i1) (aref string2 i2)) + (if (or (zerop i1) (zerop i2)) + 1 + (1+ (aref table (1- i1) (1- i2)))) + 0)))) + table)) + +(defun-inline substring-at% (string index length) + ;; Given a matching string/table-index and a length, return the substring. + ;; The table effectively stores (start, end] but subseq wants [start, end). + (let ((end (1+ index))) + (subseq string (- end length) end))) + +(defun-inline find-substrings-of-length% (table string1 length) + ;; Find all the substrings in the table with the given length. + (iterate (for (l i1 nil) :in-array table) + (when (= length l) + (collect (substring-at% string1 i1 length))))) + +(defun-inline find-maximum-length% (table) + ;; Find the highest length in the table. + (iterate (for l :across-flat-array table) + (maximizing l))) + +(defun longest-common-substrings (string1 string2) + "Return a list of the longest common substrings of `string1` and `string2`." + (let ((table (compute-substring-table% string1 string2))) + (find-substrings-of-length% table string1 (find-maximum-length% table)))) + +(defun longest (strings) + "Return a list of the longest strings in `strings`." + (remove-if-not + (curry #'= (alexandria:extremum (mapcar #'length strings) #'>)) + strings :key #'length)) + +(defun longest-common-substrings-of-any (substrings string) + "Return the longest common substrings of `string` and any one of `substrings`." + (-<> (iterate + (for substring :in substrings) + (appending (longest-common-substrings substring string))) + longest + (remove-duplicates <> :test #'string=))) + +(define-problem lcsm (data stream) + *input-lcsm* + "AC" + (let ((lines (iterate (for (nil line) :in-fasta data) + (collect line)))) + (-<> (reduce #'longest-common-substrings-of-any (rest lines) + :initial-value (list (first lines))) + (sort <> #'string<) ; tests + first))) diff -r e279056b1a5b -r b8745a9fccd4 src/utils.lisp --- a/src/utils.lisp Sat Nov 03 18:20:48 2018 -0400 +++ b/src/utils.lisp Wed Nov 07 20:37:21 2018 -0500 @@ -223,7 +223,7 @@ ;;;; Testing ------------------------------------------------------------------ (defmacro define-test (problem input output &optional (test 'string=)) `(test ,(symb 'test- problem) - (is (,test ,output (,problem ,input))))) + (is (,test ,output (aesthetic-string (,problem ,input)))))) (defun run-tests () (1am:run)) @@ -241,7 +241,7 @@ (setf ,arg ,(ecase type (string `(ensure-string ,arg)) (stream `(ensure-stream ,arg)))) - (aesthetic-string (progn ,@body))) + (progn ,@body)) (setf (get ',symbol 'rosalind-name) ,(string-downcase name)) (define-test ,symbol ,sample-input ,sample-output) ',symbol)))) @@ -251,7 +251,7 @@ (defun solve% (problem) (with-open-file (input (problem-data-path problem)) - (pbcopy (funcall problem input)))) + (pbcopy (aesthetic-string (funcall problem input))))) (defmacro solve (name) `(solve% ',(symb 'problem- name)))