--- a/rosalind.asd Sat Nov 10 16:14:15 2018 -0500
+++ b/rosalind.asd Sat Nov 10 19:33:59 2018 -0500
@@ -42,6 +42,7 @@
(:file "iprb")
(:file "lcsm")
(:file "lexf")
+ (:file "lgis")
(:file "lia")
(:file "mprt")
(:file "mrna")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/lgis.lisp Sat Nov 10 19:33:59 2018 -0500
@@ -0,0 +1,132 @@
+(in-package :rosalind)
+
+(defparameter *input-lgis*
+ "5
+5 1 4 2 3")
+
+(defparameter *output-lgis*
+ "1 2 3
+5 4 3")
+
+
+;; There's an nlog(n) algorithm described at Wikipedia:
+;; https://en.wikipedia.org/wiki/Longest_increasing_subsequence#Efficient_algorithms
+;;
+;; Unfortunately it's pretty painful to read because they insist on using some
+;; strange programming language that uses indentation for control flow but
+;; doesn't have .length on arrays, doesn't have adjustable vectors, etc. What
+;; even is this language? And of course they use one-letter names for
+;; everything, because fuck you. At least they describe the contents of each
+;; auxiliary array precisely. That's nice.
+;;
+;; Before we start: let's define the term "tail" to mean the final element of
+;; a subsequence.
+;;
+;; Let's also pretend we're dealing with the < predicate instead of an arbitrary
+;; one for this description, just to make things a little easier to talk about.
+;;
+;; We set up two arrays before we start:
+;;
+;; * TAIL-INDEXES: An array that contains indexes of tails. To be more precise:
+;; TAIL-INDEXES[n] contains the index of the *minimum* tail for a subsequence
+;; of length N.
+;; * PREDECESSORS: PREDECESSORS[i] stores the index of the predecessor of
+;; SEQ[i] in the resulting subsequence.
+;;
+;; As an example (entries marked with _ are irrelevant, and we'll use characters
+;; as values for clarity):
+;;
+;; SEQUENCE [a, d, c, b]
+;; (indexes) 0 1 2 3
+;;
+;; TAIL-INDEXES [_, 0, 2]
+;; PREDECESSORS [NIL, _, _, 0]
+;; RESULT [a, b]
+;;
+;; A few things to notice:
+;;
+;; * TAIL-INDEXES[0] is garbage, because a subsequence of length zero doesn't
+;; *have* a tail. We could do 1- everywhere to save a word but come on.
+;; * TAIL-INDEXES[1] is 0, the index of the tail of the subsequence of length 1 is 0 (a).
+;; * TAIL-INDEXES[2] is 3, the index of the tail of the subsequence of length 2 is 3 (b).
+;; * TAIL-INDEXES only has elements 0 (garbage), 1, and 2, because the longest
+;; increasing subsequence is only 2 elements long. We have extendable vectors
+;; in Lisp, let's use them instead of doing all the bookkeeping by hand.
+;; * PREDECESSORS[3] is 0, because in the final result the predecessor of SEQ[3] (b) is SEQ[0] (a).
+;; * PREDECESSORS[0] is NIL, because in the final result SEQ[0] (a) is the first element.
+;;
+;; Essentially the algorithm goes like this:
+;;
+;; * Handle the first element by hand.
+;; * Iterate over the sequence:
+;; * Bisect TAIL-INDEXES at each iteration to find where the next element fits.
+;; * Update or extend TAIL-INDEXES with each successive element.
+;; * Extend PREDECESSORS to record the tail index before to the one we just used.
+;; * Once we've filled in the arrays, we can walk through PREDECESSORS, starting
+;; at the final tail index.
+
+(defun longest-monotonic-subsequence
+ (sequence predicate &key (result-type 'list))
+ "Return the longest monotonic subsequence of `sequence`.
+
+ `predicate` must be a comparison predicate like `<` or `>=`.
+
+ If there are multiple longest sequences, an arbitrary one is returned
+
+ Examples:
+
+ (longest-monotonic-subsequence '() #'<) ; => ()
+ (longest-monotonic-subsequence '(1) #'<) ; => (1)
+ (longest-monotonic-subsequence '(2 1) #'<) ; => (1) or (2)
+ (longest-monotonic-subsequence '(1 2) #'<) ; => (1 2)
+ (longest-monotonic-subsequence '(2 1) #'>) ; => (2 1)
+ (longest-monotonic-subsequence '(3 1 1 2) #'<) ; => (1 2)
+ (longest-monotonic-subsequence '(3 1 1 2) #'<=) ; => (1 1 2)
+ (longest-monotonic-subsequence '(3 1 1 2) #'>=) ; => (3 1 1)
+ (longest-monotonic-subsequence \"hello, world!\" #'char<
+ :result-type 'string)
+ ; => \"helor\"
+
+ "
+ (let* ((sequence (coerce sequence 'vector))
+ (n (length sequence))
+ (tail-indexes (make-array (1+ n) :fill-pointer 1))
+ (predecessors (make-array n :fill-pointer 0)))
+ (coerce
+ (if (zerop n)
+ (list) ; just bail early on this edge case
+ (progn
+ ;; Element 0 is always the first tail.
+ (vector-push-extend 0 tail-indexes)
+ (vector-push-extend nil predecessors)
+ (iterate
+ (for value :in-vector sequence :with-index i :from 1)
+ (for (values nil tail-index) =
+ (bisect-right predicate tail-indexes value
+ :start 1 ; ignore the garbage
+ :key (curry #'aref sequence))) ; deref when bisecting
+ (if tail-index
+ (progn
+ ;; Found a more minimal tail for existing subseq
+ (setf (aref tail-indexes tail-index) i)
+ (vector-push-extend (aref tail-indexes (1- tail-index))
+ predecessors))
+ (progn
+ ;; Found the largest tail so far, extend our subseqs
+ (vector-push-extend (vector-last tail-indexes) predecessors)
+ (vector-push-extend i tail-indexes))))
+ (iterate
+ (for i :first (vector-last tail-indexes) :then (aref predecessors i))
+ (while i)
+ (collect (aref sequence i) :at :beginning))))
+ result-type)))
+
+
+(define-problem lgis (data stream)
+ *input-lgis*
+ *output-lgis*
+ (let* ((size (read data))
+ (elements (gimme size (read data))))
+ (with-output-to-string (s)
+ (format s "~{~D~^ ~}~%" (longest-monotonic-subsequence elements #'<))
+ (format s "~{~D~^ ~}" (longest-monotonic-subsequence elements #'>)))))