b1baea60c24f

Merge.
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Sun, 16 Dec 2018 18:01:43 -0500
parents 5d71b5f0dfb5 (diff) 1d0852c279f7 (current diff)
children df2880a47218
branches/tags (none)
files rosalind.asd

Changes

--- a/.lispwords	Sat Dec 15 16:56:57 2018 -0500
+++ b/.lispwords	Sun Dec 16 18:01:43 2018 -0500
@@ -1,3 +1,5 @@
 (1 codon-case)
 (1 labels-memoized)
 (4 define-problem)
+(1 do-sum Σ)
+(1 do-product Π)
--- a/rosalind.asd	Sat Dec 15 16:56:57 2018 -0500
+++ b/rosalind.asd	Sun Dec 16 18:01:43 2018 -0500
@@ -24,6 +24,8 @@
                :1am
                :alexandria
                :cl-digraph
+               :cl-ppcre
+               :drakma
                :iterate
                :losh
                :str
--- a/src/problems/grph.lisp	Sat Dec 15 16:56:57 2018 -0500
+++ b/src/problems/grph.lisp	Sun Dec 16 18:01:43 2018 -0500
@@ -1,6 +1,7 @@
 (in-package :rosalind)
 
-(defparameter *input-grph* ">Rosalind_0498
+(defparameter *input-grph*
+  ">Rosalind_0498
 AAATAAA
 >Rosalind_2391
 AAATTTT
@@ -11,8 +12,9 @@
 >Rosalind_5013
 GGGTGGG")
 
-(defparameter *output-grph* "Rosalind_0498 Rosalind_2391
-Rosalind_0498 Rosalind_0442
+(defparameter *output-grph*
+  "Rosalind_0498 Rosalind_0442
+Rosalind_0498 Rosalind_2391
 Rosalind_2391 Rosalind_2323
 ")
 
@@ -48,9 +50,10 @@
              data)
     ;; (ql:quickload :cl-digraph.dot)
     ;; (digraph.dot:draw graph)
-    (with-output-to-string (s)
-      (iterate (for (l . r) :in (digraph:edges graph))
-               (format s "~A ~A~%" l r)))))
+    (-<> (iterate (for (l . r) :in (digraph:edges graph))
+                  (collect (format nil "~A ~A~%" l r)))
+      (sort <> #'string<)
+      (str:join nil <>))))
 
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/lexf.lisp	Sun Dec 16 18:01:43 2018 -0500
@@ -0,0 +1,42 @@
+(in-package :rosalind)
+
+(defparameter *input-lexf*
+  "A C G T
+2")
+
+(defparameter *output-lexf*
+  "AA
+AC
+AG
+AT
+CA
+CC
+CG
+CT
+GA
+GC
+GG
+GT
+TA
+TC
+TG
+TT
+")
+
+
+(define-problem lexf (data stream)
+    *input-lexf*
+    *output-lexf*
+  (let* ((alphabet (sort (remove #\space (read-line data)) #'char<))
+         (n (read data))
+         (string (make-string n)))
+    (with-output-to-string (s)
+      (recursively ((n n)
+                    (i 0))
+        (if (zerop n)
+          (progn (write-string string s)
+                 (terpri s))
+          (map nil (lambda (ch)
+                     (setf (aref string i) ch)
+                     (recur (1- n) (1+ i)))
+               alphabet))))))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/lgis.lisp	Sun Dec 16 18:01:43 2018 -0500
@@ -0,0 +1,132 @@
+(in-package :rosalind)
+
+(defparameter *input-lgis*
+  "5
+5 1 4 2 3")
+
+(defparameter *output-lgis*
+  "1 2 3
+5 4 3")
+
+
+;; There's an nlog(n) algorithm described at Wikipedia:
+;; https://en.wikipedia.org/wiki/Longest_increasing_subsequence#Efficient_algorithms
+;;
+;; Unfortunately it's pretty painful to read because they insist on using some
+;; strange programming language that uses indentation for control flow but
+;; doesn't have .length on arrays, doesn't have adjustable vectors, etc.  What
+;; even is this language?  And of course they use one-letter names for
+;; everything, because fuck you.  At least they describe the contents of each
+;; auxiliary array precisely.  That's nice.
+;;
+;; Before we start: let's define the term "tail" to mean the final element of
+;; a subsequence.
+;;
+;; Let's also pretend we're dealing with the < predicate instead of an arbitrary
+;; one for this description, just to make things a little easier to talk about.
+;;
+;; We set up two arrays before we start:
+;;
+;; * TAIL-INDEXES: An array that contains indexes of tails.  To be more precise:
+;;   TAIL-INDEXES[n] contains the index of the *minimum* tail for a subsequence
+;;   of length N.
+;; * PREDECESSORS: PREDECESSORS[i] stores the index of the predecessor of
+;;   SEQ[i] in the resulting subsequence.
+;;
+;; As an example (entries marked with _ are irrelevant, and we'll use characters
+;; as values for clarity):
+;;
+;;     SEQUENCE     [a, d, c, b]
+;;       (indexes)   0  1  2  3
+;;
+;;     TAIL-INDEXES [_, 0, 2]
+;;     PREDECESSORS [NIL, _, _, 0]
+;;     RESULT       [a, b]
+;;
+;; A few things to notice:
+;;
+;; * TAIL-INDEXES[0] is garbage, because a subsequence of length zero doesn't
+;;   *have* a tail.  We could do 1- everywhere to save a word but come on.
+;; * TAIL-INDEXES[1] is 0, the index of the tail of the subsequence of length 1 is 0 (a).
+;; * TAIL-INDEXES[2] is 3, the index of the tail of the subsequence of length 2 is 3 (b).
+;; * TAIL-INDEXES only has elements 0 (garbage), 1, and 2, because the longest
+;;   increasing subsequence is only 2 elements long.  We have extendable vectors
+;;   in Lisp, let's use them instead of doing all the bookkeeping by hand.
+;; * PREDECESSORS[3] is   0, because in the final result the predecessor of SEQ[3] (b) is SEQ[0] (a).
+;; * PREDECESSORS[0] is NIL, because in the final result SEQ[0] (a) is the first element.
+;;
+;; Essentially the algorithm goes like this:
+;;
+;; * Handle the first element by hand.
+;; * Iterate over the sequence:
+;;   * Bisect TAIL-INDEXES at each iteration to find where the next element fits.
+;;   * Update or extend TAIL-INDEXES with each successive element.
+;;   * Extend PREDECESSORS to record the tail index before to the one we just used.
+;; * Once we've filled in the arrays, we can walk through PREDECESSORS, starting
+;;   at the final tail index.
+
+(defun longest-monotonic-subsequence
+    (sequence predicate &key (result-type 'list))
+  "Return the longest monotonic subsequence of `sequence`.
+
+  `predicate` must be a comparison predicate like `<` or `>=`.
+
+  If there are multiple longest sequences, an arbitrary one is returned
+
+  Examples:
+
+    (longest-monotonic-subsequence '()        #'<)            ; => ()
+    (longest-monotonic-subsequence '(1)       #'<)            ; => (1)
+    (longest-monotonic-subsequence '(2 1)     #'<)            ; => (1) or (2)
+    (longest-monotonic-subsequence '(1 2)     #'<)            ; => (1 2)
+    (longest-monotonic-subsequence '(2 1)     #'>)            ; => (2 1)
+    (longest-monotonic-subsequence '(3 1 1 2) #'<)            ; => (1 2)
+    (longest-monotonic-subsequence '(3 1 1 2) #'<=)           ; => (1 1 2)
+    (longest-monotonic-subsequence '(3 1 1 2) #'>=)           ; => (3 1 1)
+    (longest-monotonic-subsequence \"hello, world!\" #'char<
+                                   :result-type 'string)
+    ; => \"helor\"
+
+  "
+  (let* ((sequence (coerce sequence 'vector))
+         (n (length sequence))
+         (tail-indexes (make-array (1+ n) :fill-pointer 1))
+         (predecessors (make-array n :fill-pointer 0)))
+    (coerce
+      (if (zerop n)
+        (list) ; just bail early on this edge case
+        (progn
+          ;; Element 0 is always the first tail.
+          (vector-push-extend 0 tail-indexes)
+          (vector-push-extend nil predecessors)
+          (iterate
+            (for value :in-vector sequence :with-index i :from 1)
+            (for (values nil tail-index) =
+                 (bisect-right predicate tail-indexes value
+                               :start 1 ; ignore the garbage
+                               :key (curry #'aref sequence))) ; deref when bisecting
+            (if tail-index
+              (progn
+                ;; Found a more minimal tail for existing subseq
+                (setf (aref tail-indexes tail-index) i)
+                (vector-push-extend (aref tail-indexes (1- tail-index))
+                                    predecessors))
+              (progn
+                ;; Found the largest tail so far, extend our subseqs
+                (vector-push-extend (vector-last tail-indexes) predecessors)
+                (vector-push-extend i tail-indexes))))
+          (iterate
+            (for i :first (vector-last tail-indexes) :then (aref predecessors i))
+            (while i)
+            (collect (aref sequence i) :at :beginning))))
+      result-type)))
+
+
+(define-problem lgis (data stream)
+    *input-lgis*
+    *output-lgis*
+  (let* ((size (read data))
+         (elements (gimme size (read data))))
+    (with-output-to-string (s)
+      (format s "~{~D~^ ~}~%" (longest-monotonic-subsequence elements #'<))
+      (format s "~{~D~^ ~}" (longest-monotonic-subsequence elements #'>)))))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/lia.lisp	Sun Dec 16 18:01:43 2018 -0500
@@ -0,0 +1,109 @@
+(in-package :rosalind)
+
+;; When a heterozygous organism mates, its offspring have a 50% chance to be
+;; heterozygous themselves, *regardless of what the other mate happens to be*:
+;;
+;;         A  a       A  a       A  a
+;;      A AA Aa    A AA Aa    a aA aa
+;;      A AA Aa    a Aa aa    a aA aa
+;;
+;; Because everyone breeds with an Aa Bb mate, we don't need to worry about
+;; tracking populations along the way.  Every child has a 1/2 chance of being
+;; Aa, and likewise a 1/2 chance of being Bb.
+;;
+;; Because A's and B's are independent, this means there's a 1/2 * 1/2 = 1/4
+;; chance of any given child being Aa Bb.
+
+(defmacro do-sum ((var from to) &body body)
+  "Sum `body` with `var` iterating over `[from, to]`.
+
+  It's just Σ:
+
+      to
+      ===
+      \
+       >   body
+      /
+      ===
+      n = from
+
+  "
+  (once-only (to)
+    (with-gensyms (result)
+      `(do ((,var ,from (1+ ,var))
+            (,result 0))
+         ((> ,var ,to) ,result)
+         (incf ,result (progn ,@body))))))
+
+(defmacro do-product ((var from to) &body body)
+  "Multiply `body` with `var` iterating over `[from, to]`.
+
+  It's just Π:
+
+       to
+      =====
+       | |
+       | |  body
+       | |
+      n = from
+
+  "
+  (once-only (to)
+    (with-gensyms (result)
+      `(do ((,var ,from (1+ ,var))
+            (,result 1))
+         ((> ,var ,to) ,result)
+         (setf ,result (* ,result (progn ,@body)))))))
+
+
+(defmacro Σ (bindings &body body) ;; lol
+  `(do-sum ,bindings ,@body))
+
+(defmacro Π (bindings &body body) ;; lol
+  `(do-product ,bindings ,@body))
+
+
+(defun binomial-coefficient (n k)
+  "Return `n` choose `k`."
+  ;; https://en.wikipedia.org/wiki/Binomial_coefficient#Multiplicative_formula
+  (Π (i 1 k)
+    (/ (- (1+ n) i) i)))
+
+(defun bernoulli-exactly (successes trials success-probability)
+  "Return the probability of exactly `successes` in `trials` Bernoulli trials.
+
+  Returns the probability of getting exactly `n` successes out of `trials`
+  Bernoulli trials with `success-probability`.
+
+  "
+  ;; For a group of N trials with success/failure probabilities s/f, any given
+  ;; ordering of exactly S successes and F failures will have probability:
+  ;;
+  ;;     s * s * s * … * f * f * f
+  ;;
+  ;; There are N choose S possible orderings (or N choose F, it doesn't matter),
+  ;; so we just sum up the probabilities of all of them.
+  (let ((failures (- trials successes))
+        (failure-probability (- 1 success-probability)))
+    (* (binomial-coefficient trials successes)
+       (expt success-probability successes)
+       (expt failure-probability failures))))
+
+(defun bernoulli-at-least (successes trials success-probability)
+  "Return the probability of at least `successes` in `trials` Bernoulli trials.
+
+  Returns the probability of getting at least `n` successes out of `trials`
+  Bernoulli trials with `success-probability`.
+
+  "
+  ;; P(≥S) = P(=S) + P(=S+1) + … + P(N)
+  (Σ (n successes trials)
+    (bernoulli-exactly n trials success-probability)))
+
+(define-problem lia (data stream)
+    "2 1"
+    "0.684"
+  (let* ((generations (read data))
+         (target (read data))
+         (population (expt 2 generations)))
+    (format nil "~,3F" (bernoulli-at-least target population 1/4))))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/mprt.lisp	Sun Dec 16 18:01:43 2018 -0500
@@ -0,0 +1,68 @@
+(in-package :rosalind)
+
+;; This was pretty simple, except for discovering that cl-ppcre's all-matches
+;; function skips overlapping matches.  Otherwise we just convert the motif to
+;; a regex and handle grabbing the data from Uniprot (which is straightforward
+;; but can be slow).
+
+(defparameter *input-mprt*
+  "A2Z669
+B5ZC00
+P07204_TRBM_HUMAN
+P20840_SAG1_YEAST")
+
+(defparameter *output-mprt*
+  "B5ZC00
+85 118 142 306 395
+P07204_TRBM_HUMAN
+47 115 116 382 409
+P20840_SAG1_YEAST
+79 109 135 248 306 348 364 402 485 501 614
+")
+
+(defparameter *motif-n-glycosylation* "N{P}[ST]{P}")
+
+(defun motif-to-regex (motif)
+  "Turn a protein motif shorthand into a PPCRE scanner."
+  (-<> motif
+    ;; All we have to do is turn {X} into [^X] and compile.
+    (ppcre:regex-replace-all "[{]" <> "[^")
+    (substitute #\] #\} <>)
+    ppcre:create-scanner))
+
+(defun all-matches-dammit (regex target-string)
+  "Return a list of start and end positions of all matches of `regex` on `target-string`.
+
+  Unlike `ppcre:all-matches` this will return ALL matches, even if they're
+  overlapping.  Example:
+
+    (all-matches-dammit \"a..\" \"aabc\")
+    ; =>
+    ; (0 3 1 4)
+
+    (ppcre:all-matches \"a..\" \"aabc\")
+    ; =>
+    ; (0 3) ; dammit
+
+  "
+  ;; cl-ppcre
+  (iterate
+    (with i = 0)
+    (for (values start end) = (ppcre:scan regex target-string :start i))
+    (while start)
+    (collect start)
+    (collect end)
+    (setf i (1+ start))))
+
+
+(define-problem mprt (data stream)
+    *input-mprt*
+    *output-mprt*
+  (with-output-to-string (s)
+    (iterate
+      (with n-glycosylation = (motif-to-regex *motif-n-glycosylation*))
+      (for id :in-stream data :using #'read-line)
+      (for (nil . protein) = (uniprot id))
+      (for matches = (all-matches-dammit n-glycosylation protein))
+      (when matches
+        (format s "~A~%~{~D~*~^ ~}~%" id (mapcar #'1+ matches))))))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/orf.lisp	Sun Dec 16 18:01:43 2018 -0500
@@ -0,0 +1,34 @@
+(in-package :rosalind)
+
+(defparameter *input-orf*
+  ">Rosalind_99
+AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG")
+
+(defparameter *output-orf*
+  "M
+MTPRLGLESLLE
+MGMTPRLGLESLLE
+MLLGSFRLIPKETLIQVAGSSPCNLS")
+
+
+(defun translate-all (rna)
+  "Return all possible proteins that can be translated from `rna`."
+  (iterate
+    (for start :first 0 :then (1+ protein-start))
+    (for (values protein protein-start) = (translate rna :start start))
+    (while protein)
+    (collect protein)))
+
+(define-problem orf (data stream)
+    *input-orf*
+    *output-orf*
+  (let* ((dna (cdr (first (read-fasta-into-alist data))))
+         (rna1 (transcribe dna))
+         (rna2 (transcribe (reverse-complement dna))))
+    (-<> (append (translate-all rna1)
+                 (translate-all rna2))
+      (remove-duplicates <> :test #'string=)
+      (sort <> #'< :key #'length)
+      (format nil "~{~A~^~%~}" <>))))
+
+
--- a/src/problems/prot.lisp	Sat Dec 15 16:56:57 2018 -0500
+++ b/src/problems/prot.lisp	Sun Dec 16 18:01:43 2018 -0500
@@ -58,11 +58,29 @@
     ("UGG" #\W) ("CGG" #\R) ("AGG" #\R) ("GGG" #\G)))
 
 (defun translate (rna &key (start 0))
-  "Translate a string of RNA bases into a protein string of amino acids."
-  (iterate (for i :from (search "AUG" rna :start2 start) :by 3)
-           (for protein = (codon-to-protein rna i))
-           (while protein)
-           (collect protein :result-type 'string)))
+  "Translate a string of RNA bases into a protein string of amino acids.
+
+  `rna` will be searched (beginning at `start`) for a start codon and
+  translation will proceed from there.  If no start codon occurs after `start`
+  then `nil` will be returned.
+
+  Once a start codon has been found, translation proceeds to the next stop
+  codon.  If no stop codon is present, `nil` will be returned.
+
+  Otherwise two values are returned: the protein string and the index into `rna`
+  where it started.
+
+  "
+  (when-let ((start (search "AUG" rna :start2 start)))
+    (values
+      (iterate (with limit = (- (length rna) 3))
+               (for i :from start :by 3)
+               (when (> i limit)
+                 (return-from translate (values nil nil)))
+               (for protein = (codon-to-protein rna i))
+               (while protein)
+               (collect protein :result-type 'string))
+      start)))
 
 
 (define-problem prot (data string)
--- a/src/problems/revc.lisp	Sat Dec 15 16:56:57 2018 -0500
+++ b/src/problems/revc.lisp	Sun Dec 16 18:01:43 2018 -0500
@@ -28,7 +28,7 @@
 ;; polarized ends, with one end being called 3′ and the other being 5′, but I'm
 ;; not 100% sure.
 
-(defun reverse-complement (dna)
+(defun nreverse-complement (dna)
   (flet ((dna-complement (base)
            (case base
              (#\A #\T)
@@ -38,9 +38,12 @@
     (map-into dna #'dna-complement dna)
     (nreverse dna)))
 
+(defun reverse-complement (dna)
+  (nreverse-complement (copy-seq dna)))
+
 (define-problem revc (data string)
     "AAAACCCGGT"
     "ACCGGGTTTT"
   "Return the reverse complement of `data`."
-  (reverse-complement (delete #\newline data)))
+  (nreverse-complement (delete #\newline data)))
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/revp.lisp	Sun Dec 16 18:01:43 2018 -0500
@@ -0,0 +1,51 @@
+(in-package :rosalind)
+
+;; The problem explanation provided a clever trick: you can cut the comparison
+;; size in half by comparing the first half of the string to the reverse
+;; complement of the second half, instead of comparing the entire thing.
+;;
+;; AAC GTT
+;;     CAA complement
+;;     AAC reverse
+;; AAC=AAC palindrome!
+
+(defparameter *input-revp*
+  ">Rosalind_24
+TCAATGCATGCGGGTCTATATGCAT")
+
+(defparameter *output-revp*
+  "4 6
+5 4
+6 6
+7 4
+17 4
+18 4
+20 6
+21 4
+")
+
+
+(defun reverse-palindrome-p (dna start length)
+  (let ((mid (+ start (truncate length 2)))
+        (end (+ start length)))
+    (unless (> end (length dna))
+      (string= dna
+               (reverse-complement (subseq dna mid end))
+               :start1 start
+               :end1 mid))))
+
+(defun reverse-palindrome-length (dna start)
+  (iterate (for i :from 12 :downto 4 :by 2)
+           (finding i :such-that (reverse-palindrome-p dna start i))))
+
+(define-problem revp (data stream)
+    *input-revp*
+    *output-revp*
+  (with-output-to-string (s)
+    (iterate
+      (with dna = (cdr (first (read-fasta-into-alist data))))
+      (for i :index-of-vector dna)
+      (when-let ((l (reverse-palindrome-length dna i)))
+        (format s "~D ~D~%" (1+ i) l)))))
+
+
--- a/src/utils.lisp	Sat Dec 15 16:56:57 2018 -0500
+++ b/src/utils.lisp	Sun Dec 16 18:01:43 2018 -0500
@@ -225,6 +225,27 @@
            (collect (cons label data))))
 
 
+;;;; Uniprot ------------------------------------------------------------------
+(defvar *uniprot-cache* (make-hash-table :test #'equal))
+
+(defmacro get-cached (key cache expr)
+  (once-only (key cache)
+    (with-gensyms (value)
+      `(if-found (,value (gethash ,key ,cache))
+         ,value
+         (setf (gethash ,key ,cache) ,expr)))))
+
+(defun uniprot-url (id)
+  (format nil "http://www.uniprot.org/uniprot/~A.fasta" id))
+
+(defun uniprot (id)
+  (get-cached id *uniprot-cache*
+              (-<> (uniprot-url id)
+                drakma:http-request
+                read-fasta-into-alist
+                first)))
+
+
 ;;;; Testing ------------------------------------------------------------------
 (defmacro define-test (problem input output &optional (test 'string=))
   `(test ,(symb 'test- problem)
--- a/vendor/make-quickutils.lisp	Sat Dec 15 16:56:57 2018 -0500
+++ b/vendor/make-quickutils.lisp	Sun Dec 16 18:01:43 2018 -0500
@@ -6,10 +6,11 @@
 
                :compose
                :curry
+               :ensure-gethash
+               :once-only
                :rcurry
+               :symb
                :with-gensyms
-               :once-only
-               :symb
 
                )
   :package "ROSALIND.QUICKUTILS")
--- a/vendor/quickutils.lisp	Sat Dec 15 16:56:57 2018 -0500
+++ b/vendor/quickutils.lisp	Sun Dec 16 18:01:43 2018 -0500
@@ -2,7 +2,7 @@
 ;;;; See http://quickutil.org for details.
 
 ;;;; To regenerate:
-;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:COMPOSE :CURRY :RCURRY :WITH-GENSYMS :ONCE-ONLY :SYMB) :ensure-package T :package "ROSALIND.QUICKUTILS")
+;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:COMPOSE :CURRY :ENSURE-GETHASH :ONCE-ONLY :RCURRY :SYMB :WITH-GENSYMS) :ensure-package T :package "ROSALIND.QUICKUTILS")
 
 (eval-when (:compile-toplevel :load-toplevel :execute)
   (unless (find-package "ROSALIND.QUICKUTILS")
@@ -14,9 +14,9 @@
 
 (when (boundp '*utilities*)
   (setf *utilities* (union *utilities* '(:MAKE-GENSYM-LIST :ENSURE-FUNCTION
-                                         :COMPOSE :CURRY :RCURRY
-                                         :STRING-DESIGNATOR :WITH-GENSYMS
-                                         :ONCE-ONLY :MKSTR :SYMB))))
+                                         :COMPOSE :CURRY :ENSURE-GETHASH
+                                         :ONCE-ONLY :RCURRY :MKSTR :SYMB
+                                         :STRING-DESIGNATOR :WITH-GENSYMS))))
 (eval-when (:compile-toplevel :load-toplevel :execute)
   (defun make-gensym-list (length &optional (x "G"))
     "Returns a list of `length` gensyms, each generated as if with a call to `make-gensym`,
@@ -90,6 +90,55 @@
            (apply ,fun ,@curries more)))))
   
 
+  (defmacro ensure-gethash (key hash-table &optional default)
+    "Like `gethash`, but if `key` is not found in the `hash-table` saves the `default`
+under key before returning it. Secondary return value is true if key was
+already in the table."
+    `(multiple-value-bind (value ok) (gethash ,key ,hash-table)
+       (if ok
+           (values value ok)
+           (values (setf (gethash ,key ,hash-table) ,default) nil))))
+  
+
+  (defmacro once-only (specs &body forms)
+    "Evaluates `forms` with symbols specified in `specs` rebound to temporary
+variables, ensuring that each initform is evaluated only once.
+
+Each of `specs` must either be a symbol naming the variable to be rebound, or of
+the form:
+
+    (symbol initform)
+
+Bare symbols in `specs` are equivalent to
+
+    (symbol symbol)
+
+Example:
+
+    (defmacro cons1 (x) (once-only (x) `(cons ,x ,x)))
+      (let ((y 0)) (cons1 (incf y))) => (1 . 1)"
+    (let ((gensyms (make-gensym-list (length specs) "ONCE-ONLY"))
+          (names-and-forms (mapcar (lambda (spec)
+                                     (etypecase spec
+                                       (list
+                                        (destructuring-bind (name form) spec
+                                          (cons name form)))
+                                       (symbol
+                                        (cons spec spec))))
+                                   specs)))
+      ;; bind in user-macro
+      `(let ,(mapcar (lambda (g n) (list g `(gensym ,(string (car n)))))
+              gensyms names-and-forms)
+         ;; bind in final expansion
+         `(let (,,@(mapcar (lambda (g n)
+                             ``(,,g ,,(cdr n)))
+                           gensyms names-and-forms))
+            ;; bind in user-macro
+            ,(let ,(mapcar (lambda (n g) (list (car n) g))
+                    names-and-forms gensyms)
+               ,@forms)))))
+  
+
   (defun rcurry (function &rest arguments)
     "Returns a function that applies the arguments it is called
 with and `arguments` to `function`."
@@ -100,6 +149,23 @@
         (multiple-value-call fn (values-list more) (values-list arguments)))))
   
 
+  (defun mkstr (&rest args)
+    "Receives any number of objects (string, symbol, keyword, char, number), extracts all printed representations, and concatenates them all into one string.
+
+Extracted from _On Lisp_, chapter 4."
+    (with-output-to-string (s)
+      (dolist (a args) (princ a s))))
+  
+
+  (defun symb (&rest args)
+    "Receives any number of objects, concatenates all into one string with `#'mkstr` and converts them to symbol.
+
+Extracted from _On Lisp_, chapter 4.
+
+See also: `symbolicate`"
+    (values (intern (apply #'mkstr args))))
+  
+
   (deftype string-designator ()
     "A string designator type. A string designator is either a string, a symbol,
 or a character."
@@ -143,63 +209,8 @@
 unique symbol the named variable will be bound to."
     `(with-gensyms ,names ,@forms))
   
-
-  (defmacro once-only (specs &body forms)
-    "Evaluates `forms` with symbols specified in `specs` rebound to temporary
-variables, ensuring that each initform is evaluated only once.
-
-Each of `specs` must either be a symbol naming the variable to be rebound, or of
-the form:
-
-    (symbol initform)
-
-Bare symbols in `specs` are equivalent to
-
-    (symbol symbol)
-
-Example:
-
-    (defmacro cons1 (x) (once-only (x) `(cons ,x ,x)))
-      (let ((y 0)) (cons1 (incf y))) => (1 . 1)"
-    (let ((gensyms (make-gensym-list (length specs) "ONCE-ONLY"))
-          (names-and-forms (mapcar (lambda (spec)
-                                     (etypecase spec
-                                       (list
-                                        (destructuring-bind (name form) spec
-                                          (cons name form)))
-                                       (symbol
-                                        (cons spec spec))))
-                                   specs)))
-      ;; bind in user-macro
-      `(let ,(mapcar (lambda (g n) (list g `(gensym ,(string (car n)))))
-              gensyms names-and-forms)
-         ;; bind in final expansion
-         `(let (,,@(mapcar (lambda (g n)
-                             ``(,,g ,,(cdr n)))
-                           gensyms names-and-forms))
-            ;; bind in user-macro
-            ,(let ,(mapcar (lambda (n g) (list (car n) g))
-                    names-and-forms gensyms)
-               ,@forms)))))
-  
-
-  (defun mkstr (&rest args)
-    "Receives any number of objects (string, symbol, keyword, char, number), extracts all printed representations, and concatenates them all into one string.
-
-Extracted from _On Lisp_, chapter 4."
-    (with-output-to-string (s)
-      (dolist (a args) (princ a s))))
-  
-
-  (defun symb (&rest args)
-    "Receives any number of objects, concatenates all into one string with `#'mkstr` and converts them to symbol.
-
-Extracted from _On Lisp_, chapter 4.
-
-See also: `symbolicate`"
-    (values (intern (apply #'mkstr args))))
-  
 (eval-when (:compile-toplevel :load-toplevel :execute)
-  (export '(compose curry rcurry with-gensyms with-unique-names once-only symb)))
+  (export '(compose curry ensure-gethash once-only rcurry symb with-gensyms
+            with-unique-names)))
 
 ;;;; END OF quickutils.lisp ;;;;