e279056b1a5b

Start taking some notes
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Sat, 03 Nov 2018 18:20:48 -0400 (2018-11-03)
parents 8de2e6d7c9d9
children b8745a9fccd4 f65ed39c371d
branches/tags (none)
files src/problems/dna.lisp src/problems/revc.lisp src/problems/rna.lisp src/utils.lisp

Changes

--- a/src/problems/dna.lisp	Sat Nov 03 17:02:03 2018 -0400
+++ b/src/problems/dna.lisp	Sat Nov 03 18:20:48 2018 -0400
@@ -1,8 +1,36 @@
 (in-package :rosalind)
 
+;; Nucleic acids are polymers, which means they're long, repeating chains of
+;; smaller molecules called monomers.
+;;
+;; A single monomer of a nucleic acid is called a nucleotide (nt) and has three
+;; parts:
+;;
+;; * A sugar molecule
+;; * A negative ion called a phosphate
+;; * A compound called a nucleobase (base)
+;;
+;; The sugar of one nucleotide binds to the phosphate of the next, forming long
+;; backbones for the bases:
+;;
+;;    …sP-+-sP-+-sP-+-sP-+-sP…
+;;        |    |    |    |
+;;        C    A    T    G          s = sugar molecule
+;;       G    T    A    C           P = phosphate anion
+;;       |    |    |    |
+;;   …sP-+-sP-+-sP-+-sP-+-sP…
+;;
+;; In DNA the sugar is deoxyribose, and there are four possible bases:
+;;
+;; * A: Adenine
+;; * C: Cytosine
+;; * G: Guanine
+;; * T: Thymine
+
 (define-problem dna (data string)
     "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
     "20 12 17 21"
+  "Count the number of each base in `data`."
   (let ((results (frequencies data)))
     (format nil "~D ~D ~D ~D"
             (gethash #\A results 0)
--- a/src/problems/revc.lisp	Sat Nov 03 17:02:03 2018 -0400
+++ b/src/problems/revc.lisp	Sat Nov 03 18:20:48 2018 -0400
@@ -1,15 +1,46 @@
 (in-package :rosalind)
 
-(define-problem revc (data string)
-    "AAAACCCGGT"
-    "ACCGGGTTTT"
+;; DNA is made up of two strands running in opposite directions, usually twisted
+;; into a double helix, with the bases bonded:
+;;
+;;    …sP-+-sP-+-sP-+-sP-+-sP…
+;;        |    |    |    |
+;;        C    A    T    G          s = sugar molecule
+;;       G    T    A    C           P = phosphate anion
+;;       |    |    |    |
+;;   …sP-+-sP-+-sP-+-sP-+-sP…
+;;
+;; Each base will only bond with one specific other base:
+;;
+;; * Adenine/Thymine
+;; * Cytosine/Guanine
+;;
+;; The "complement" of a base is the other base it will bond to.
+;;
+;; Two bonded bases are called a base pair (bp).  Generally DNA lengths are
+;; specified in numbers of base pairs.
+;;
+;; If we know the order of the bases in one strand, we can figure out the other
+;; strand by taking the reverse complement.
+;;
+;; The problem summaries don't really make it clear what "running in opposite
+;; directions" means.  I think I remember there being something about DNA having
+;; polarized ends, with one end being called 3′ and the other being 5′, but I'm
+;; not 100% sure.
+
+(defun reverse-complement (dna)
   (flet ((dna-complement (base)
            (case base
              (#\A #\T)
              (#\T #\A)
              (#\G #\C)
-             (#\C #\G)
-             (t base)))) ; newline etc
-    (map-into data #'dna-complement data)
-    (nreverse data)))
+             (#\C #\G))))
+    (map-into dna #'dna-complement dna)
+    (nreverse dna)))
 
+(define-problem revc (data string)
+    "AAAACCCGGT"
+    "ACCGGGTTTT"
+  "Return the reverse complement of `data`."
+  (reverse-complement (delete #\newline data)))
+
--- a/src/problems/rna.lisp	Sat Nov 03 17:02:03 2018 -0400
+++ b/src/problems/rna.lisp	Sat Nov 03 18:20:48 2018 -0400
@@ -1,7 +1,32 @@
 (in-package :rosalind)
 
+;; RNA is another nucleic acid that is similar to DNA, with the following
+;; differences:
+;;
+;; * It uses ribose for its sugar molecules (instead of deoxyribose)
+;; * It contains the base Uracil (U) instead of Thymine
+;; * It's single-stranded instead of double-stranded (I think?)
+;;
+;; RNA is produced from DNA in a multi-step process called "transcription" that
+;; happens in the nucleus (at least in eukaryotes):
+;;
+;; 1. pre-mRNA is produced from DNA
+;; 2. mRNA is produced from pre-mRNA
+;;
+;; After that the mRNA exits the nucleus.  Then proteins are produced from the
+;; mRNA by ribosomes.  That process is called "translation".
+
+(defun transcribe (dna)
+  "Transcribe a fresh RNA string from `DNA`."
+  (substitute #\U #\T dna))
+
+(defun ntranscribe (dna)
+  "Destructively transcribe `DNA` to RNA in-place."
+  (nsubstitute #\U #\T dna))
+
 (define-problem rna (data string)
     "GATGGAACTTGACTACGTAAATT"
     "GAUGGAACUUGACUACGUAAAUU"
-  (substitute #\U #\T data))
+  "Transcribe `data` from DNA into RNA."
+  (ntranscribe data))
 
--- a/src/utils.lisp	Sat Nov 03 17:02:03 2018 -0400
+++ b/src/utils.lisp	Sat Nov 03 18:20:48 2018 -0400
@@ -231,16 +231,20 @@
 
 ;;;; Problems -----------------------------------------------------------------
 (defmacro define-problem (name (arg type) sample-input sample-output &body body)
-  (let ((symbol (symb 'problem- name)))
-    `(progn
-       (defun ,symbol (&optional (,arg ,sample-input))
-         (setf ,arg ,(ecase type
-                       (string `(ensure-string ,arg))
-                       (stream `(ensure-stream ,arg))))
-         (aesthetic-string (progn ,@body)))
-       (setf (get ',symbol 'rosalind-name) ,(string-downcase name))
-       (define-test ,symbol ,sample-input ,sample-output)
-       ',symbol)))
+  (multiple-value-bind (body declarations docstring)
+      (alexandria:parse-body body :documentation t)
+    (let ((symbol (symb 'problem- name)))
+      `(progn
+         (defun ,symbol (&optional (,arg ,sample-input))
+           ,@(when docstring (list docstring))
+           ,@declarations
+           (setf ,arg ,(ecase type
+                         (string `(ensure-string ,arg))
+                         (stream `(ensure-stream ,arg))))
+           (aesthetic-string (progn ,@body)))
+         (setf (get ',symbol 'rosalind-name) ,(string-downcase name))
+         (define-test ,symbol ,sample-input ,sample-output)
+         ',symbol))))
 
 (defun problem-data-path (problem)
   (format nil "~~/Downloads/rosalind_~A.txt" (get problem 'rosalind-name)))