--- a/src/problems/dna.lisp Sat Nov 03 17:02:03 2018 -0400
+++ b/src/problems/dna.lisp Sat Nov 03 18:20:48 2018 -0400
@@ -1,8 +1,36 @@
(in-package :rosalind)
+;; Nucleic acids are polymers, which means they're long, repeating chains of
+;; smaller molecules called monomers.
+;;
+;; A single monomer of a nucleic acid is called a nucleotide (nt) and has three
+;; parts:
+;;
+;; * A sugar molecule
+;; * A negative ion called a phosphate
+;; * A compound called a nucleobase (base)
+;;
+;; The sugar of one nucleotide binds to the phosphate of the next, forming long
+;; backbones for the bases:
+;;
+;; …sP-+-sP-+-sP-+-sP-+-sP…
+;; | | | |
+;; C A T G s = sugar molecule
+;; G T A C P = phosphate anion
+;; | | | |
+;; …sP-+-sP-+-sP-+-sP-+-sP…
+;;
+;; In DNA the sugar is deoxyribose, and there are four possible bases:
+;;
+;; * A: Adenine
+;; * C: Cytosine
+;; * G: Guanine
+;; * T: Thymine
+
(define-problem dna (data string)
"AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
"20 12 17 21"
+ "Count the number of each base in `data`."
(let ((results (frequencies data)))
(format nil "~D ~D ~D ~D"
(gethash #\A results 0)
--- a/src/problems/revc.lisp Sat Nov 03 17:02:03 2018 -0400
+++ b/src/problems/revc.lisp Sat Nov 03 18:20:48 2018 -0400
@@ -1,15 +1,46 @@
(in-package :rosalind)
-(define-problem revc (data string)
- "AAAACCCGGT"
- "ACCGGGTTTT"
+;; DNA is made up of two strands running in opposite directions, usually twisted
+;; into a double helix, with the bases bonded:
+;;
+;; …sP-+-sP-+-sP-+-sP-+-sP…
+;; | | | |
+;; C A T G s = sugar molecule
+;; G T A C P = phosphate anion
+;; | | | |
+;; …sP-+-sP-+-sP-+-sP-+-sP…
+;;
+;; Each base will only bond with one specific other base:
+;;
+;; * Adenine/Thymine
+;; * Cytosine/Guanine
+;;
+;; The "complement" of a base is the other base it will bond to.
+;;
+;; Two bonded bases are called a base pair (bp). Generally DNA lengths are
+;; specified in numbers of base pairs.
+;;
+;; If we know the order of the bases in one strand, we can figure out the other
+;; strand by taking the reverse complement.
+;;
+;; The problem summaries don't really make it clear what "running in opposite
+;; directions" means. I think I remember there being something about DNA having
+;; polarized ends, with one end being called 3′ and the other being 5′, but I'm
+;; not 100% sure.
+
+(defun reverse-complement (dna)
+ "Return the reverse complement of `DNA`, destructively modifying `DNA`."
(flet ((dna-complement (base)
(case base
(#\A #\T)
(#\T #\A)
(#\G #\C)
- (#\C #\G)
- (t base)))) ; newline etc
- (map-into data #'dna-complement data)
- (nreverse data)))
+ (#\C #\G)))) ; NOTE(review): no fallback clause, so any other character yields NIL
+ (map-into dna #'dna-complement dna) ; complement every base in place
+ (nreverse dna))) ; may reuse `dna`'s storage, so callers must use the return value
+(define-problem revc (data string)
+  "AAAACCCGGT"
+  "ACCGGGTTTT"
+  "Return the reverse complement of `data`."
+  ;; `delete` and `reverse-complement` are both destructive, and `data`
+  ;; defaults to the literal sample input (assuming `ensure-string` hands
+  ;; strings back unchanged -- TODO confirm).  Modifying a literal is undefined
+  ;; behaviour and would make a second default call start from already
+  ;; reversed data, so work on a private copy instead.
+  (reverse-complement (delete #\newline (copy-seq data))))
+
--- a/src/problems/rna.lisp Sat Nov 03 17:02:03 2018 -0400
+++ b/src/problems/rna.lisp Sat Nov 03 18:20:48 2018 -0400
@@ -1,7 +1,32 @@
(in-package :rosalind)
+;; RNA is another nucleic acid that is similar to DNA, with the following
+;; differences:
+;;
+;; * It uses ribose for its sugar molecules (instead of deoxyribose)
+;; * It contains the base Uracil (U) instead of Thymine
+;; * It's single-stranded instead of double-stranded (I think?)
+;;
+;; RNA is produced from DNA in a multi-step process called "transcription" that
+;; happens in the nucleus (at least in eukaryotes):
+;;
+;; 1. pre-mRNA is produced from DNA
+;; 2. mRNA is produced from pre-mRNA
+;;
+;; After that the mRNA exits the nucleus. Then proteins are produced from the
+;; mRNA by ribosomes. That process is called "translation".
+
+(defun transcribe (dna)
+  "Transcribe a fresh RNA string from `DNA`, never sharing structure with it."
+  ;; `substitute` is allowed to return its argument (or share structure with
+  ;; it) when nothing needs replacing, which would break the promise of a
+  ;; fresh result.  Copying first and substituting in the copy always yields a
+  ;; new sequence and still leaves `dna` untouched.
+  (nsubstitute #\U #\T (copy-seq dna)))
+
+(defun ntranscribe (dna)
+  "Turn `DNA` into RNA by overwriting each T with U; `DNA` may be modified."
+  (flet ((thymine-p (base)
+           (eql base #\T)))
+    (nsubstitute-if #\U #'thymine-p dna)))
+
(define-problem rna (data string)
"GATGGAACTTGACTACGTAAATT"
"GAUGGAACUUGACUACGUAAAUU"
- (substitute #\U #\T data))
+ "Transcribe `data` from DNA into RNA."
+ ;; Deliberately non-destructive: `data` defaults to the literal sample input
+ ;; (assuming `ensure-string` hands strings back unchanged -- TODO confirm),
+ ;; and modifying a literal object is undefined behaviour.
+ (transcribe data))
--- a/src/utils.lisp Sat Nov 03 17:02:03 2018 -0400
+++ b/src/utils.lisp Sat Nov 03 18:20:48 2018 -0400
@@ -231,16 +231,20 @@
;;;; Problems -----------------------------------------------------------------
(defmacro define-problem (name (arg type) sample-input sample-output &body body)
- (let ((symbol (symb 'problem- name)))
- `(progn
- (defun ,symbol (&optional (,arg ,sample-input))
- (setf ,arg ,(ecase type
- (string `(ensure-string ,arg))
- (stream `(ensure-stream ,arg))))
- (aesthetic-string (progn ,@body)))
- (setf (get ',symbol 'rosalind-name) ,(string-downcase name))
- (define-test ,symbol ,sample-input ,sample-output)
- ',symbol)))
+ "Define the solver function and the test for one Rosalind problem.
+
+ The function is named by `(symb 'problem- name)` and takes `arg` as an optional
+ parameter defaulting to `sample-input`. `arg` is passed through `ensure-string`
+ or `ensure-stream` according to `type` before `body` runs, and the value of
+ `body` is passed through `aesthetic-string`. A leading docstring and any
+ declarations in `body` are moved to the top of the generated function. The
+ lowercased `name` is stored under the function symbol's `rosalind-name`
+ property, and a `define-test` form is emitted with the sample input and output."
+ (multiple-value-bind (body declarations docstring)
+ (alexandria:parse-body body :documentation t) ; split off a leading docstring and declarations
+ (let ((symbol (symb 'problem- name)))
+ `(progn
+ (defun ,symbol (&optional (,arg ,sample-input))
+ ,@(when docstring (list docstring)) ; splice the docstring in only when there is one
+ ,@declarations ; declarations must come before the first body form
+ (setf ,arg ,(ecase type ; any `type` other than string/stream is an expansion-time error
+ (string `(ensure-string ,arg))
+ (stream `(ensure-stream ,arg))))
+ (aesthetic-string (progn ,@body)))
+ (setf (get ',symbol 'rosalind-name) ,(string-downcase name)) ; read back by problem-data-path
+ (define-test ,symbol ,sample-input ,sample-output)
+ ',symbol)))) ; the expansion evaluates to the function's name
(defun problem-data-path (problem)
+ "Return the string ~/Downloads/rosalind_NAME.txt, where NAME is the `rosalind-name` property of `problem`."
(format nil "~~/Downloads/rosalind_~A.txt" (get problem 'rosalind-name)))