# HG changeset patch # User Steve Losh # Date 1541283648 14400 # Node ID e279056b1a5b2935f6e16694234803e68b0fa5f1 # Parent 8de2e6d7c9d918f1c2e3a3224067004bacfec1de Start taking some notes diff -r 8de2e6d7c9d9 -r e279056b1a5b src/problems/dna.lisp --- a/src/problems/dna.lisp Sat Nov 03 17:02:03 2018 -0400 +++ b/src/problems/dna.lisp Sat Nov 03 18:20:48 2018 -0400 @@ -1,8 +1,36 @@ (in-package :rosalind) +;; Nucleic acids are polymers, which means they're long, repeating chains of +;; smaller molecules called monomers. +;; +;; A single monomer of a nucleic acid is called a nucleotide (nt) and has three +;; parts: +;; +;; * A sugar molecule +;; * A negative ion called a phosphate +;; * A compound called a nucleobase (base) +;; +;; The sugar of one nucleotide binds to the phosphate of the next, forming long +;; backbones for the bases: +;; +;; …sP-+-sP-+-sP-+-sP-+-sP… +;; | | | | +;; C A T G s = sugar molecule +;; G T A C p = phosphate anion +;; | | | | +;; …sP-+-sP-+-sP-+-sP-+-sP… +;; +;; In DNA the sugar is deoxyribose, and there are four possible bases: +;; +;; * A: Adenine +;; * C: Cytosine +;; * G: Guanine +;; * T: Thymine + (define-problem dna (data string) "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC" "20 12 17 21" + "Count the number of each base in `data`." 
(let ((results (frequencies data))) (format nil "~D ~D ~D ~D" (gethash #\A results 0) diff -r 8de2e6d7c9d9 -r e279056b1a5b src/problems/revc.lisp --- a/src/problems/revc.lisp Sat Nov 03 17:02:03 2018 -0400 +++ b/src/problems/revc.lisp Sat Nov 03 18:20:48 2018 -0400 @@ -1,15 +1,46 @@ (in-package :rosalind) -(define-problem revc (data string) - "AAAACCCGGT" - "ACCGGGTTTT" +;; DNA is made up of two strands running in opposite directions, usually twisted +;; into a double helix, with the bases bonded: +;; +;; …sP-+-sP-+-sP-+-sP-+-sP… +;; | | | | +;; C A T G s = sugar molecule +;; G T A C p = phosphate anion +;; | | | | +;; …sP-+-sP-+-sP-+-sP-+-sP… +;; +;; Each base will only bond with one specific other base: +;; +;; * Adenine/Thymine +;; * Cytosine/Guanine +;; +;; The "complement" of a base is the other base it will bond to. +;; +;; Two bonded bases are called a base pair (bp). Generally DNA lengths are +;; specified in numbers of base pairs. +;; +;; If we know the order of the bases in one strand, we can figure out the other +;; strand by taking the reverse complement. +;; +;; The problem summaries don't really make it clear what "running in opposite +;; directions" means. I think I remember there being something about DNA having +;; polarized ends, with one end being called 3′ and the other being 5′, but I'm +;; not 100% sure. + +(defun reverse-complement (dna) (flet ((dna-complement (base) (case base (#\A #\T) (#\T #\A) (#\G #\C) - (#\C #\G) - (t base)))) ; newline etc - (map-into data #'dna-complement data) - (nreverse data))) + (#\C #\G)))) + (map-into dna #'dna-complement dna) + (nreverse dna))) +(define-problem revc (data string) + "AAAACCCGGT" + "ACCGGGTTTT" + "Return the reverse complement of `data`." 
+ (reverse-complement (delete #\newline data))) + diff -r 8de2e6d7c9d9 -r e279056b1a5b src/problems/rna.lisp --- a/src/problems/rna.lisp Sat Nov 03 17:02:03 2018 -0400 +++ b/src/problems/rna.lisp Sat Nov 03 18:20:48 2018 -0400 @@ -1,7 +1,32 @@ (in-package :rosalind) +;; RNA is another nucleic acid that is similar to DNA, with the following +;; differences: +;; +;; * It uses ribose for its sugar molecules (instead of deoxyribose) +;; * It contains the base Uracil (U) instead of Thymine +;; * It's single-helixed instead of double-helixed (I think?) +;; +;; RNA is produced from DNA in a multi-step process called "transcription" that +;; happens in the nucleus (at least in eukaryotes): +;; +;; 1. pre-mRNA is produced from DNA +;; 2. mRNA is produced from pre-mRNA +;; +;; After that the mRNA exits the nucleus. Then proteins are produced from the +;; mRNA by ribosomes. That process is called "translation". + +(defun transcribe (dna) + "Transcribe a fresh RNA string from `DNA`." + (substitute #\U #\T dna)) + +(defun ntranscribe (dna) + "Destructively transcribe `DNA` to RNA in-place." + (nsubstitute #\U #\T dna)) + (define-problem rna (data string) "GATGGAACTTGACTACGTAAATT" "GAUGGAACUUGACUACGUAAAUU" - (substitute #\U #\T data)) + "Transcribe `data` from DNA into RNA." 
+ (ntranscribe data)) diff -r 8de2e6d7c9d9 -r e279056b1a5b src/utils.lisp --- a/src/utils.lisp Sat Nov 03 17:02:03 2018 -0400 +++ b/src/utils.lisp Sat Nov 03 18:20:48 2018 -0400 @@ -231,16 +231,20 @@ ;;;; Problems ----------------------------------------------------------------- (defmacro define-problem (name (arg type) sample-input sample-output &body body) - (let ((symbol (symb 'problem- name))) - `(progn - (defun ,symbol (&optional (,arg ,sample-input)) - (setf ,arg ,(ecase type - (string `(ensure-string ,arg)) - (stream `(ensure-stream ,arg)))) - (aesthetic-string (progn ,@body))) - (setf (get ',symbol 'rosalind-name) ,(string-downcase name)) - (define-test ,symbol ,sample-input ,sample-output) - ',symbol))) + (multiple-value-bind (body declarations docstring) + (alexandria:parse-body body :documentation t) + (let ((symbol (symb 'problem- name))) + `(progn + (defun ,symbol (&optional (,arg ,sample-input)) + ,@(when docstring (list docstring)) + ,@declarations + (setf ,arg ,(ecase type + (string `(ensure-string ,arg)) + (stream `(ensure-stream ,arg)))) + (aesthetic-string (progn ,@body))) + (setf (get ',symbol 'rosalind-name) ,(string-downcase name)) + (define-test ,symbol ,sample-input ,sample-output) + ',symbol)))) (defun problem-data-path (problem) (format nil "~~/Downloads/rosalind_~A.txt" (get problem 'rosalind-name)))