b5923704ce42

MPRT
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Thu, 08 Nov 2018 20:36:22 -0500
parents 4660a7402edb
children b3598dc7ef5c
branches/tags (none)
files rosalind.asd src/problems/mprt.lisp src/utils.lisp vendor/make-quickutils.lisp vendor/quickutils.lisp

Changes

--- a/rosalind.asd	Thu Nov 08 19:23:33 2018 -0500
+++ b/rosalind.asd	Thu Nov 08 20:36:22 2018 -0500
@@ -13,6 +13,8 @@
                :1am
                :alexandria
                :cl-digraph
+               :cl-ppcre
+               :drakma
                :iterate
                :losh
                :str
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/problems/mprt.lisp	Thu Nov 08 20:36:22 2018 -0500
@@ -0,0 +1,63 @@
+(in-package :rosalind)
+
+(defparameter *input-mprt*
+  "A2Z669
+B5ZC00
+P07204_TRBM_HUMAN
+P20840_SAG1_YEAST")
+
+(defparameter *output-mprt*
+  "B5ZC00
+85 118 142 306 395
+P07204_TRBM_HUMAN
+47 115 116 382 409
+P20840_SAG1_YEAST
+79 109 135 248 306 348 364 402 485 501 614
+")
+
+(defparameter *motif-n-glycosylation* "N{P}[ST]{P}")
+
+(defun motif-to-regex (motif)
+  "Turn a protein motif shorthand into a PPCRE scanner."
+  (-<> motif
+    ;; All we have to do is turn {X} into [^X] and compile.
+    (ppcre:regex-replace-all "[{]" <> "[^")
+    (substitute #\] #\} <>)
+    ppcre:create-scanner))
+
+(defun all-matches-dammit (regex target-string)
+  "Return a list of start and end positions of all matches of `regex` on `target-string`.
+
+  Unlike `ppcre:all-matches` this will return ALL matches, even if they're
+  overlapping.  Example:
+
+    (all-matches-dammit \"a..\" \"aabc\")
+    ; =>
+    ; (0 3 1 4)
+
+    (ppcre:all-matches \"a..\" \"aabc\")
+    ; =>
+    ; (0 3) ; dammit
+
+  "
+  ;; cl-ppcre
+  (iterate
+    (with i = 0)
+    (for (values start end) = (ppcre:scan regex target-string :start i))
+    (while start)
+    (collect start)
+    (collect end)
+    (setf i (1+ start))))
+
+
+(define-problem mprt (data stream)
+    *input-mprt*
+    *output-mprt*
+  (with-output-to-string (s)
+    (iterate
+      (with n-glycosylation = (motif-to-regex *motif-n-glycosylation*))
+      (for id :in-stream data :using #'read-line)
+      (for (nil . protein) = (uniprot id))
+      (for matches = (all-matches-dammit n-glycosylation protein))
+      (when matches
+        (format s "~A~%~{~D~*~^ ~}~%" id (mapcar #'1+ matches))))))
--- a/src/utils.lisp	Thu Nov 08 19:23:33 2018 -0500
+++ b/src/utils.lisp	Thu Nov 08 20:36:22 2018 -0500
@@ -225,6 +225,27 @@
            (collect (cons label data))))
 
 
+;;;; Uniprot ------------------------------------------------------------------
+(defvar *uniprot-cache* (make-hash-table :test #'equal))
+
+(defmacro get-cached (key cache expr)
+  (once-only (key cache)
+    (with-gensyms (value)
+      `(if-found (,value (gethash ,key ,cache))
+         ,value
+         (setf (gethash ,key ,cache) ,expr)))))
+
+(defun uniprot-url (id)
+  (format nil "http://www.uniprot.org/uniprot/~A.fasta" id))
+
+(defun uniprot (id)
+  (get-cached id *uniprot-cache*
+              (-<> (uniprot-url id)
+                drakma:http-request
+                read-fasta-into-alist
+                first)))
+
+
 ;;;; Testing ------------------------------------------------------------------
 (defmacro define-test (problem input output &optional (test 'string=))
   `(test ,(symb 'test- problem)
--- a/vendor/make-quickutils.lisp	Thu Nov 08 19:23:33 2018 -0500
+++ b/vendor/make-quickutils.lisp	Thu Nov 08 20:36:22 2018 -0500
@@ -6,10 +6,11 @@
 
                :compose
                :curry
+               :ensure-gethash
+               :once-only
                :rcurry
+               :symb
                :with-gensyms
-               :once-only
-               :symb
 
                )
   :package "ROSALIND.QUICKUTILS")
--- a/vendor/quickutils.lisp	Thu Nov 08 19:23:33 2018 -0500
+++ b/vendor/quickutils.lisp	Thu Nov 08 20:36:22 2018 -0500
@@ -2,7 +2,7 @@
 ;;;; See http://quickutil.org for details.
 
 ;;;; To regenerate:
-;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:COMPOSE :CURRY :RCURRY :WITH-GENSYMS :ONCE-ONLY :SYMB) :ensure-package T :package "ROSALIND.QUICKUTILS")
+;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:COMPOSE :CURRY :ENSURE-GETHASH :ONCE-ONLY :RCURRY :SYMB :WITH-GENSYMS) :ensure-package T :package "ROSALIND.QUICKUTILS")
 
 (eval-when (:compile-toplevel :load-toplevel :execute)
   (unless (find-package "ROSALIND.QUICKUTILS")
@@ -14,9 +14,9 @@
 
 (when (boundp '*utilities*)
   (setf *utilities* (union *utilities* '(:MAKE-GENSYM-LIST :ENSURE-FUNCTION
-                                         :COMPOSE :CURRY :RCURRY
-                                         :STRING-DESIGNATOR :WITH-GENSYMS
-                                         :ONCE-ONLY :MKSTR :SYMB))))
+                                         :COMPOSE :CURRY :ENSURE-GETHASH
+                                         :ONCE-ONLY :RCURRY :MKSTR :SYMB
+                                         :STRING-DESIGNATOR :WITH-GENSYMS))))
 (eval-when (:compile-toplevel :load-toplevel :execute)
   (defun make-gensym-list (length &optional (x "G"))
     "Returns a list of `length` gensyms, each generated as if with a call to `make-gensym`,
@@ -90,6 +90,55 @@
            (apply ,fun ,@curries more)))))
   
 
+  (defmacro ensure-gethash (key hash-table &optional default)
+    "Like `gethash`, but if `key` is not found in the `hash-table` saves the `default`
+under key before returning it. Secondary return value is true if key was
+already in the table."
+    `(multiple-value-bind (value ok) (gethash ,key ,hash-table)
+       (if ok
+           (values value ok)
+           (values (setf (gethash ,key ,hash-table) ,default) nil))))
+  
+
+  (defmacro once-only (specs &body forms)
+    "Evaluates `forms` with symbols specified in `specs` rebound to temporary
+variables, ensuring that each initform is evaluated only once.
+
+Each of `specs` must either be a symbol naming the variable to be rebound, or of
+the form:
+
+    (symbol initform)
+
+Bare symbols in `specs` are equivalent to
+
+    (symbol symbol)
+
+Example:
+
+    (defmacro cons1 (x) (once-only (x) `(cons ,x ,x)))
+      (let ((y 0)) (cons1 (incf y))) => (1 . 1)"
+    (let ((gensyms (make-gensym-list (length specs) "ONCE-ONLY"))
+          (names-and-forms (mapcar (lambda (spec)
+                                     (etypecase spec
+                                       (list
+                                        (destructuring-bind (name form) spec
+                                          (cons name form)))
+                                       (symbol
+                                        (cons spec spec))))
+                                   specs)))
+      ;; bind in user-macro
+      `(let ,(mapcar (lambda (g n) (list g `(gensym ,(string (car n)))))
+              gensyms names-and-forms)
+         ;; bind in final expansion
+         `(let (,,@(mapcar (lambda (g n)
+                             ``(,,g ,,(cdr n)))
+                           gensyms names-and-forms))
+            ;; bind in user-macro
+            ,(let ,(mapcar (lambda (n g) (list (car n) g))
+                    names-and-forms gensyms)
+               ,@forms)))))
+  
+
   (defun rcurry (function &rest arguments)
     "Returns a function that applies the arguments it is called
 with and `arguments` to `function`."
@@ -100,6 +149,23 @@
         (multiple-value-call fn (values-list more) (values-list arguments)))))
   
 
+  (defun mkstr (&rest args)
+    "Receives any number of objects (string, symbol, keyword, char, number), extracts all printed representations, and concatenates them all into one string.
+
+Extracted from _On Lisp_, chapter 4."
+    (with-output-to-string (s)
+      (dolist (a args) (princ a s))))
+  
+
+  (defun symb (&rest args)
+    "Receives any number of objects, concatenates all into one string with `#'mkstr` and converts them to symbol.
+
+Extracted from _On Lisp_, chapter 4.
+
+See also: `symbolicate`"
+    (values (intern (apply #'mkstr args))))
+  
+
   (deftype string-designator ()
     "A string designator type. A string designator is either a string, a symbol,
 or a character."
@@ -143,63 +209,8 @@
 unique symbol the named variable will be bound to."
     `(with-gensyms ,names ,@forms))
   
-
-  (defmacro once-only (specs &body forms)
-    "Evaluates `forms` with symbols specified in `specs` rebound to temporary
-variables, ensuring that each initform is evaluated only once.
-
-Each of `specs` must either be a symbol naming the variable to be rebound, or of
-the form:
-
-    (symbol initform)
-
-Bare symbols in `specs` are equivalent to
-
-    (symbol symbol)
-
-Example:
-
-    (defmacro cons1 (x) (once-only (x) `(cons ,x ,x)))
-      (let ((y 0)) (cons1 (incf y))) => (1 . 1)"
-    (let ((gensyms (make-gensym-list (length specs) "ONCE-ONLY"))
-          (names-and-forms (mapcar (lambda (spec)
-                                     (etypecase spec
-                                       (list
-                                        (destructuring-bind (name form) spec
-                                          (cons name form)))
-                                       (symbol
-                                        (cons spec spec))))
-                                   specs)))
-      ;; bind in user-macro
-      `(let ,(mapcar (lambda (g n) (list g `(gensym ,(string (car n)))))
-              gensyms names-and-forms)
-         ;; bind in final expansion
-         `(let (,,@(mapcar (lambda (g n)
-                             ``(,,g ,,(cdr n)))
-                           gensyms names-and-forms))
-            ;; bind in user-macro
-            ,(let ,(mapcar (lambda (n g) (list (car n) g))
-                    names-and-forms gensyms)
-               ,@forms)))))
-  
-
-  (defun mkstr (&rest args)
-    "Receives any number of objects (string, symbol, keyword, char, number), extracts all printed representations, and concatenates them all into one string.
-
-Extracted from _On Lisp_, chapter 4."
-    (with-output-to-string (s)
-      (dolist (a args) (princ a s))))
-  
-
-  (defun symb (&rest args)
-    "Receives any number of objects, concatenates all into one string with `#'mkstr` and converts them to symbol.
-
-Extracted from _On Lisp_, chapter 4.
-
-See also: `symbolicate`"
-    (values (intern (apply #'mkstr args))))
-  
 (eval-when (:compile-toplevel :load-toplevel :execute)
-  (export '(compose curry rcurry with-gensyms with-unique-names once-only symb)))
+  (export '(compose curry ensure-gethash once-only rcurry symb with-gensyms
+            with-unique-names)))
 
 ;;;; END OF quickutils.lisp ;;;;