# HG changeset patch # User Steve Losh # Date 1541727382 18000 # Node ID b5923704ce428db4cd72dd106fef06a0223f1cc5 # Parent 4660a7402edb6802dbb58776af9c61a782bce64c MPRT diff -r 4660a7402edb -r b5923704ce42 rosalind.asd --- a/rosalind.asd Thu Nov 08 19:23:33 2018 -0500 +++ b/rosalind.asd Thu Nov 08 20:36:22 2018 -0500 @@ -13,6 +13,8 @@ :1am :alexandria :cl-digraph + :cl-ppcre + :drakma :iterate :losh :str diff -r 4660a7402edb -r b5923704ce42 src/problems/mprt.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/problems/mprt.lisp Thu Nov 08 20:36:22 2018 -0500 @@ -0,0 +1,63 @@ +(in-package :rosalind) + +(defparameter *input-mprt* + "A2Z669 +B5ZC00 +P07204_TRBM_HUMAN +P20840_SAG1_YEAST") + +(defparameter *output-mprt* + "B5ZC00 +85 118 142 306 395 +P07204_TRBM_HUMAN +47 115 116 382 409 +P20840_SAG1_YEAST +79 109 135 248 306 348 364 402 485 501 614 +") + +(defparameter *motif-n-glycosylation* "N{P}[ST]{P}") + +(defun motif-to-regex (motif) + "Turn a protein motif shorthand into a PPCRE scanner." + (-<> motif + ;; All we have to do is turn {X} into [^X] and compile. + (ppcre:regex-replace-all "[{]" <> "[^") + (substitute #\] #\} <>) + ppcre:create-scanner)) + +(defun all-matches-dammit (regex target-string) + "Return a list of start and end positions of all matches of `regex` on `target-string`. + + Unlike `ppcre:all-matches` this will return ALL matches, even if they're + overlapping. Example: + + (all-matches-dammit \"a..\" \"aabc\") + ; => + ; (0 3 1 4) + + (ppcre:all-matches \"a..\" \"aabc\") + ; => + ; (0 3) ; dammit + + " + ;; cl-ppcre + (iterate + (with i = 0) + (for (values start end) = (ppcre:scan regex target-string :start i)) + (while start) + (collect start) + (collect end) + (setf i (1+ start)))) + + +(define-problem mprt (data stream) + *input-mprt* + *output-mprt* + (with-output-to-string (s) + (iterate + (with n-glycosylation = (motif-to-regex *motif-n-glycosylation*)) + (for id :in-stream data :using #'read-line) + (for (nil . protein) = (uniprot id)) + (for matches = (all-matches-dammit n-glycosylation protein)) + (when matches + (format s "~A~%~{~D~*~^ ~}~%" id (mapcar #'1+ matches)))))) diff -r 4660a7402edb -r b5923704ce42 src/utils.lisp --- a/src/utils.lisp Thu Nov 08 19:23:33 2018 -0500 +++ b/src/utils.lisp Thu Nov 08 20:36:22 2018 -0500 @@ -225,6 +225,27 @@ (collect (cons label data)))) +;;;; Uniprot ------------------------------------------------------------------ +(defvar *uniprot-cache* (make-hash-table :test #'equal)) + +(defmacro get-cached (key cache expr) + (once-only (key cache) + (with-gensyms (value) + `(if-found (,value (gethash ,key ,cache)) + ,value + (setf (gethash ,key ,cache) ,expr))))) + +(defun uniprot-url (id) + (format nil "http://www.uniprot.org/uniprot/~A.fasta" id)) + +(defun uniprot (id) + (get-cached id *uniprot-cache* + (-<> (uniprot-url id) + drakma:http-request + read-fasta-into-alist + first))) + + ;;;; Testing ------------------------------------------------------------------ (defmacro define-test (problem input output &optional (test 'string=)) `(test ,(symb 'test- problem) diff -r 4660a7402edb -r b5923704ce42 vendor/make-quickutils.lisp --- a/vendor/make-quickutils.lisp Thu Nov 08 19:23:33 2018 -0500 +++ b/vendor/make-quickutils.lisp Thu Nov 08 20:36:22 2018 -0500 @@ -6,10 +6,11 @@ :compose :curry + :ensure-gethash + :once-only :rcurry + :symb :with-gensyms - :once-only - :symb ) :package "ROSALIND.QUICKUTILS") diff -r 4660a7402edb -r b5923704ce42 vendor/quickutils.lisp --- a/vendor/quickutils.lisp Thu Nov 08 19:23:33 2018 -0500 +++ b/vendor/quickutils.lisp Thu Nov 08 20:36:22 2018 -0500 @@ -2,7 +2,7 @@ ;;;; See http://quickutil.org for details. ;;;; To regenerate: -;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:COMPOSE :CURRY :RCURRY :WITH-GENSYMS :ONCE-ONLY :SYMB) :ensure-package T :package "ROSALIND.QUICKUTILS") +;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:COMPOSE :CURRY :ENSURE-GETHASH :ONCE-ONLY :RCURRY :SYMB :WITH-GENSYMS) :ensure-package T :package "ROSALIND.QUICKUTILS") (eval-when (:compile-toplevel :load-toplevel :execute) (unless (find-package "ROSALIND.QUICKUTILS") @@ -14,9 +14,9 @@ (when (boundp '*utilities*) (setf *utilities* (union *utilities* '(:MAKE-GENSYM-LIST :ENSURE-FUNCTION - :COMPOSE :CURRY :RCURRY - :STRING-DESIGNATOR :WITH-GENSYMS - :ONCE-ONLY :MKSTR :SYMB)))) + :COMPOSE :CURRY :ENSURE-GETHASH + :ONCE-ONLY :RCURRY :MKSTR :SYMB + :STRING-DESIGNATOR :WITH-GENSYMS)))) (eval-when (:compile-toplevel :load-toplevel :execute) (defun make-gensym-list (length &optional (x "G")) "Returns a list of `length` gensyms, each generated as if with a call to `make-gensym`, @@ -90,6 +90,55 @@ (apply ,fun ,@curries more))))) + (defmacro ensure-gethash (key hash-table &optional default) + "Like `gethash`, but if `key` is not found in the `hash-table` saves the `default` +under key before returning it. Secondary return value is true if key was +already in the table." + `(multiple-value-bind (value ok) (gethash ,key ,hash-table) + (if ok + (values value ok) + (values (setf (gethash ,key ,hash-table) ,default) nil)))) + + + (defmacro once-only (specs &body forms) + "Evaluates `forms` with symbols specified in `specs` rebound to temporary +variables, ensuring that each initform is evaluated only once. + +Each of `specs` must either be a symbol naming the variable to be rebound, or of +the form: + + (symbol initform) + +Bare symbols in `specs` are equivalent to + + (symbol symbol) + +Example: + + (defmacro cons1 (x) (once-only (x) `(cons ,x ,x))) + (let ((y 0)) (cons1 (incf y))) => (1 . 1)" + (let ((gensyms (make-gensym-list (length specs) "ONCE-ONLY")) + (names-and-forms (mapcar (lambda (spec) + (etypecase spec + (list + (destructuring-bind (name form) spec + (cons name form))) + (symbol + (cons spec spec)))) + specs))) + ;; bind in user-macro + `(let ,(mapcar (lambda (g n) (list g `(gensym ,(string (car n))))) + gensyms names-and-forms) + ;; bind in final expansion + `(let (,,@(mapcar (lambda (g n) + ``(,,g ,,(cdr n))) + gensyms names-and-forms)) + ;; bind in user-macro + ,(let ,(mapcar (lambda (n g) (list (car n) g)) + names-and-forms gensyms) + ,@forms))))) + + (defun rcurry (function &rest arguments) "Returns a function that applies the arguments it is called with and `arguments` to `function`." @@ -100,6 +149,23 @@ (multiple-value-call fn (values-list more) (values-list arguments))))) + (defun mkstr (&rest args) + "Receives any number of objects (string, symbol, keyword, char, number), extracts all printed representations, and concatenates them all into one string. + +Extracted from _On Lisp_, chapter 4." + (with-output-to-string (s) + (dolist (a args) (princ a s)))) + + + (defun symb (&rest args) + "Receives any number of objects, concatenates all into one string with `#'mkstr` and converts them to symbol. + +Extracted from _On Lisp_, chapter 4. + +See also: `symbolicate`" + (values (intern (apply #'mkstr args)))) + + (deftype string-designator () "A string designator type. A string designator is either a string, a symbol, or a character." @@ -143,63 +209,8 @@ unique symbol the named variable will be bound to." `(with-gensyms ,names ,@forms)) - - (defmacro once-only (specs &body forms) - "Evaluates `forms` with symbols specified in `specs` rebound to temporary -variables, ensuring that each initform is evaluated only once. - -Each of `specs` must either be a symbol naming the variable to be rebound, or of -the form: - - (symbol initform) - -Bare symbols in `specs` are equivalent to - - (symbol symbol) - -Example: - - (defmacro cons1 (x) (once-only (x) `(cons ,x ,x))) - (let ((y 0)) (cons1 (incf y))) => (1 . 1)" - (let ((gensyms (make-gensym-list (length specs) "ONCE-ONLY")) - (names-and-forms (mapcar (lambda (spec) - (etypecase spec - (list - (destructuring-bind (name form) spec - (cons name form))) - (symbol - (cons spec spec)))) - specs))) - ;; bind in user-macro - `(let ,(mapcar (lambda (g n) (list g `(gensym ,(string (car n))))) - gensyms names-and-forms) - ;; bind in final expansion - `(let (,,@(mapcar (lambda (g n) - ``(,,g ,,(cdr n))) - gensyms names-and-forms)) - ;; bind in user-macro - ,(let ,(mapcar (lambda (n g) (list (car n) g)) - names-and-forms gensyms) - ,@forms))))) - - - (defun mkstr (&rest args) - "Receives any number of objects (string, symbol, keyword, char, number), extracts all printed representations, and concatenates them all into one string. - -Extracted from _On Lisp_, chapter 4." - (with-output-to-string (s) - (dolist (a args) (princ a s)))) - - - (defun symb (&rest args) - "Receives any number of objects, concatenates all into one string with `#'mkstr` and converts them to symbol. - -Extracted from _On Lisp_, chapter 4. - -See also: `symbolicate`" - (values (intern (apply #'mkstr args)))) - (eval-when (:compile-toplevel :load-toplevel :execute) - (export '(compose curry rcurry with-gensyms with-unique-names once-only symb))) + (export '(compose curry ensure-gethash once-only rcurry symb with-gensyms + with-unique-names))) ;;;; END OF quickutils.lisp ;;;;