src/problems/prsm.lisp @ 7fcd748a4f00

LCSQ, PRSM
author Steve Losh <steve@stevelosh.com>
date Mon, 08 Aug 2022 19:00:58 -0400
parents (none)
children (none)
(defpackage :rosalind/prsm (:use :cl :rosalind :losh :iterate))
(in-package :rosalind/prsm)

;; Sample dataset handed to `define-problem` below.  The solver reads it as:
;;   line 1        - an integer N (here 4),
;;   next N lines  - one protein string per line,
;;   the rest      - one number per line; together they form the spectrum.
(defparameter *input* "4
GSDMQS
VWICN
IASWMQS
PVSMGAD
445.17838
115.02694
186.07931
314.13789
317.1198
215.09061")

;; Expected answer for `*input*`: the largest multiplicity found on the first
;; line, followed by the protein string that achieved it on the second line.
(defparameter *output* "3
GSDMQS")


(defun complete-spectrum (protein)
  "Return a hash table counting the masses associated with PROTEIN.

Keys are the values returned by `u:monoisotopic-mass`; each value is the
number of times that mass was produced.  The mass is taken once for the
whole of PROTEIN, once for every `:start` index from 1 up to (but excluding)
the length, and once for every `:end` index from length-1 down to 1
(presumably the proper suffixes and prefixes -- confirm against
`u:monoisotopic-mass`)."
  (let ((counts (make-hash-table))
        (size (length protein)))
    (flet ((tally (mass)
             ;; Missing keys start at 0, so the first sighting stores 1.
             (incf (gethash mass counts 0))))
      ;; The entire string.
      (tally (u:monoisotopic-mass protein))
      ;; Every START in [1, size).
      (loop :for start :from 1 :below size
            :do (tally (u:monoisotopic-mass protein :start start)))
      ;; Every END from size-1 down to 1, in descending order.
      (loop :for end :downfrom (1- size) :above 0
            :do (tally (u:monoisotopic-mass protein :end end))))
    counts))

(defun multiplicity (spectrum string)
  "Return the cdr of `rosalind/conv::msmax` applied to the Minkowski
difference of SPECTRUM and the complete spectrum of STRING.

SPECTRUM is passed through unchanged as the first argument of
`rosalind/conv::minkowski-difference`; the second argument is the table
built by `complete-spectrum`.  The cdr is presumably the multiplicity of the
most frequent difference -- confirm against `rosalind/conv`."
  ;; TODO extract these into utils?
  (let* ((string-spectrum (complete-spectrum string))
         (differences (rosalind/conv::minkowski-difference spectrum
                                                           string-spectrum))
         (maximum (rosalind/conv::msmax differences)))
    (cdr maximum)))


(define-problem prsm (data stream) *input* *output*
  ;; Reads a protein count, that many protein strings, and then a spectrum
  ;; (one number per remaining line) from DATA.  Returns a two-line string:
  ;; the highest multiplicity, then the first protein that reached it.
  (let* (;; Bound while the spectrum lines are read; presumably makes the
         ;; reader yield exact rationals for decimal literals (an
         ;; implementation extension) -- TODO confirm.
         (*read-default-float-format* 'rational)
         (protein-count (parse-integer (read-line data)))
         (proteins (gimme protein-count (read-line data)))
         (spectrum (frequencies (u:read-lines data :key #'read-from-string)))
         (best-count nil)
         (best-protein nil))
    (dolist (protein proteins)
      (let ((m (multiplicity spectrum protein)))
        ;; Strict comparison: on a tie the earlier protein is kept.
        (when (or (null best-count) (> m best-count))
          (setf best-count m
                best-protein protein))))
    (format nil "~D~%~A" best-count best-protein)))