src/problems/orf.lisp @ 870270771fde

PDST
author Steve Losh <steve@stevelosh.com>
date Sun, 19 Jan 2020 21:14:53 -0500
parents 2735aa6aab79
children 86d92162dc1f
(defpackage :rosalind/orf (:use :cl :rosalind :losh :iterate))
(in-package :rosalind/orf)

;;; Sample dataset for this problem: a single FASTA record (`Rosalind_99`)
;;; holding one DNA string.  Passed to `define-problem` below as its input
;;; argument.
(defparameter *input*
  ">Rosalind_99
AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG")

;;; One protein string per line, shortest first.  Passed to `define-problem`
;;; below right after `*input*` -- presumably the expected answer for that
;;; sample (TODO confirm against the `define-problem` macro).
(defparameter *output*
  "M
MTPRLGLESLLE
MGMTPRLGLESLLE
MLLGSFRLIPKETLIQVAGSSPCNLS")


(defun translate-all (rna)
  "Collect, in order of discovery, every protein `u:translate` finds in `rna`.

The search begins at index 0.  Each later search begins one position past the
second value returned by the previous `u:translate` call (presumably the index
where that protein's translation started -- confirm against `u:translate`).
The scan ends as soon as `u:translate` returns no protein."
  (let ((found '())
        (from 0))
    (loop
      (multiple-value-bind (protein protein-start)
          (u:translate rna :start from)
        ;; No further protein: hand back what we have, oldest first.
        (unless protein
          (return (nreverse found)))
        (push protein found)
        ;; Resume just past where this protein began so that proteins
        ;; starting inside it are still discovered.
        (setf from (1+ protein-start))))))

;;; ORF: return, as a newline-separated string, every distinct protein that
;;; `translate-all` finds in the transcribed DNA strand or in the transcribed
;;; reverse complement of that strand.  Proteins are listed shortest first.
(define-problem orf (data stream) *input* *output*
  (let* (;; Only the first FASTA record is used; `cdr` keeps the sequence
         ;; half of the entry (presumably a (label . sequence) pair).
         (dna (cdr (first (u:read-fasta-into-alist data))))
         (rna1 (u:transcribe dna))
         (rna2 (u:transcribe (u:reverse-complement dna))))
    (-<> (append (translate-all rna1)
                 (translate-all rna2))
      ;; The same protein can be found on both strands or from several
      ;; starting points; keep a single copy of each.
      (remove-duplicates <> :test #'string=)
      ;; `sort` is not required to be stable, so equal-length proteins could
      ;; come out in an implementation-dependent order.  `stable-sort` keeps
      ;; their discovery order and makes the result deterministic.
      (stable-sort <> #'< :key #'length)
      ;; Join with newlines, with no trailing newline after the last protein.
      (format nil "~{~A~^~%~}" <>))))