src/alignment.lisp @ f2a11ed01196
default tip
Add Needleman-Wunsch
author |
Steve Losh <steve@stevelosh.com> |
date |
Sat, 02 Nov 2019 11:06:08 -0400 |
parents |
(none) |
children |
(none) |
(defpackage :sand.alignment
(:use :cl :losh :iterate))
(in-package :sand.alignment)
;;;; An implementation of the Needleman-Wunsch global alignment algorithm.
(declaim (ftype (function (base-char base-char) fixnum) basic-score))
(defun basic-score (a b)
(if (char= a b)
1
-1))
(defun make-needleman-wunsch-grid (rows cols)
(let ((grid (make-array (list rows cols)
:element-type 'fixnum
:initial-element 0)))
(iterate (for row :below rows)
(for score :downfrom 0)
(setf (aref grid row 0) score))
(iterate (for col :below cols)
(for score :downfrom 0)
(setf (aref grid 0 col) score))
grid))
(defun extract-path (prev)
(iterate
(with (rows cols) = (array-dimensions prev))
(for coord
:first (cons (1- rows) (1- cols)) ; start at the bottom right
:then (aref prev (car coord) (cdr coord))) ; move according to the path
(until (null coord))
(collect coord :at :start)))
(defun compute-alignment (seq1 seq2 path)
(macrolet
((mark (&body dest-value-pairs)
`(progn
,@(loop :for (dest value) :on dest-value-pairs :by #'cddr
:collect `(collect ,value
:into ,dest
:result-type 'simple-base-string)))))
(iterate
(for (r . c) :in path)
(for pr :previous r)
(for pc :previous c)
(if-first-time
;; Chew off the gap at the beginning of either sequence, if any.
(progn (dotimes (i r)
(mark align1 #\-
middle #\Space
align2 (aref seq2 i)))
(dotimes (i c)
(mark align1 (aref seq1 i)
middle #\Space
align2 #\-)))
;; Mark another character.
(progn
(for ch1 = (aref seq1 (1- c)))
(for ch2 = (aref seq2 (1- r)))
(mark align1 (if (= c pc) #\- ch1)
align2 (if (= r pr) #\- ch2)
middle (cond
((or (= r pr) (= c pc)) #\space)
((char= ch1 ch2) #\|)
(t #\x)))))
(finally (return (values align1 middle align2))))))
(defun print-grid (seq1 seq2 grid &key (pad 4))
(flet ((print-padded (seq)
(map nil (lambda (el) (format t "~V@A" pad el)) seq))
(row-of (array row) ; ugly hack, but just for debugging…
(iterate
(for col :below (array-dimension array 1))
(collect (aref array row col)))))
(print-padded " ")
(print-padded seq1)
(terpri)
(iterate
(for row :below (array-dimension grid 0))
(print-padded (list (if (plusp row)
(aref seq2 (1- row))
#\Space)))
(print-padded (row-of grid row))
(terpri))))
(defun needleman-wunsch (seq1 seq2 &key
(score-function #'basic-score)
(gap-score -1))
(setf seq1 (coerce seq1 'simple-base-string)
seq2 (coerce seq2 'simple-base-string))
(iterate
(with rows = (1+ (length seq2)))
(with cols = (1+ (length seq1)))
(with grid = (make-needleman-wunsch-grid rows cols))
(with prev = (make-array (list rows cols) :initial-element nil))
(for row :from 1 :below rows)
(for ch2 :in-string seq2)
(iterate
(for col :from 1 :below cols)
(for ch1 :in-string seq1)
(for row-1 = (1- row))
(for col-1 = (1- col))
(for match = (+ (aref grid row-1 col-1)
(funcall score-function ch1 ch2)))
(for gap-1 = (+ (aref grid row col-1) gap-score))
(for gap-2 = (+ (aref grid row-1 col) gap-score))
(for best = (max match gap-1 gap-2))
(setf (aref grid row col) best
(aref prev row col) (cond
((= best match) (cons row-1 col-1))
((= best gap-1) (cons row col-1))
((= best gap-2) (cons col-1 row)))))
(finally
(print-grid seq1 seq2 grid)
(return
(compute-alignment seq1 seq2 (extract-path prev))))))
;; (needleman-wunsch
;; "GGGGTTATAAAAC"
;; "GGTAT")