src/markov.lisp @ 2cb0208c1744

Markov by Tungsten Light
author Steve Losh <steve@stevelosh.com>
date Wed, 17 Aug 2016 03:29:14 +0000
parents (none)
children 4199b9a26696
(in-package #:sand.markov)

(defparameter *text* (slurp "data/lightships-and-lighthouses.txt"))

(defclass markov ()
  ((database :initarg :database :accessor markov-database)
   (beginnings :initarg :beginnings :accessor markov-beginnings)))


(defun make-vector (&optional (initial-length 1))
  (make-array initial-length :fill-pointer 0 :adjustable t))


(defun delimiterp (c)
  (member c '(#\space #\newline) :test #'char=))

(defun sentence-end-p (word)
  (member (aref word (1- (length word)))
          '(#\. #\? #\!)))


(defun split-words (string)
  (split-sequence-if #'delimiterp string :remove-empty-subseqs t))

(defun partition-if (pred seq)
  (iterate
    (for element :in seq)
    (collect element :into current)
    (when (funcall pred element)
      (collect current :into result)
      (setf current nil))
    (finally (return result))))


(defun build-markov-generator (corpus order)
  (let* ((database (make-hash-table :test 'equal))
         (beginnings nil)
         (words (split-words corpus))
         (sentences (partition-if #'sentence-end-p words)))
    (iterate
      (for sentence :in sentences)
      (when (> (length sentence) order)
        (iterate
          (for chunk :in (n-grams (1+ order) sentence))
          (for prefix = (take order chunk))
          (for suffix = (car (last chunk)))
          (if-first-time (pushnew prefix beginnings :test 'equal))
          (vector-push-extend
            suffix
            (gethash-or-init prefix database (make-vector))))))
    (make-instance 'markov
                   :database database
                   :beginnings (coerce beginnings 'vector))))


(defun generate-sentence (markov)
  (iterate
    (repeat 50)
    (with start = (random-elt (markov-beginnings markov)))
    (for prefix :first start :then (append (cdr prefix) (list word)))
    (for word = (random-elt (gethash prefix (markov-database markov))))
    (collect word :into sentence)
    (until (sentence-end-p word))
    (finally (return (format nil "~{~A~^ ~}" (append start sentence))))))



(defparameter *m* (build-markov-generator *text* 2))


(iterate (repeat 10)
         (terpri)
         (terpri)
         (princ (generate-sentence *m*)))