# HG changeset patch # User Steve Losh # Date 1471989577 0 # Node ID 8f91275f1233c7b3b2430be7e6fd22cc5227b325 # Parent 301c22f28112b764c4812b2039996a3d9a0ef111 Add Huffman tree encoding and generation diff -r 301c22f28112 -r 8f91275f1233 Makefile --- a/Makefile Tue Aug 23 00:37:10 2016 +0000 +++ b/Makefile Tue Aug 23 21:59:37 2016 +0000 @@ -1,4 +1,6 @@ -.PHONY: +.PHONY: vendor + +vendor: vendor/quickutils.lisp vendor/quickutils.lisp: vendor/make-quickutils.lisp cd vendor && ros run -L sbcl --load make-quickutils.lisp --eval '(quit)' diff -r 301c22f28112 -r 8f91275f1233 src/huffman-trees.lisp --- a/src/huffman-trees.lisp Tue Aug 23 00:37:10 2016 +0000 +++ b/src/huffman-trees.lisp Tue Aug 23 21:59:37 2016 +0000 @@ -49,6 +49,8 @@ (symbols (required-argument) :type list) (weight (required-argument) :type real)) +(define-with-macro node left right) + (defun tree-symbols (tree) (etypecase tree @@ -69,6 +71,10 @@ :symbols (append (tree-symbols left) (tree-symbols right)))) +(defun length1p (list) + "Return whether `list` has length 1, without traversing it all the way." + (and (consp list) (null (cdr list)))) + ;;;; External Interface ------------------------------------------------------- (defun decode (bits tree) @@ -85,6 +91,53 @@ (recur (rest bits) tree))) (node (recur (rest bits) next-branch)))))))) +(defun encode (message tree) + (labels + ((fail (symbol) + (error "Unknown symbol ~S" symbol)) + (encode-symbol (symbol tree) + (recursively ((tree tree)) + (etypecase tree + (leaf + (if (eql symbol (leaf-symbol tree)) + '() + (fail symbol))) + (node + (with-node (tree) + (cond + ((member symbol (tree-symbols left)) (cons 0 (recur left))) + ((member symbol (tree-symbols right)) (cons 1 (recur right))) + (t (fail symbol))))))))) + (if (null message) + '() + (append (encode-symbol (first message) tree) + (encode (rest message) tree))))) + +(defun encode (message tree) + ;; Alternate version + (flet ((encode-symbol (symbol tree) + (recursively ((tree tree)) + (etypecase tree + (leaf (if (eql symbol (leaf-symbol tree)) + '() + (error "Unknown symbol ~S" symbol))) + (node (with-node (tree) + ;; If it's not in the left, assume it's in the right. If + ;; it's not present at all we'll just recur all the way + ;; down to the rightmost leaf and let that handle the + ;; error. + ;; + ;; This saves a member check at each level, but doesn't + ;; bail early on garbage data. One would hope garbage + ;; data is rare. + (if (member symbol (tree-symbols left)) + (cons 0 (recur left)) + (cons 1 (recur right))))))))) + (if (null message) + '() + (append (encode-symbol (first message) tree) + (encode (rest message) tree))))) + (defun adjoin-set (tree set) (cond @@ -99,17 +152,48 @@ (defun make-leaf-set (pairs) (if (null pairs) '() - (destructuring-bind (symbol weight) + (destructuring-bind (symbol . weight) (first pairs) (adjoin-set (make-leaf symbol weight) (make-leaf-set (rest pairs)))))) +(defun generate-huffman-tree (data) + (check-type data cons) + (labels ((successive-merge (trees) + (if (length1p trees) + (first trees) + (destructuring-bind (a b . rest) trees + (successive-merge + (adjoin-set (make-node a b) rest)))))) + (successive-merge (make-leaf-set (hash-table-alist (frequencies data)))))) + + +;;;; Scratch ------------------------------------------------------------------ (defparameter *sample-tree* (make-node (make-leaf 'a 4) (make-node (make-leaf 'b 2) (make-node (make-leaf 'D 1) (make-leaf 'C 1))))) +(defparameter *song* + '(Well she was just seventeen + You know what I mean + And the way she looked was way beyond compare + So how could I dance with another + When I saw her standing there + + Well she looked at me and I I could see + That before too long Id fall in love with her + She wouldnt dance with another + When I saw her standing there)) + +(defparameter *song-tree* (generate-huffman-tree *song*)) + + ; (decode '(0 1 1 0 0 1 0 1 0 1 1 1 0) *sample-tree*) +; (encode '(A D A B B C A) *sample-tree*) +; (decode (encode '(d a b c a b) *sample-tree*) *sample-tree*) + +; (decode (encode *song* *song-tree*) *song-tree*) diff -r 301c22f28112 -r 8f91275f1233 src/markov.lisp --- a/src/markov.lisp Tue Aug 23 00:37:10 2016 +0000 +++ b/src/markov.lisp Tue Aug 23 21:59:37 2016 +0000 @@ -1,6 +1,7 @@ (in-package #:sand.markov) -(defparameter *text* (slurp "data/lightships-and-lighthouses.txt")) +(defparameter *text* + (read-file-into-string "data/lightships-and-lighthouses.txt")) (defclass markov () ((database :initarg :database :accessor markov-database) @@ -47,7 +48,7 @@ (if-first-time (pushnew prefix beginnings :test 'equal)) (vector-push-extend suffix - (gethash-or-init prefix database (make-vector)))))) + (ensure-gethash prefix database (make-vector)))))) (make-instance 'markov :database database :beginnings (coerce beginnings 'vector)))) diff -r 301c22f28112 -r 8f91275f1233 vendor/make-quickutils.lisp --- a/vendor/make-quickutils.lisp Tue Aug 23 00:37:10 2016 +0000 +++ b/vendor/make-quickutils.lisp Tue Aug 23 21:59:37 2016 +0000 @@ -14,6 +14,9 @@ :tree-collect :ensure-gethash :required-argument + :read-file-into-string + :hash-table-alist + :hash-table-plist ; :switch ; :while ; :ensure-boolean diff -r 301c22f28112 -r 8f91275f1233 vendor/quickutils.lisp --- a/vendor/quickutils.lisp Tue Aug 23 00:37:10 2016 +0000 +++ b/vendor/quickutils.lisp Tue Aug 23 21:59:37 2016 +0000 @@ -2,7 +2,7 @@ ;;;; See http://quickutil.org for details. ;;;; To regenerate: -;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:WITH-GENSYMS :ONCE-ONLY :COMPOSE :CURRY :RCURRY :N-GRAMS :DEFINE-CONSTANT :RIFFLE :TREE-COLLECT :ENSURE-GETHASH :REQUIRED-ARGUMENT) :ensure-package T :package "SAND.QUICKUTILS") +;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:WITH-GENSYMS :ONCE-ONLY :COMPOSE :CURRY :RCURRY :N-GRAMS :DEFINE-CONSTANT :RIFFLE :TREE-COLLECT :ENSURE-GETHASH :REQUIRED-ARGUMENT :READ-FILE-INTO-STRING :HASH-TABLE-ALIST :HASH-TABLE-PLIST) :ensure-package T :package "SAND.QUICKUTILS") (eval-when (:compile-toplevel :load-toplevel :execute) (unless (find-package "SAND.QUICKUTILS") @@ -18,7 +18,10 @@ :ENSURE-FUNCTION :COMPOSE :CURRY :RCURRY :TAKE :N-GRAMS :DEFINE-CONSTANT :RIFFLE :TREE-COLLECT - :ENSURE-GETHASH :REQUIRED-ARGUMENT)))) + :ENSURE-GETHASH :REQUIRED-ARGUMENT + :WITH-OPEN-FILE* :WITH-INPUT-FROM-FILE + :READ-FILE-INTO-STRING + :HASH-TABLE-ALIST :HASH-TABLE-PLIST)))) (deftype string-designator () "A string designator type. A string designator is either a string, a symbol, @@ -286,9 +289,82 @@ a default value for required keyword arguments." (error "Required argument ~@[~S ~]missing." name)) + + (defmacro with-open-file* ((stream filespec &key direction element-type + if-exists if-does-not-exist external-format) + &body body) + "Just like `with-open-file`, but `nil` values in the keyword arguments mean to use +the default value specified for `open`." + (once-only (direction element-type if-exists if-does-not-exist external-format) + `(with-open-stream + (,stream (apply #'open ,filespec + (append + (when ,direction + (list :direction ,direction)) + (when ,element-type + (list :element-type ,element-type)) + (when ,if-exists + (list :if-exists ,if-exists)) + (when ,if-does-not-exist + (list :if-does-not-exist ,if-does-not-exist)) + (when ,external-format + (list :external-format ,external-format))))) + ,@body))) + + + (defmacro with-input-from-file ((stream-name file-name &rest args + &key (direction nil direction-p) + &allow-other-keys) + &body body) + "Evaluate `body` with `stream-name` to an input stream on the file +`file-name`. `args` is sent as is to the call to `open` except `external-format`, +which is only sent to `with-open-file` when it's not `nil`." + (declare (ignore direction)) + (when direction-p + (error "Can't specifiy :DIRECTION for WITH-INPUT-FROM-FILE.")) + `(with-open-file* (,stream-name ,file-name :direction :input ,@args) + ,@body)) + + + (defun read-file-into-string (pathname &key (buffer-size 4096) external-format) + "Return the contents of the file denoted by `pathname` as a fresh string. + +The `external-format` parameter will be passed directly to `with-open-file` +unless it's `nil`, which means the system default." + (with-input-from-file + (file-stream pathname :external-format external-format) + (let ((*print-pretty* nil)) + (with-output-to-string (datum) + (let ((buffer (make-array buffer-size :element-type 'character))) + (loop + :for bytes-read = (read-sequence buffer file-stream) + :do (write-sequence buffer datum :start 0 :end bytes-read) + :while (= bytes-read buffer-size))))))) + + + (defun hash-table-alist (table) + "Returns an association list containing the keys and values of hash table +`table`." + (let ((alist nil)) + (maphash (lambda (k v) + (push (cons k v) alist)) + table) + alist)) + + + (defun hash-table-plist (table) + "Returns a property list containing the keys and values of hash table +`table`." + (let ((plist nil)) + (maphash (lambda (k v) + (setf plist (list* k v plist))) + table) + plist)) + (eval-when (:compile-toplevel :load-toplevel :execute) (export '(with-gensyms with-unique-names once-only compose curry rcurry n-grams define-constant riffle tree-collect ensure-gethash - required-argument))) + required-argument read-file-into-string hash-table-alist + hash-table-plist))) ;;;; END OF quickutils.lisp ;;;;