8f91275f1233

Add Huffman tree encoding and generation
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Tue, 23 Aug 2016 21:59:37 +0000
parents 301c22f28112
children 8a0871a52d1d
branches/tags (none)
files Makefile src/huffman-trees.lisp src/markov.lisp vendor/make-quickutils.lisp vendor/quickutils.lisp

Changes

--- a/Makefile	Tue Aug 23 00:37:10 2016 +0000
+++ b/Makefile	Tue Aug 23 21:59:37 2016 +0000
@@ -1,4 +1,6 @@
-.PHONY:
+.PHONY: vendor
+
+vendor: vendor/quickutils.lisp
 
 vendor/quickutils.lisp: vendor/make-quickutils.lisp
 	cd vendor && ros run -L sbcl --load make-quickutils.lisp  --eval '(quit)'
--- a/src/huffman-trees.lisp	Tue Aug 23 00:37:10 2016 +0000
+++ b/src/huffman-trees.lisp	Tue Aug 23 21:59:37 2016 +0000
@@ -49,6 +49,8 @@
   (symbols (required-argument) :type list)
   (weight (required-argument) :type real))
 
+(define-with-macro node left right)
+
 
 (defun tree-symbols (tree)
   (etypecase tree
@@ -69,6 +71,10 @@
               :symbols (append (tree-symbols left)
                                (tree-symbols right))))
 
+(defun length1p (list)
+  "Return whether `list` has length 1, without traversing it all the way."
+  (and (consp list) (null (cdr list))))
+
 
 ;;;; External Interface -------------------------------------------------------
 (defun decode (bits tree)
@@ -85,6 +91,53 @@
                         (recur (rest bits) tree)))
             (node (recur (rest bits) next-branch))))))))
 
+(defun encode (message tree)
+  (labels
+      ((fail (symbol)
+         (error "Unknown symbol ~S" symbol))
+       (encode-symbol (symbol tree)
+         (recursively ((tree tree))
+           (etypecase tree
+             (leaf
+               (if (eql symbol (leaf-symbol tree))
+                 '()
+                 (fail symbol)))
+             (node
+               (with-node (tree)
+                 (cond
+                   ((member symbol (tree-symbols left)) (cons 0 (recur left)))
+                   ((member symbol (tree-symbols right)) (cons 1 (recur right)))
+                   (t (fail symbol)))))))))
+    (if (null message)
+      '()
+      (append (encode-symbol (first message) tree)
+              (encode (rest message) tree)))))
+
+(defun encode (message tree)
+  ;; Alternate version
+  (flet ((encode-symbol (symbol tree)
+           (recursively ((tree tree))
+             (etypecase tree
+               (leaf (if (eql symbol (leaf-symbol tree))
+                       '()
+                       (error "Unknown symbol ~S" symbol)))
+               (node (with-node (tree)
+                       ;; If it's not in the left, assume it's in the right.  If
+                       ;; it's not present at all we'll just recur all the way
+                       ;; down to the rightmost leaf and let that handle the
+                       ;; error.
+                       ;;
+                       ;; This saves a member check at each level, but doesn't
+                       ;; bail early on garbage data.  One would hope garbage
+                       ;; data is rare.
+                       (if (member symbol (tree-symbols left))
+                         (cons 0 (recur left))
+                         (cons 1 (recur right)))))))))
+    (if (null message)
+      '()
+      (append (encode-symbol (first message) tree)
+              (encode (rest message) tree)))))
+
 
 (defun adjoin-set (tree set)
   (cond
@@ -99,17 +152,48 @@
 (defun make-leaf-set (pairs)
   (if (null pairs)
     '()
-    (destructuring-bind (symbol weight)
+    (destructuring-bind (symbol . weight)
         (first pairs)
       (adjoin-set (make-leaf symbol weight)
                   (make-leaf-set (rest pairs))))))
 
 
+(defun generate-huffman-tree (data)
+  (check-type data cons)
+  (labels ((successive-merge (trees)
+             (if (length1p trees)
+               (first trees)
+               (destructuring-bind (a b . rest) trees
+                 (successive-merge
+                   (adjoin-set (make-node a b) rest))))))
+    (successive-merge (make-leaf-set (hash-table-alist (frequencies data))))))
+
+
+;;;; Scratch ------------------------------------------------------------------
 (defparameter *sample-tree*
   (make-node (make-leaf 'a 4)
              (make-node (make-leaf 'b 2)
                         (make-node (make-leaf 'D 1)
                                    (make-leaf 'C 1)))))
 
+(defparameter *song*
+  '(Well she was just seventeen
+    You know what I mean
+    And the way she looked was way beyond compare
+    So how could I dance with another
+    When I saw her standing there
+
+    Well she looked at me and I I could see
+    That before too long Id fall in love with her
+    She wouldnt dance with another
+    When I saw her standing there))
+
+(defparameter *song-tree* (generate-huffman-tree *song*))
+
+
 
 ; (decode '(0 1 1 0 0 1 0 1 0 1 1 1 0) *sample-tree*)
+; (encode '(A D A B B C A) *sample-tree*)
+; (decode (encode '(d a b c a b) *sample-tree*) *sample-tree*)
+
+; (decode (encode *song* *song-tree*) *song-tree*)
--- a/src/markov.lisp	Tue Aug 23 00:37:10 2016 +0000
+++ b/src/markov.lisp	Tue Aug 23 21:59:37 2016 +0000
@@ -1,6 +1,7 @@
 (in-package #:sand.markov)
 
-(defparameter *text* (slurp "data/lightships-and-lighthouses.txt"))
+(defparameter *text*
+  (read-file-into-string "data/lightships-and-lighthouses.txt"))
 
 (defclass markov ()
   ((database :initarg :database :accessor markov-database)
@@ -47,7 +48,7 @@
           (if-first-time (pushnew prefix beginnings :test 'equal))
           (vector-push-extend
             suffix
-            (gethash-or-init prefix database (make-vector))))))
+            (ensure-gethash prefix database (make-vector))))))
     (make-instance 'markov
                    :database database
                    :beginnings (coerce beginnings 'vector))))
--- a/vendor/make-quickutils.lisp	Tue Aug 23 00:37:10 2016 +0000
+++ b/vendor/make-quickutils.lisp	Tue Aug 23 21:59:37 2016 +0000
@@ -14,6 +14,9 @@
                :tree-collect
                :ensure-gethash
                :required-argument
+               :read-file-into-string
+               :hash-table-alist
+               :hash-table-plist
                ; :switch
                ; :while
                ; :ensure-boolean
--- a/vendor/quickutils.lisp	Tue Aug 23 00:37:10 2016 +0000
+++ b/vendor/quickutils.lisp	Tue Aug 23 21:59:37 2016 +0000
@@ -2,7 +2,7 @@
 ;;;; See http://quickutil.org for details.
 
 ;;;; To regenerate:
-;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:WITH-GENSYMS :ONCE-ONLY :COMPOSE :CURRY :RCURRY :N-GRAMS :DEFINE-CONSTANT :RIFFLE :TREE-COLLECT :ENSURE-GETHASH :REQUIRED-ARGUMENT) :ensure-package T :package "SAND.QUICKUTILS")
+;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:WITH-GENSYMS :ONCE-ONLY :COMPOSE :CURRY :RCURRY :N-GRAMS :DEFINE-CONSTANT :RIFFLE :TREE-COLLECT :ENSURE-GETHASH :REQUIRED-ARGUMENT :READ-FILE-INTO-STRING :HASH-TABLE-ALIST :HASH-TABLE-PLIST) :ensure-package T :package "SAND.QUICKUTILS")
 
 (eval-when (:compile-toplevel :load-toplevel :execute)
   (unless (find-package "SAND.QUICKUTILS")
@@ -18,7 +18,10 @@
                                          :ENSURE-FUNCTION :COMPOSE :CURRY
                                          :RCURRY :TAKE :N-GRAMS
                                          :DEFINE-CONSTANT :RIFFLE :TREE-COLLECT
-                                         :ENSURE-GETHASH :REQUIRED-ARGUMENT))))
+                                         :ENSURE-GETHASH :REQUIRED-ARGUMENT
+                                         :WITH-OPEN-FILE* :WITH-INPUT-FROM-FILE
+                                         :READ-FILE-INTO-STRING
+                                         :HASH-TABLE-ALIST :HASH-TABLE-PLIST))))
 
   (deftype string-designator ()
     "A string designator type. A string designator is either a string, a symbol,
@@ -286,9 +289,82 @@
 a default value for required keyword arguments."
     (error "Required argument ~@[~S ~]missing." name))
   
+
+  (defmacro with-open-file* ((stream filespec &key direction element-type
+                                                   if-exists if-does-not-exist external-format)
+                             &body body)
+    "Just like `with-open-file`, but `nil` values in the keyword arguments mean to use
+the default value specified for `open`."
+    (once-only (direction element-type if-exists if-does-not-exist external-format)
+      `(with-open-stream
+           (,stream (apply #'open ,filespec
+                           (append
+                            (when ,direction
+                              (list :direction ,direction))
+                            (when ,element-type
+                              (list :element-type ,element-type))
+                            (when ,if-exists
+                              (list :if-exists ,if-exists))
+                            (when ,if-does-not-exist
+                              (list :if-does-not-exist ,if-does-not-exist))
+                            (when ,external-format
+                              (list :external-format ,external-format)))))
+         ,@body)))
+  
+
+  (defmacro with-input-from-file ((stream-name file-name &rest args
+                                                         &key (direction nil direction-p)
+                                                         &allow-other-keys)
+                                  &body body)
+    "Evaluate `body` with `stream-name` to an input stream on the file
+`file-name`. `args` is sent as is to the call to `open` except `external-format`,
+which is only sent to `with-open-file` when it's not `nil`."
+    (declare (ignore direction))
+    (when direction-p
+      (error "Can't specifiy :DIRECTION for WITH-INPUT-FROM-FILE."))
+    `(with-open-file* (,stream-name ,file-name :direction :input ,@args)
+       ,@body))
+  
+
+  (defun read-file-into-string (pathname &key (buffer-size 4096) external-format)
+    "Return the contents of the file denoted by `pathname` as a fresh string.
+
+The `external-format` parameter will be passed directly to `with-open-file`
+unless it's `nil`, which means the system default."
+    (with-input-from-file
+        (file-stream pathname :external-format external-format)
+      (let ((*print-pretty* nil))
+        (with-output-to-string (datum)
+          (let ((buffer (make-array buffer-size :element-type 'character)))
+            (loop
+              :for bytes-read = (read-sequence buffer file-stream)
+              :do (write-sequence buffer datum :start 0 :end bytes-read)
+              :while (= bytes-read buffer-size)))))))
+  
+
+  (defun hash-table-alist (table)
+    "Returns an association list containing the keys and values of hash table
+`table`."
+    (let ((alist nil))
+      (maphash (lambda (k v)
+                 (push (cons k v) alist))
+               table)
+      alist))
+  
+
+  (defun hash-table-plist (table)
+    "Returns a property list containing the keys and values of hash table
+`table`."
+    (let ((plist nil))
+      (maphash (lambda (k v)
+                 (setf plist (list* k v plist)))
+               table)
+      plist))
+  
 (eval-when (:compile-toplevel :load-toplevel :execute)
   (export '(with-gensyms with-unique-names once-only compose curry rcurry
             n-grams define-constant riffle tree-collect ensure-gethash
-            required-argument)))
+            required-argument read-file-into-string hash-table-alist
+            hash-table-plist)))
 
 ;;;; END OF quickutils.lisp ;;;;