b2decfc394fa

Rename file
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Sun, 26 Jul 2020 15:20:55 -0400
parents 716d1110c12f
children 84fa1724b747
branches/tags (none)
files .hgignore src/hamming.lisp src/huffman.lisp

Changes

--- a/.hgignore	Sat Jul 25 15:48:24 2020 -0400
+++ b/.hgignore	Sun Jul 26 15:20:55 2020 -0400
@@ -1,4 +1,4 @@
-syntax:regex
+syntax:regexp
 ^c$
 ^e$
 scratch.lisp
--- a/src/hamming.lisp	Sat Jul 25 15:48:24 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,188 +0,0 @@
-(in-package :tdcb/huffman)
-
-;;;; Counts and Chunks --------------------------------------------------------
-(defun count-bytes (byte-vector)
-  (loop :with result = (make-array 256 :initial-element 0)
-        :for b :across byte-vector
-        :do (incf (aref result b))
-        :finally (return result)))
-
-(defun next-chunk (counts &key (start 0) (skips 0))
-  (when-let ((start (position-if #'plusp counts :start start)))
-    (iterate (with end = nil)
-             (with skips-remaining = skips)
-             (for c :in-vector counts :with-index i :from start)
-             (if (plusp c)
-               (setf end i
-                     skips-remaining skips)
-               (when (minusp (decf skips-remaining))
-                 (finish)))
-             (returning (list start end (subseq counts start (1+ end)))
-                        i))))
-
-(defun counts-to-chunks (counts &key (skips 0))
-  (iterate
-    (with start = 0)
-    (for (values chunk i) = (next-chunk counts :start start :skips skips))
-    (while chunk)
-    (collect chunk)
-    (setf start i)))
-
-(defun chunks-to-counts (chunks)
-  (let ((result (make-array 256 :initial-element 0)))
-    (iterate (for (start end vals) :in chunks)
-             (replace result vals :start1 start :end1 (1+ end)))
-    result))
-
-(defun print-chunks (chunks stream)
-  (if (null chunks)
-    (write-octets stream #(0 0 0))
-    (iterate (for (start end counts) :in chunks)
-             (write-octet stream start)
-             (write-octet stream end)
-             (write-octets stream counts)
-             (finally (write-octet stream 0)))))
-
-(defun read-chunks (stream)
-  (iterate (for start = (read-octet stream))
-           (until (and (zerop start) (not (first-time-p))))
-           (for end = (read-octet stream))
-           (for chunk = (read-octets stream (1+ (- end start))))
-           (collect (list start end chunk))))
-
-
-;;;; Trees --------------------------------------------------------------------
-(defclass* tree ()
-  (b0 b1 weight))
-
-(defclass* leaf ()
-  (value weight))
-
-(defmethod print-object ((o leaf) stream)
-  (print-unreadable-object (o stream :type t)
-    (format stream "~D ~S weight ~D"
-            (value o)
-            (code-char (value o))
-            (weight o))))
-
-(defmethod print-object ((o tree) stream)
-  (print-unreadable-object (o stream :type t)
-    (format stream "weight ~D" (weight o))))
-
-(defun print-tree (tree)
-  (recursively ((node tree) (indent 0))
-    (format t "~vA~A~%" indent "" node)
-    (when (typep node 'tree)
-      (recur (b0 node) (+ indent 4))
-      (recur (b1 node) (+ indent 4))))
-  tree)
-
-(defun make-tree (counts)
-  "Create a Huffman tree from `counts`."
-  (let ((heap (pileup:make-heap #'< :key #'weight :size 256)))
-    (iterate (for c :in-vector counts :with-index i)
-             (unless (zerop c)
-               (pileup:heap-insert (make-instance 'leaf :value i :weight c) heap)))
-    (iterate
-      (for a = (pileup:heap-pop heap))
-      (when (pileup:heap-empty-p heap)
-        (return a)) ; todo handle single-leaf trees
-      (for b = (pileup:heap-pop heap))
-      (for weight = (+ (weight a) (weight b)))
-      (pileup:heap-insert (make-instance 'tree :b0 a :b1 b :weight weight)
-                          heap))))
-
-
-;;;; Code Maps ----------------------------------------------------------------
-(defun make-code-map (tree)
-  "Turn a Huffman `tree` into a code map.
-
-  A code map is a 256-element vector.  The indexes are the bytes, and the values
-  are `(code . code-length)` conses, or `nil` if that byte was not present in
-  the tree.
-
-  "
-  (let ((result (make-array 256 :initial-element nil)))
-    (recursively ((node tree) (code 0) (code-length 0))
-      (etypecase node
-        (tree (recur (b0 node) (ash code 1) (1+ code-length))
-              (recur (b1 node) (+ (ash code 1) 1) (1+ code-length)))
-        (leaf (setf (aref result (value node))
-                    (cons code (max 1 code-length))))))
-    result))
-
-(defun print-code-map (map)
-  (iterate
-    (with vals = (_ (iterate
-                      (for item :in-vector map :with-index value)
-                      (when item
-                        (collect (list value (car item) (cdr item)))))
-                   (stable-sort _ #'< :key #'second) ; sort by code
-                   (stable-sort _ #'< :key #'third))) ; then by length
-    (for (value code code-length) :in vals)
-    (format t "~12<~v,'0B:~;~> byte #x~2,'0X ~S has code ~D (length ~D)~%"
-            code-length code
-            value (code-char value)
-            code code-length)))
-
-
-;;;; Trees --------------------------------------------------------------------
-(defun encode (byte map bits)
-  "Encode `byte` to `bits` using the Huffman `map`."
-  (destructuring-bind (code . code-length) (aref map byte)
-    (write-bits bits code code-length)))
-
-(defun decode (bits tree)
-  "Decode a byte from `bits` using the Huffman `tree`."
-  (recursively ((node tree))
-    (if (typep node 'leaf)
-      (value node)
-      (recur (if (zerop (read-bit bits))
-               (b0 node)
-               (b1 node))))))
-
-
-(defun compress% (in out)
-  (let* ((data (alexandria:read-stream-content-into-byte-vector in))
-         (counts (count-bytes data))
-         (chunks (counts-to-chunks counts))
-         (tree (make-tree counts))
-         (map (make-code-map tree)))
-    (print-chunks chunks out)
-    (iterate (for byte :in-vector data)
-             (encode byte map out))))
-
-(defparameter *compress-ui*
-  (adopt:make-interface
-    :name "c huffman"
-    :summary "simple Huffman algorithm"
-    :usage "[OPTIONS]"
-    :help "Compress using a simple Huffman algorithm."))
-
-(defparameter *extract-ui*
-  (adopt:make-interface
-    :name "e huffman"
-    :summary "simple Huffman algorithm"
-    :usage "[OPTIONS]"
-    :help "Extract using a simple Huffman algorithm."))
-
-(defun compress (arguments)
-  (multiple-value-bind (arguments options)
-      (adopt:parse-options *compress-ui* arguments)
-    (destructuring-bind (in-path out-path) arguments
-      (with-open-file (in in-path :direction :input :element-type 'u8)
-        (with-open-file (out out-path :direction :output :element-type 'u8)
-          (with-bits-output (out out)
-            (compress% in out)))))))
-
-(defun extract (arguments)
-  (multiple-value-bind (arguments options)
-      (adopt:parse-options *compress-ui* arguments)
-    (destructuring-bind (in-path out-path) arguments
-      (with-open-file (in in-path :direction :input :element-type 'u8)
-        (with-open-file (out out-path :direction :output :element-type 'u8)
-          (with-bits-input (in in)
-            (extract% in out)))))))
-
-(tdcb/main:ensure-compressor "huffman" "simple Huffman algorithm" #'compress)
-(tdcb/main:ensure-extractor "huffman" "simple Huffman algorithm" #'extract)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/huffman.lisp	Sun Jul 26 15:20:55 2020 -0400
@@ -0,0 +1,188 @@
+(in-package :tdcb/huffman)
+
+;;;; Counts and Chunks --------------------------------------------------------
+(defun count-bytes (byte-vector)
+  (loop :with result = (make-array 256 :initial-element 0)
+        :for b :across byte-vector
+        :do (incf (aref result b))
+        :finally (return result)))
+
+(defun next-chunk (counts &key (start 0) (skips 0))
+  (when-let ((start (position-if #'plusp counts :start start)))
+    (iterate (with end = nil)
+             (with skips-remaining = skips)
+             (for c :in-vector counts :with-index i :from start)
+             (if (plusp c)
+               (setf end i
+                     skips-remaining skips)
+               (when (minusp (decf skips-remaining))
+                 (finish)))
+             (returning (list start end (subseq counts start (1+ end)))
+                        i))))
+
+(defun counts-to-chunks (counts &key (skips 0))
+  (iterate
+    (with start = 0)
+    (for (values chunk i) = (next-chunk counts :start start :skips skips))
+    (while chunk)
+    (collect chunk)
+    (setf start i)))
+
+(defun chunks-to-counts (chunks)
+  (let ((result (make-array 256 :initial-element 0)))
+    (iterate (for (start end vals) :in chunks)
+             (replace result vals :start1 start :end1 (1+ end)))
+    result))
+
+(defun print-chunks (chunks stream)
+  (if (null chunks)
+    (write-octets stream #(0 0 0))
+    (iterate (for (start end counts) :in chunks)
+             (write-octet stream start)
+             (write-octet stream end)
+             (write-octets stream counts)
+             (finally (write-octet stream 0)))))
+
+(defun read-chunks (stream)
+  (iterate (for start = (read-octet stream))
+           (until (and (zerop start) (not (first-time-p))))
+           (for end = (read-octet stream))
+           (for chunk = (read-octets stream (1+ (- end start))))
+           (collect (list start end chunk))))
+
+
+;;;; Trees --------------------------------------------------------------------
+(defclass* tree ()
+  (b0 b1 weight))
+
+(defclass* leaf ()
+  (value weight))
+
+(defmethod print-object ((o leaf) stream)
+  (print-unreadable-object (o stream :type t)
+    (format stream "~D ~S weight ~D"
+            (value o)
+            (code-char (value o))
+            (weight o))))
+
+(defmethod print-object ((o tree) stream)
+  (print-unreadable-object (o stream :type t)
+    (format stream "weight ~D" (weight o))))
+
+(defun print-tree (tree)
+  (recursively ((node tree) (indent 0))
+    (format t "~vA~A~%" indent "" node)
+    (when (typep node 'tree)
+      (recur (b0 node) (+ indent 4))
+      (recur (b1 node) (+ indent 4))))
+  tree)
+
+(defun make-tree (counts)
+  "Create a Huffman tree from `counts`."
+  (let ((heap (pileup:make-heap #'< :key #'weight :size 256)))
+    (iterate (for c :in-vector counts :with-index i)
+             (unless (zerop c)
+               (pileup:heap-insert (make-instance 'leaf :value i :weight c) heap)))
+    (iterate
+      (for a = (pileup:heap-pop heap))
+      (when (pileup:heap-empty-p heap)
+        (return a)) ; todo handle single-leaf trees
+      (for b = (pileup:heap-pop heap))
+      (for weight = (+ (weight a) (weight b)))
+      (pileup:heap-insert (make-instance 'tree :b0 a :b1 b :weight weight)
+                          heap))))
+
+
+;;;; Code Maps ----------------------------------------------------------------
+(defun make-code-map (tree)
+  "Turn a Huffman `tree` into a code map.
+
+  A code map is a 256-element vector.  The indexes are the bytes, and the values
+  are `(code . code-length)` conses, or `nil` if that byte was not present in
+  the tree.
+
+  "
+  (let ((result (make-array 256 :initial-element nil)))
+    (recursively ((node tree) (code 0) (code-length 0))
+      (etypecase node
+        (tree (recur (b0 node) (ash code 1) (1+ code-length))
+              (recur (b1 node) (+ (ash code 1) 1) (1+ code-length)))
+        (leaf (setf (aref result (value node))
+                    (cons code (max 1 code-length))))))
+    result))
+
+(defun print-code-map (map)
+  (iterate
+    (with vals = (_ (iterate
+                      (for item :in-vector map :with-index value)
+                      (when item
+                        (collect (list value (car item) (cdr item)))))
+                   (stable-sort _ #'< :key #'second) ; sort by code
+                   (stable-sort _ #'< :key #'third))) ; then by length
+    (for (value code code-length) :in vals)
+    (format t "~12<~v,'0B:~;~> byte #x~2,'0X ~S has code ~D (length ~D)~%"
+            code-length code
+            value (code-char value)
+            code code-length)))
+
+
+;;;; Trees --------------------------------------------------------------------
+(defun encode (byte map bits)
+  "Encode `byte` to `bits` using the Huffman `map`."
+  (destructuring-bind (code . code-length) (aref map byte)
+    (write-bits bits code code-length)))
+
+(defun decode (bits tree)
+  "Decode a byte from `bits` using the Huffman `tree`."
+  (recursively ((node tree))
+    (if (typep node 'leaf)
+      (value node)
+      (recur (if (zerop (read-bit bits))
+               (b0 node)
+               (b1 node))))))
+
+
+(defun compress% (in out)
+  (let* ((data (alexandria:read-stream-content-into-byte-vector in))
+         (counts (count-bytes data))
+         (chunks (counts-to-chunks counts))
+         (tree (make-tree counts))
+         (map (make-code-map tree)))
+    (print-chunks chunks out)
+    (iterate (for byte :in-vector data)
+             (encode byte map out))))
+
+(defparameter *compress-ui*
+  (adopt:make-interface
+    :name "c huffman"
+    :summary "simple Huffman algorithm"
+    :usage "[OPTIONS]"
+    :help "Compress using a simple Huffman algorithm."))
+
+(defparameter *extract-ui*
+  (adopt:make-interface
+    :name "e huffman"
+    :summary "simple Huffman algorithm"
+    :usage "[OPTIONS]"
+    :help "Extract using a simple Huffman algorithm."))
+
+(defun compress (arguments)
+  (multiple-value-bind (arguments options)
+      (adopt:parse-options *compress-ui* arguments)
+    (destructuring-bind (in-path out-path) arguments
+      (with-open-file (in in-path :direction :input :element-type 'u8)
+        (with-open-file (out out-path :direction :output :element-type 'u8)
+          (with-bits-output (out out)
+            (compress% in out)))))))
+
+(defun extract (arguments)
+  (multiple-value-bind (arguments options)
+      (adopt:parse-options *compress-ui* arguments)
+    (destructuring-bind (in-path out-path) arguments
+      (with-open-file (in in-path :direction :input :element-type 'u8)
+        (with-open-file (out out-path :direction :output :element-type 'u8)
+          (with-bits-input (in in)
+            (extract% in out)))))))
+
+(tdcb/main:ensure-compressor "huffman" "simple Huffman algorithm" #'compress)
+(tdcb/main:ensure-extractor "huffman" "simple Huffman algorithm" #'extract)