--- a/.hgignore Sat Jul 25 15:48:24 2020 -0400
+++ b/.hgignore Sun Jul 26 15:20:55 2020 -0400
@@ -1,4 +1,4 @@
-syntax:regex
+syntax:regexp
^c$
^e$
scratch.lisp
--- a/src/hamming.lisp Sat Jul 25 15:48:24 2020 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,188 +0,0 @@
-(in-package :tdcb/huffman)
-
-;;;; Counts and Chunks --------------------------------------------------------
-(defun count-bytes (byte-vector)
- (loop :with result = (make-array 256 :initial-element 0)
- :for b :across byte-vector
- :do (incf (aref result b))
- :finally (return result)))
-
-(defun next-chunk (counts &key (start 0) (skips 0))
- (when-let ((start (position-if #'plusp counts :start start)))
- (iterate (with end = nil)
- (with skips-remaining = skips)
- (for c :in-vector counts :with-index i :from start)
- (if (plusp c)
- (setf end i
- skips-remaining skips)
- (when (minusp (decf skips-remaining))
- (finish)))
- (returning (list start end (subseq counts start (1+ end)))
- i))))
-
-(defun counts-to-chunks (counts &key (skips 0))
- (iterate
- (with start = 0)
- (for (values chunk i) = (next-chunk counts :start start :skips skips))
- (while chunk)
- (collect chunk)
- (setf start i)))
-
-(defun chunks-to-counts (chunks)
- (let ((result (make-array 256 :initial-element 0)))
- (iterate (for (start end vals) :in chunks)
- (replace result vals :start1 start :end1 (1+ end)))
- result))
-
-(defun print-chunks (chunks stream)
- (if (null chunks)
- (write-octets stream #(0 0 0))
- (iterate (for (start end counts) :in chunks)
- (write-octet stream start)
- (write-octet stream end)
- (write-octets stream counts)
- (finally (write-octet stream 0)))))
-
-(defun read-chunks (stream)
- (iterate (for start = (read-octet stream))
- (until (and (zerop start) (not (first-time-p))))
- (for end = (read-octet stream))
- (for chunk = (read-octets stream (1+ (- end start))))
- (collect (list start end chunk))))
-
-
-;;;; Trees --------------------------------------------------------------------
-(defclass* tree ()
- (b0 b1 weight))
-
-(defclass* leaf ()
- (value weight))
-
-(defmethod print-object ((o leaf) stream)
- (print-unreadable-object (o stream :type t)
- (format stream "~D ~S weight ~D"
- (value o)
- (code-char (value o))
- (weight o))))
-
-(defmethod print-object ((o tree) stream)
- (print-unreadable-object (o stream :type t)
- (format stream "weight ~D" (weight o))))
-
-(defun print-tree (tree)
- (recursively ((node tree) (indent 0))
- (format t "~vA~A~%" indent "" node)
- (when (typep node 'tree)
- (recur (b0 node) (+ indent 4))
- (recur (b1 node) (+ indent 4))))
- tree)
-
-(defun make-tree (counts)
- "Create a Huffman tree from `counts`."
- (let ((heap (pileup:make-heap #'< :key #'weight :size 256)))
- (iterate (for c :in-vector counts :with-index i)
- (unless (zerop c)
- (pileup:heap-insert (make-instance 'leaf :value i :weight c) heap)))
- (iterate
- (for a = (pileup:heap-pop heap))
- (when (pileup:heap-empty-p heap)
- (return a)) ; todo handle single-leaf trees
- (for b = (pileup:heap-pop heap))
- (for weight = (+ (weight a) (weight b)))
- (pileup:heap-insert (make-instance 'tree :b0 a :b1 b :weight weight)
- heap))))
-
-
-;;;; Code Maps ----------------------------------------------------------------
-(defun make-code-map (tree)
- "Turn a Huffman `tree` into a code map.
-
- A code map is a 256-element vector. The indexes are the bytes, and the values
- are `(code . code-length)` conses, or `nil` if that byte was not present in
- the tree.
-
- "
- (let ((result (make-array 256 :initial-element nil)))
- (recursively ((node tree) (code 0) (code-length 0))
- (etypecase node
- (tree (recur (b0 node) (ash code 1) (1+ code-length))
- (recur (b1 node) (+ (ash code 1) 1) (1+ code-length)))
- (leaf (setf (aref result (value node))
- (cons code (max 1 code-length))))))
- result))
-
-(defun print-code-map (map)
- (iterate
- (with vals = (_ (iterate
- (for item :in-vector map :with-index value)
- (when item
- (collect (list value (car item) (cdr item)))))
- (stable-sort _ #'< :key #'second) ; sort by code
- (stable-sort _ #'< :key #'third))) ; then by length
- (for (value code code-length) :in vals)
- (format t "~12<~v,'0B:~;~> byte #x~2,'0X ~S has code ~D (length ~D)~%"
- code-length code
- value (code-char value)
- code code-length)))
-
-
-;;;; Trees --------------------------------------------------------------------
-(defun encode (byte map bits)
- "Encode `byte` to `bits` using the Huffman `map`."
- (destructuring-bind (code . code-length) (aref map byte)
- (write-bits bits code code-length)))
-
-(defun decode (bits tree)
- "Decode a byte from `bits` using the Huffman `tree`."
- (recursively ((node tree))
- (if (typep node 'leaf)
- (value node)
- (recur (if (zerop (read-bit bits))
- (b0 node)
- (b1 node))))))
-
-
-(defun compress% (in out)
- (let* ((data (alexandria:read-stream-content-into-byte-vector in))
- (counts (count-bytes data))
- (chunks (counts-to-chunks counts))
- (tree (make-tree counts))
- (map (make-code-map tree)))
- (print-chunks chunks out)
- (iterate (for byte :in-vector data)
- (encode byte map out))))
-
-(defparameter *compress-ui*
- (adopt:make-interface
- :name "c huffman"
- :summary "simple Huffman algorithm"
- :usage "[OPTIONS]"
- :help "Compress using a simple Huffman algorithm."))
-
-(defparameter *extract-ui*
- (adopt:make-interface
- :name "e huffman"
- :summary "simple Huffman algorithm"
- :usage "[OPTIONS]"
- :help "Extract using a simple Huffman algorithm."))
-
-(defun compress (arguments)
- (multiple-value-bind (arguments options)
- (adopt:parse-options *compress-ui* arguments)
- (destructuring-bind (in-path out-path) arguments
- (with-open-file (in in-path :direction :input :element-type 'u8)
- (with-open-file (out out-path :direction :output :element-type 'u8)
- (with-bits-output (out out)
- (compress% in out)))))))
-
-(defun extract (arguments)
- (multiple-value-bind (arguments options)
- (adopt:parse-options *compress-ui* arguments)
- (destructuring-bind (in-path out-path) arguments
- (with-open-file (in in-path :direction :input :element-type 'u8)
- (with-open-file (out out-path :direction :output :element-type 'u8)
- (with-bits-input (in in)
- (extract% in out)))))))
-
-(tdcb/main:ensure-compressor "huffman" "simple Huffman algorithm" #'compress)
-(tdcb/main:ensure-extractor "huffman" "simple Huffman algorithm" #'extract)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/huffman.lisp Sun Jul 26 15:20:55 2020 -0400
@@ -0,0 +1,188 @@
+(in-package :tdcb/huffman)
+
+;;;; Counts and Chunks --------------------------------------------------------
+(defun count-bytes (byte-vector)
+ (loop :with result = (make-array 256 :initial-element 0)
+ :for b :across byte-vector
+ :do (incf (aref result b))
+ :finally (return result)))
+
+(defun next-chunk (counts &key (start 0) (skips 0))
+ (when-let ((start (position-if #'plusp counts :start start)))
+ (iterate (with end = nil)
+ (with skips-remaining = skips)
+ (for c :in-vector counts :with-index i :from start)
+ (if (plusp c)
+ (setf end i
+ skips-remaining skips)
+ (when (minusp (decf skips-remaining))
+ (finish)))
+ (returning (list start end (subseq counts start (1+ end)))
+ i))))
+
+(defun counts-to-chunks (counts &key (skips 0))
+ (iterate
+ (with start = 0)
+ (for (values chunk i) = (next-chunk counts :start start :skips skips))
+ (while chunk)
+ (collect chunk)
+ (setf start i)))
+
+(defun chunks-to-counts (chunks)
+ (let ((result (make-array 256 :initial-element 0)))
+ (iterate (for (start end vals) :in chunks)
+ (replace result vals :start1 start :end1 (1+ end)))
+ result))
+
+(defun print-chunks (chunks stream)
+ (if (null chunks)
+ (write-octets stream #(0 0 0))
+ (iterate (for (start end counts) :in chunks)
+ (write-octet stream start)
+ (write-octet stream end)
+ (write-octets stream counts)
+ (finally (write-octet stream 0)))))
+
+(defun read-chunks (stream)
+ (iterate (for start = (read-octet stream))
+ (until (and (zerop start) (not (first-time-p))))
+ (for end = (read-octet stream))
+ (for chunk = (read-octets stream (1+ (- end start))))
+ (collect (list start end chunk))))
+
+
+;;;; Trees --------------------------------------------------------------------
+(defclass* tree ()
+ (b0 b1 weight))
+
+(defclass* leaf ()
+ (value weight))
+
+(defmethod print-object ((o leaf) stream)
+ (print-unreadable-object (o stream :type t)
+ (format stream "~D ~S weight ~D"
+ (value o)
+ (code-char (value o))
+ (weight o))))
+
+(defmethod print-object ((o tree) stream)
+ (print-unreadable-object (o stream :type t)
+ (format stream "weight ~D" (weight o))))
+
+(defun print-tree (tree)
+ (recursively ((node tree) (indent 0))
+ (format t "~vA~A~%" indent "" node)
+ (when (typep node 'tree)
+ (recur (b0 node) (+ indent 4))
+ (recur (b1 node) (+ indent 4))))
+ tree)
+
+(defun make-tree (counts)
+ "Create a Huffman tree from `counts`."
+ (let ((heap (pileup:make-heap #'< :key #'weight :size 256)))
+ (iterate (for c :in-vector counts :with-index i)
+ (unless (zerop c)
+ (pileup:heap-insert (make-instance 'leaf :value i :weight c) heap)))
+ (iterate
+ (for a = (pileup:heap-pop heap))
+ (when (pileup:heap-empty-p heap)
+ (return a)) ; todo handle single-leaf trees
+ (for b = (pileup:heap-pop heap))
+ (for weight = (+ (weight a) (weight b)))
+ (pileup:heap-insert (make-instance 'tree :b0 a :b1 b :weight weight)
+ heap))))
+
+
+;;;; Code Maps ----------------------------------------------------------------
+(defun make-code-map (tree)
+ "Turn a Huffman `tree` into a code map.
+
+ A code map is a 256-element vector. The indexes are the bytes, and the values
+ are `(code . code-length)` conses, or `nil` if that byte was not present in
+ the tree.
+
+ "
+ (let ((result (make-array 256 :initial-element nil)))
+ (recursively ((node tree) (code 0) (code-length 0))
+ (etypecase node
+ (tree (recur (b0 node) (ash code 1) (1+ code-length))
+ (recur (b1 node) (+ (ash code 1) 1) (1+ code-length)))
+ (leaf (setf (aref result (value node))
+ (cons code (max 1 code-length))))))
+ result))
+
+(defun print-code-map (map)
+ (iterate
+ (with vals = (_ (iterate
+ (for item :in-vector map :with-index value)
+ (when item
+ (collect (list value (car item) (cdr item)))))
+ (stable-sort _ #'< :key #'second) ; sort by code
+ (stable-sort _ #'< :key #'third))) ; then by length
+ (for (value code code-length) :in vals)
+ (format t "~12<~v,'0B:~;~> byte #x~2,'0X ~S has code ~D (length ~D)~%"
+ code-length code
+ value (code-char value)
+ code code-length)))
+
+
+;;;; Trees --------------------------------------------------------------------
+(defun encode (byte map bits)
+ "Encode `byte` to `bits` using the Huffman `map`."
+ (destructuring-bind (code . code-length) (aref map byte)
+ (write-bits bits code code-length)))
+
+(defun decode (bits tree)
+ "Decode a byte from `bits` using the Huffman `tree`."
+ (recursively ((node tree))
+ (if (typep node 'leaf)
+ (value node)
+ (recur (if (zerop (read-bit bits))
+ (b0 node)
+ (b1 node))))))
+
+
+(defun compress% (in out)
+ (let* ((data (alexandria:read-stream-content-into-byte-vector in))
+ (counts (count-bytes data))
+ (chunks (counts-to-chunks counts))
+ (tree (make-tree counts))
+ (map (make-code-map tree)))
+ (print-chunks chunks out)
+ (iterate (for byte :in-vector data)
+ (encode byte map out))))
+
+(defparameter *compress-ui*
+ (adopt:make-interface
+ :name "c huffman"
+ :summary "simple Huffman algorithm"
+ :usage "[OPTIONS]"
+ :help "Compress using a simple Huffman algorithm."))
+
+(defparameter *extract-ui*
+ (adopt:make-interface
+ :name "e huffman"
+ :summary "simple Huffman algorithm"
+ :usage "[OPTIONS]"
+ :help "Extract using a simple Huffman algorithm."))
+
+(defun compress (arguments)
+ (multiple-value-bind (arguments options)
+ (adopt:parse-options *compress-ui* arguments)
+ (destructuring-bind (in-path out-path) arguments
+ (with-open-file (in in-path :direction :input :element-type 'u8)
+ (with-open-file (out out-path :direction :output :element-type 'u8)
+ (with-bits-output (out out)
+ (compress% in out)))))))
+
+(defun extract (arguments)
+ (multiple-value-bind (arguments options)
+ (adopt:parse-options *compress-ui* arguments)
+ (destructuring-bind (in-path out-path) arguments
+ (with-open-file (in in-path :direction :input :element-type 'u8)
+ (with-open-file (out out-path :direction :output :element-type 'u8)
+ (with-bits-input (in in)
+ (extract% in out)))))))
+
+(tdcb/main:ensure-compressor "huffman" "simple Huffman algorithm" #'compress)
+(tdcb/main:ensure-extractor "huffman" "simple Huffman algorithm" #'extract)