--- a/src/bits.lisp Sat Jul 25 15:48:15 2020 -0400
+++ b/src/bits.lisp Sat Jul 25 15:48:24 2020 -0400
@@ -31,6 +31,15 @@
(ashf mask -1)))
byte)
+(defun write-octet (b octet)
+ (write-bits b octet 8))
+
+(defun write-octets (b sequence)
+ (map nil (lambda (octet)
+ (write-bits b octet 8))
+ sequence))
+
+
(defun flush (b)
(unless (= (mask b) #x80)
(write-byte (rack b) (underlying-stream b))))
@@ -59,6 +68,16 @@
(ashf mask -1))
:finally (return result)))
+(defun read-octet (b)
+ (read-bits b 8))
+
+(defun read-octets (b count)
+ (iterate
+ (with result = (make-array count))
+ (for i :from 0 :below count)
+ (setf (aref result i) (read-octet b))
+ (returning result)))
+
;;;; Create -------------------------------------------------------------------
(defmacro with-bits-output ((var stream) &body body)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hamming.lisp Sat Jul 25 15:48:24 2020 -0400
@@ -0,0 +1,188 @@
+(in-package :tdcb/huffman)
+
+;;;; Counts and Chunks --------------------------------------------------------
+(defun count-bytes (byte-vector)
+ (loop :with result = (make-array 256 :initial-element 0)
+ :for b :across byte-vector
+ :do (incf (aref result b))
+ :finally (return result)))
+
+(defun next-chunk (counts &key (start 0) (skips 0))
+ (when-let ((start (position-if #'plusp counts :start start)))
+ (iterate (with end = nil)
+ (with skips-remaining = skips)
+ (for c :in-vector counts :with-index i :from start)
+ (if (plusp c)
+ (setf end i
+ skips-remaining skips)
+ (when (minusp (decf skips-remaining))
+ (finish)))
+ (returning (list start end (subseq counts start (1+ end)))
+ i))))
+
+(defun counts-to-chunks (counts &key (skips 0))
+ (iterate
+ (with start = 0)
+ (for (values chunk i) = (next-chunk counts :start start :skips skips))
+ (while chunk)
+ (collect chunk)
+ (setf start i)))
+
+(defun chunks-to-counts (chunks)
+ (let ((result (make-array 256 :initial-element 0)))
+ (iterate (for (start end vals) :in chunks)
+ (replace result vals :start1 start :end1 (1+ end)))
+ result))
+
+(defun print-chunks (chunks stream)
+ (if (null chunks)
+ (write-octets stream #(0 0 0))
+ (iterate (for (start end counts) :in chunks)
+ (write-octet stream start)
+ (write-octet stream end)
+ (write-octets stream counts)
+ (finally (write-octet stream 0)))))
+
+(defun read-chunks (stream)
+ (iterate (for start = (read-octet stream))
+ (until (and (zerop start) (not (first-time-p))))
+ (for end = (read-octet stream))
+ (for chunk = (read-octets stream (1+ (- end start))))
+ (collect (list start end chunk))))
+
+
+;;;; Trees --------------------------------------------------------------------
+(defclass* tree ()
+ (b0 b1 weight))
+
+(defclass* leaf ()
+ (value weight))
+
+(defmethod print-object ((o leaf) stream)
+ (print-unreadable-object (o stream :type t)
+ (format stream "~D ~S weight ~D"
+ (value o)
+ (code-char (value o))
+ (weight o))))
+
+(defmethod print-object ((o tree) stream)
+ (print-unreadable-object (o stream :type t)
+ (format stream "weight ~D" (weight o))))
+
+(defun print-tree (tree)
+ (recursively ((node tree) (indent 0))
+ (format t "~vA~A~%" indent "" node)
+ (when (typep node 'tree)
+ (recur (b0 node) (+ indent 4))
+ (recur (b1 node) (+ indent 4))))
+ tree)
+
+(defun make-tree (counts)
+ "Create a Huffman tree from `counts`."
+ (let ((heap (pileup:make-heap #'< :key #'weight :size 256)))
+ (iterate (for c :in-vector counts :with-index i)
+ (unless (zerop c)
+ (pileup:heap-insert (make-instance 'leaf :value i :weight c) heap)))
+ (iterate
+ (for a = (pileup:heap-pop heap))
+ (when (pileup:heap-empty-p heap)
+ (return a)) ; todo handle single-leaf trees
+ (for b = (pileup:heap-pop heap))
+ (for weight = (+ (weight a) (weight b)))
+ (pileup:heap-insert (make-instance 'tree :b0 a :b1 b :weight weight)
+ heap))))
+
+
+;;;; Code Maps ----------------------------------------------------------------
+(defun make-code-map (tree)
+ "Turn a Huffman `tree` into a code map.
+
+ A code map is a 256-element vector. The indexes are the bytes, and the values
+ are `(code . code-length)` conses, or `nil` if that byte was not present in
+ the tree.
+
+ "
+ (let ((result (make-array 256 :initial-element nil)))
+ (recursively ((node tree) (code 0) (code-length 0))
+ (etypecase node
+ (tree (recur (b0 node) (ash code 1) (1+ code-length))
+ (recur (b1 node) (+ (ash code 1) 1) (1+ code-length)))
+ (leaf (setf (aref result (value node))
+ (cons code (max 1 code-length))))))
+ result))
+
+(defun print-code-map (map)
+ (iterate
+ (with vals = (_ (iterate
+ (for item :in-vector map :with-index value)
+ (when item
+ (collect (list value (car item) (cdr item)))))
+ (stable-sort _ #'< :key #'second) ; sort by code
+ (stable-sort _ #'< :key #'third))) ; then by length
+ (for (value code code-length) :in vals)
+ (format t "~12<~v,'0B:~;~> byte #x~2,'0X ~S has code ~D (length ~D)~%"
+ code-length code
+ value (code-char value)
+ code code-length)))
+
+
+;;;; Trees --------------------------------------------------------------------
+(defun encode (byte map bits)
+ "Encode `byte` to `bits` using the Huffman `map`."
+ (destructuring-bind (code . code-length) (aref map byte)
+ (write-bits bits code code-length)))
+
+(defun decode (bits tree)
+ "Decode a byte from `bits` using the Huffman `tree`."
+ (recursively ((node tree))
+ (if (typep node 'leaf)
+ (value node)
+ (recur (if (zerop (read-bit bits))
+ (b0 node)
+ (b1 node))))))
+
+
+(defun compress% (in out)
+ (let* ((data (alexandria:read-stream-content-into-byte-vector in))
+ (counts (count-bytes data))
+ (chunks (counts-to-chunks counts))
+ (tree (make-tree counts))
+ (map (make-code-map tree)))
+ (print-chunks chunks out)
+ (iterate (for byte :in-vector data)
+ (encode byte map out))))
+
+(defparameter *compress-ui*
+ (adopt:make-interface
+ :name "c huffman"
+ :summary "simple Huffman algorithm"
+ :usage "[OPTIONS]"
+ :help "Compress using a simple Huffman algorithm."))
+
+(defparameter *extract-ui*
+ (adopt:make-interface
+ :name "e huffman"
+ :summary "simple Huffman algorithm"
+ :usage "[OPTIONS]"
+ :help "Extract using a simple Huffman algorithm."))
+
+(defun compress (arguments)
+ (multiple-value-bind (arguments options)
+ (adopt:parse-options *compress-ui* arguments)
+ (destructuring-bind (in-path out-path) arguments
+ (with-open-file (in in-path :direction :input :element-type 'u8)
+ (with-open-file (out out-path :direction :output :element-type 'u8)
+ (with-bits-output (out out)
+ (compress% in out)))))))
+
+(defun extract (arguments)
+ (multiple-value-bind (arguments options)
+ (adopt:parse-options *compress-ui* arguments)
+ (destructuring-bind (in-path out-path) arguments
+ (with-open-file (in in-path :direction :input :element-type 'u8)
+ (with-open-file (out out-path :direction :output :element-type 'u8)
+ (with-bits-input (in in)
+ (extract% in out)))))))
+
+(tdcb/main:ensure-compressor "huffman" "simple Huffman algorithm" #'compress)
+(tdcb/main:ensure-extractor "huffman" "simple Huffman algorithm" #'extract)
--- a/src/package.lisp Sat Jul 25 15:48:15 2020 -0400
+++ b/src/package.lisp Sat Jul 25 15:48:24 2020 -0400
@@ -1,6 +1,12 @@
(defpackage :tdcb
- (:use :cl)
- (:export))
+ (:use :cl :iterate :losh)
+ (:export
+ :ashf :logiorf :u8
+ :with-bits-input :with-bits-output
+ :read-bit :read-bits
+ :read-octet :read-octets
+ :write-bit :write-bits
+ :write-octet :write-octets))
(defpackage :tdcb/main
(:use :cl)
@@ -9,3 +15,7 @@
:build/extract
:ensure-compressor
:ensure-extractor))
+
+(defpackage :tdcb/huffman
+ (:use :cl :tdcb :iterate :losh)
+ (:export))
--- a/src/utilities.lisp Sat Jul 25 15:48:15 2020 -0400
+++ b/src/utilities.lisp Sat Jul 25 15:48:24 2020 -0400
@@ -3,5 +3,6 @@
(define-modify-macro ashf (count) ash "Arithmetic shift in place")
(define-modify-macro logiorf (&rest integers) logior "Logical inclusive or in place")
+(deftype u8 () '(unsigned-byte 8))
--- a/tdcb.asd Sat Jul 25 15:48:15 2020 -0400
+++ b/tdcb.asd Sat Jul 25 15:48:24 2020 -0400
@@ -4,13 +4,19 @@
:license "MIT"
- :depends-on (:adopt)
+ :depends-on (:adopt
+ :alexandria
+ :losh
+ :iterate
+ :flexi-streams
+ :pileup)
:serial t
:components ((:module "src" :serial t
:components ((:file "package")
(:file "utilities")
(:file "bits")
- (:file "main")))))
+ (:file "main")
+ (:file "huffman")))))