# HG changeset patch # User Steve Losh # Date 1595706504 14400 # Node ID 716d1110c12fd4106c7e1d4e3fe067b43c344b69 # Parent c689117887e373f0fa66d06b438c689843c146df Initial Huffman skeleton diff -r c689117887e3 -r 716d1110c12f src/bits.lisp --- a/src/bits.lisp Sat Jul 25 15:48:15 2020 -0400 +++ b/src/bits.lisp Sat Jul 25 15:48:24 2020 -0400 @@ -31,6 +31,15 @@ (ashf mask -1))) byte) +(defun write-octet (b octet) + (write-bits b octet 8)) + +(defun write-octets (b sequence) + (map nil (lambda (octet) + (write-bits b octet 8)) + sequence)) + + (defun flush (b) (unless (= (mask b) #x80) (write-byte (rack b) (underlying-stream b)))) @@ -59,6 +68,16 @@ (ashf mask -1)) :finally (return result))) +(defun read-octet (b) + (read-bits b 8)) + +(defun read-octets (b count) + (iterate + (with result = (make-array count)) + (for i :from 0 :below count) + (setf (aref result i) (read-octet b)) + (returning result))) + ;;;; Create ------------------------------------------------------------------- (defmacro with-bits-output ((var stream) &body body) diff -r c689117887e3 -r 716d1110c12f src/hamming.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/hamming.lisp Sat Jul 25 15:48:24 2020 -0400 @@ -0,0 +1,188 @@ +(in-package :tdcb/huffman) + +;;;; Counts and Chunks -------------------------------------------------------- +(defun count-bytes (byte-vector) + (loop :with result = (make-array 256 :initial-element 0) + :for b :across byte-vector + :do (incf (aref result b)) + :finally (return result))) + +(defun next-chunk (counts &key (start 0) (skips 0)) + (when-let ((start (position-if #'plusp counts :start start))) + (iterate (with end = nil) + (with skips-remaining = skips) + (for c :in-vector counts :with-index i :from start) + (if (plusp c) + (setf end i + skips-remaining skips) + (when (minusp (decf skips-remaining)) + (finish))) + (returning (list start end (subseq counts start (1+ end))) + i)))) + +(defun counts-to-chunks (counts &key (skips 0)) + (iterate + (with start = 0) + (for (values chunk i) = (next-chunk counts :start start :skips skips)) + (while chunk) + (collect chunk) + (setf start i))) + +(defun chunks-to-counts (chunks) + (let ((result (make-array 256 :initial-element 0))) + (iterate (for (start end vals) :in chunks) + (replace result vals :start1 start :end1 (1+ end))) + result)) + +(defun print-chunks (chunks stream) + (if (null chunks) + (write-octets stream #(0 0 0)) + (iterate (for (start end counts) :in chunks) + (write-octet stream start) + (write-octet stream end) + (write-octets stream counts) + (finally (write-octet stream 0))))) + +(defun read-chunks (stream) + (iterate (for start = (read-octet stream)) + (until (and (zerop start) (not (first-time-p)))) + (for end = (read-octet stream)) + (for chunk = (read-octets stream (1+ (- end start)))) + (collect (list start end chunk)))) + + +;;;; Trees -------------------------------------------------------------------- +(defclass* tree () + (b0 b1 weight)) + +(defclass* leaf () + (value weight)) + +(defmethod print-object ((o leaf) stream) + (print-unreadable-object (o stream :type t) + (format stream "~D ~S weight ~D" + (value o) + (code-char (value o)) + (weight o)))) + +(defmethod print-object ((o tree) stream) + (print-unreadable-object (o stream :type t) + (format stream "weight ~D" (weight o)))) + +(defun print-tree (tree) + (recursively ((node tree) (indent 0)) + (format t "~vA~A~%" indent "" node) + (when (typep node 'tree) + (recur (b0 node) (+ indent 4)) + (recur (b1 node) (+ indent 4)))) + tree) + +(defun make-tree (counts) + "Create a Huffman tree from `counts`." + (let ((heap (pileup:make-heap #'< :key #'weight :size 256))) + (iterate (for c :in-vector counts :with-index i) + (unless (zerop c) + (pileup:heap-insert (make-instance 'leaf :value i :weight c) heap))) + (iterate + (for a = (pileup:heap-pop heap)) + (when (pileup:heap-empty-p heap) + (return a)) ; todo handle single-leaf trees + (for b = (pileup:heap-pop heap)) + (for weight = (+ (weight a) (weight b))) + (pileup:heap-insert (make-instance 'tree :b0 a :b1 b :weight weight) + heap)))) + + +;;;; Code Maps ---------------------------------------------------------------- +(defun make-code-map (tree) + "Turn a Huffman `tree` into a code map. + + A code map is a 256-element vector. The indexes are the bytes, and the values + are `(code . code-length)` conses, or `nil` if that byte was not present in + the tree. + + " + (let ((result (make-array 256 :initial-element nil))) + (recursively ((node tree) (code 0) (code-length 0)) + (etypecase node + (tree (recur (b0 node) (ash code 1) (1+ code-length)) + (recur (b1 node) (+ (ash code 1) 1) (1+ code-length))) + (leaf (setf (aref result (value node)) + (cons code (max 1 code-length)))))) + result)) + +(defun print-code-map (map) + (iterate + (with vals = (_ (iterate + (for item :in-vector map :with-index value) + (when item + (collect (list value (car item) (cdr item))))) + (stable-sort _ #'< :key #'second) ; sort by code + (stable-sort _ #'< :key #'third))) ; then by length + (for (value code code-length) :in vals) + (format t "~12<~v,'0B:~;~> byte #x~2,'0X ~S has code ~D (length ~D)~%" + code-length code + value (code-char value) + code code-length))) + + +;;;; Trees -------------------------------------------------------------------- +(defun encode (byte map bits) + "Encode `byte` to `bits` using the Huffman `map`." + (destructuring-bind (code . code-length) (aref map byte) + (write-bits bits code code-length))) + +(defun decode (bits tree) + "Decode a byte from `bits` using the Huffman `tree`." + (recursively ((node tree)) + (if (typep node 'leaf) + (value node) + (recur (if (zerop (read-bit bits)) + (b0 node) + (b1 node)))))) + + +(defun compress% (in out) + (let* ((data (alexandria:read-stream-content-into-byte-vector in)) + (counts (count-bytes data)) + (chunks (counts-to-chunks counts)) + (tree (make-tree counts)) + (map (make-code-map tree))) + (print-chunks chunks out) + (iterate (for byte :in-vector data) + (encode byte map out)))) + +(defparameter *compress-ui* + (adopt:make-interface + :name "c huffman" + :summary "simple Huffman algorithm" + :usage "[OPTIONS]" + :help "Compress using a simple Huffman algorithm.")) + +(defparameter *extract-ui* + (adopt:make-interface + :name "e huffman" + :summary "simple Huffman algorithm" + :usage "[OPTIONS]" + :help "Extract using a simple Huffman algorithm.")) + +(defun compress (arguments) + (multiple-value-bind (arguments options) + (adopt:parse-options *compress-ui* arguments) + (destructuring-bind (in-path out-path) arguments + (with-open-file (in in-path :direction :input :element-type 'u8) + (with-open-file (out out-path :direction :output :element-type 'u8) + (with-bits-output (out out) + (compress% in out))))))) + +(defun extract (arguments) + (multiple-value-bind (arguments options) + (adopt:parse-options *compress-ui* arguments) + (destructuring-bind (in-path out-path) arguments + (with-open-file (in in-path :direction :input :element-type 'u8) + (with-open-file (out out-path :direction :output :element-type 'u8) + (with-bits-input (in in) + (extract% in out))))))) + +(tdcb/main:ensure-compressor "huffman" "simple Huffman algorithm" #'compress) +(tdcb/main:ensure-extractor "huffman" "simple Huffman algorithm" #'extract) diff -r c689117887e3 -r 716d1110c12f src/package.lisp --- a/src/package.lisp Sat Jul 25 15:48:15 2020 -0400 +++ b/src/package.lisp Sat Jul 25 15:48:24 2020 -0400 @@ -1,6 +1,12 @@ (defpackage :tdcb - (:use :cl) - (:export)) + (:use :cl :iterate :losh) + (:export + :ashf :logiorf :u8 + :with-bits-input :with-bits-output + :read-bit :read-bits + :read-octet :read-octets + :write-bit :write-bits + :write-octet :write-octets)) (defpackage :tdcb/main (:use :cl) @@ -9,3 +15,7 @@ :build/extract :ensure-compressor :ensure-extractor)) + +(defpackage :tdcb/huffman + (:use :cl :tdcb :iterate :losh) + (:export)) diff -r c689117887e3 -r 716d1110c12f src/utilities.lisp --- a/src/utilities.lisp Sat Jul 25 15:48:15 2020 -0400 +++ b/src/utilities.lisp Sat Jul 25 15:48:24 2020 -0400 @@ -3,5 +3,6 @@ (define-modify-macro ashf (count) ash "Arithmetic shift in place") (define-modify-macro logiorf (&rest integers) logior "Logical inclusive or in place") +(deftype u8 () '(unsigned-byte 8)) diff -r c689117887e3 -r 716d1110c12f tdcb.asd --- a/tdcb.asd Sat Jul 25 15:48:15 2020 -0400 +++ b/tdcb.asd Sat Jul 25 15:48:24 2020 -0400 @@ -4,13 +4,19 @@ :license "MIT" - :depends-on (:adopt) + :depends-on (:adopt + :alexandria + :losh + :iterate + :flexi-streams + :pileup) :serial t :components ((:module "src" :serial t :components ((:file "package") (:file "utilities") (:file "bits") - (:file "main"))))) + (:file "main") + (:file "huffman")))))