# HG changeset patch # User Steve Losh # Date 1595791255 14400 # Node ID b2decfc394fa8f6ba39a695c929f90242ec46b9d # Parent 716d1110c12fd4106c7e1d4e3fe067b43c344b69 Rename file diff -r 716d1110c12f -r b2decfc394fa .hgignore --- a/.hgignore Sat Jul 25 15:48:24 2020 -0400 +++ b/.hgignore Sun Jul 26 15:20:55 2020 -0400 @@ -1,4 +1,4 @@ -syntax:regex +syntax:regexp ^c$ ^e$ scratch.lisp diff -r 716d1110c12f -r b2decfc394fa src/hamming.lisp --- a/src/hamming.lisp Sat Jul 25 15:48:24 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,188 +0,0 @@ -(in-package :tdcb/huffman) - -;;;; Counts and Chunks -------------------------------------------------------- -(defun count-bytes (byte-vector) - (loop :with result = (make-array 256 :initial-element 0) - :for b :across byte-vector - :do (incf (aref result b)) - :finally (return result))) - -(defun next-chunk (counts &key (start 0) (skips 0)) - (when-let ((start (position-if #'plusp counts :start start))) - (iterate (with end = nil) - (with skips-remaining = skips) - (for c :in-vector counts :with-index i :from start) - (if (plusp c) - (setf end i - skips-remaining skips) - (when (minusp (decf skips-remaining)) - (finish))) - (returning (list start end (subseq counts start (1+ end))) - i)))) - -(defun counts-to-chunks (counts &key (skips 0)) - (iterate - (with start = 0) - (for (values chunk i) = (next-chunk counts :start start :skips skips)) - (while chunk) - (collect chunk) - (setf start i))) - -(defun chunks-to-counts (chunks) - (let ((result (make-array 256 :initial-element 0))) - (iterate (for (start end vals) :in chunks) - (replace result vals :start1 start :end1 (1+ end))) - result)) - -(defun print-chunks (chunks stream) - (if (null chunks) - (write-octets stream #(0 0 0)) - (iterate (for (start end counts) :in chunks) - (write-octet stream start) - (write-octet stream end) - (write-octets stream counts) - (finally (write-octet stream 0))))) - -(defun read-chunks (stream) - (iterate (for start = (read-octet stream)) - (until (and (zerop start) (not (first-time-p)))) - (for end = (read-octet stream)) - (for chunk = (read-octets stream (1+ (- end start)))) - (collect (list start end chunk)))) - - -;;;; Trees -------------------------------------------------------------------- -(defclass* tree () - (b0 b1 weight)) - -(defclass* leaf () - (value weight)) - -(defmethod print-object ((o leaf) stream) - (print-unreadable-object (o stream :type t) - (format stream "~D ~S weight ~D" - (value o) - (code-char (value o)) - (weight o)))) - -(defmethod print-object ((o tree) stream) - (print-unreadable-object (o stream :type t) - (format stream "weight ~D" (weight o)))) - -(defun print-tree (tree) - (recursively ((node tree) (indent 0)) - (format t "~vA~A~%" indent "" node) - (when (typep node 'tree) - (recur (b0 node) (+ indent 4)) - (recur (b1 node) (+ indent 4)))) - tree) - -(defun make-tree (counts) - "Create a Huffman tree from `counts`." - (let ((heap (pileup:make-heap #'< :key #'weight :size 256))) - (iterate (for c :in-vector counts :with-index i) - (unless (zerop c) - (pileup:heap-insert (make-instance 'leaf :value i :weight c) heap))) - (iterate - (for a = (pileup:heap-pop heap)) - (when (pileup:heap-empty-p heap) - (return a)) ; todo handle single-leaf trees - (for b = (pileup:heap-pop heap)) - (for weight = (+ (weight a) (weight b))) - (pileup:heap-insert (make-instance 'tree :b0 a :b1 b :weight weight) - heap)))) - - -;;;; Code Maps ---------------------------------------------------------------- -(defun make-code-map (tree) - "Turn a Huffman `tree` into a code map. - - A code map is a 256-element vector. The indexes are the bytes, and the values - are `(code . code-length)` conses, or `nil` if that byte was not present in - the tree. - - " - (let ((result (make-array 256 :initial-element nil))) - (recursively ((node tree) (code 0) (code-length 0)) - (etypecase node - (tree (recur (b0 node) (ash code 1) (1+ code-length)) - (recur (b1 node) (+ (ash code 1) 1) (1+ code-length))) - (leaf (setf (aref result (value node)) - (cons code (max 1 code-length)))))) - result)) - -(defun print-code-map (map) - (iterate - (with vals = (_ (iterate - (for item :in-vector map :with-index value) - (when item - (collect (list value (car item) (cdr item))))) - (stable-sort _ #'< :key #'second) ; sort by code - (stable-sort _ #'< :key #'third))) ; then by length - (for (value code code-length) :in vals) - (format t "~12<~v,'0B:~;~> byte #x~2,'0X ~S has code ~D (length ~D)~%" - code-length code - value (code-char value) - code code-length))) - - -;;;; Trees -------------------------------------------------------------------- -(defun encode (byte map bits) - "Encode `byte` to `bits` using the Huffman `map`." - (destructuring-bind (code . code-length) (aref map byte) - (write-bits bits code code-length))) - -(defun decode (bits tree) - "Decode a byte from `bits` using the Huffman `tree`." - (recursively ((node tree)) - (if (typep node 'leaf) - (value node) - (recur (if (zerop (read-bit bits)) - (b0 node) - (b1 node)))))) - - -(defun compress% (in out) - (let* ((data (alexandria:read-stream-content-into-byte-vector in)) - (counts (count-bytes data)) - (chunks (counts-to-chunks counts)) - (tree (make-tree counts)) - (map (make-code-map tree))) - (print-chunks chunks out) - (iterate (for byte :in-vector data) - (encode byte map out)))) - -(defparameter *compress-ui* - (adopt:make-interface - :name "c huffman" - :summary "simple Huffman algorithm" - :usage "[OPTIONS]" - :help "Compress using a simple Huffman algorithm.")) - -(defparameter *extract-ui* - (adopt:make-interface - :name "e huffman" - :summary "simple Huffman algorithm" - :usage "[OPTIONS]" - :help "Extract using a simple Huffman algorithm.")) - -(defun compress (arguments) - (multiple-value-bind (arguments options) - (adopt:parse-options *compress-ui* arguments) - (destructuring-bind (in-path out-path) arguments - (with-open-file (in in-path :direction :input :element-type 'u8) - (with-open-file (out out-path :direction :output :element-type 'u8) - (with-bits-output (out out) - (compress% in out))))))) - -(defun extract (arguments) - (multiple-value-bind (arguments options) - (adopt:parse-options *compress-ui* arguments) - (destructuring-bind (in-path out-path) arguments - (with-open-file (in in-path :direction :input :element-type 'u8) - (with-open-file (out out-path :direction :output :element-type 'u8) - (with-bits-input (in in) - (extract% in out))))))) - -(tdcb/main:ensure-compressor "huffman" "simple Huffman algorithm" #'compress) -(tdcb/main:ensure-extractor "huffman" "simple Huffman algorithm" #'extract) diff -r 716d1110c12f -r b2decfc394fa src/huffman.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/huffman.lisp Sun Jul 26 15:20:55 2020 -0400 @@ -0,0 +1,188 @@ +(in-package :tdcb/huffman) + +;;;; Counts and Chunks -------------------------------------------------------- +(defun count-bytes (byte-vector) + (loop :with result = (make-array 256 :initial-element 0) + :for b :across byte-vector + :do (incf (aref result b)) + :finally (return result))) + +(defun next-chunk (counts &key (start 0) (skips 0)) + (when-let ((start (position-if #'plusp counts :start start))) + (iterate (with end = nil) + (with skips-remaining = skips) + (for c :in-vector counts :with-index i :from start) + (if (plusp c) + (setf end i + skips-remaining skips) + (when (minusp (decf skips-remaining)) + (finish))) + (returning (list start end (subseq counts start (1+ end))) + i)))) + +(defun counts-to-chunks (counts &key (skips 0)) + (iterate + (with start = 0) + (for (values chunk i) = (next-chunk counts :start start :skips skips)) + (while chunk) + (collect chunk) + (setf start i))) + +(defun chunks-to-counts (chunks) + (let ((result (make-array 256 :initial-element 0))) + (iterate (for (start end vals) :in chunks) + (replace result vals :start1 start :end1 (1+ end))) + result)) + +(defun print-chunks (chunks stream) + (if (null chunks) + (write-octets stream #(0 0 0)) + (iterate (for (start end counts) :in chunks) + (write-octet stream start) + (write-octet stream end) + (write-octets stream counts) + (finally (write-octet stream 0))))) + +(defun read-chunks (stream) + (iterate (for start = (read-octet stream)) + (until (and (zerop start) (not (first-time-p)))) + (for end = (read-octet stream)) + (for chunk = (read-octets stream (1+ (- end start)))) + (collect (list start end chunk)))) + + +;;;; Trees -------------------------------------------------------------------- +(defclass* tree () + (b0 b1 weight)) + +(defclass* leaf () + (value weight)) + +(defmethod print-object ((o leaf) stream) + (print-unreadable-object (o stream :type t) + (format stream "~D ~S weight ~D" + (value o) + (code-char (value o)) + (weight o)))) + +(defmethod print-object ((o tree) stream) + (print-unreadable-object (o stream :type t) + (format stream "weight ~D" (weight o)))) + +(defun print-tree (tree) + (recursively ((node tree) (indent 0)) + (format t "~vA~A~%" indent "" node) + (when (typep node 'tree) + (recur (b0 node) (+ indent 4)) + (recur (b1 node) (+ indent 4)))) + tree) + +(defun make-tree (counts) + "Create a Huffman tree from `counts`." + (let ((heap (pileup:make-heap #'< :key #'weight :size 256))) + (iterate (for c :in-vector counts :with-index i) + (unless (zerop c) + (pileup:heap-insert (make-instance 'leaf :value i :weight c) heap))) + (iterate + (for a = (pileup:heap-pop heap)) + (when (pileup:heap-empty-p heap) + (return a)) ; todo handle single-leaf trees + (for b = (pileup:heap-pop heap)) + (for weight = (+ (weight a) (weight b))) + (pileup:heap-insert (make-instance 'tree :b0 a :b1 b :weight weight) + heap)))) + + +;;;; Code Maps ---------------------------------------------------------------- +(defun make-code-map (tree) + "Turn a Huffman `tree` into a code map. + + A code map is a 256-element vector. The indexes are the bytes, and the values + are `(code . code-length)` conses, or `nil` if that byte was not present in + the tree. + + " + (let ((result (make-array 256 :initial-element nil))) + (recursively ((node tree) (code 0) (code-length 0)) + (etypecase node + (tree (recur (b0 node) (ash code 1) (1+ code-length)) + (recur (b1 node) (+ (ash code 1) 1) (1+ code-length))) + (leaf (setf (aref result (value node)) + (cons code (max 1 code-length)))))) + result)) + +(defun print-code-map (map) + (iterate + (with vals = (_ (iterate + (for item :in-vector map :with-index value) + (when item + (collect (list value (car item) (cdr item))))) + (stable-sort _ #'< :key #'second) ; sort by code + (stable-sort _ #'< :key #'third))) ; then by length + (for (value code code-length) :in vals) + (format t "~12<~v,'0B:~;~> byte #x~2,'0X ~S has code ~D (length ~D)~%" + code-length code + value (code-char value) + code code-length))) + + +;;;; Trees -------------------------------------------------------------------- +(defun encode (byte map bits) + "Encode `byte` to `bits` using the Huffman `map`." + (destructuring-bind (code . code-length) (aref map byte) + (write-bits bits code code-length))) + +(defun decode (bits tree) + "Decode a byte from `bits` using the Huffman `tree`." + (recursively ((node tree)) + (if (typep node 'leaf) + (value node) + (recur (if (zerop (read-bit bits)) + (b0 node) + (b1 node)))))) + + +(defun compress% (in out) + (let* ((data (alexandria:read-stream-content-into-byte-vector in)) + (counts (count-bytes data)) + (chunks (counts-to-chunks counts)) + (tree (make-tree counts)) + (map (make-code-map tree))) + (print-chunks chunks out) + (iterate (for byte :in-vector data) + (encode byte map out)))) + +(defparameter *compress-ui* + (adopt:make-interface + :name "c huffman" + :summary "simple Huffman algorithm" + :usage "[OPTIONS]" + :help "Compress using a simple Huffman algorithm.")) + +(defparameter *extract-ui* + (adopt:make-interface + :name "e huffman" + :summary "simple Huffman algorithm" + :usage "[OPTIONS]" + :help "Extract using a simple Huffman algorithm.")) + +(defun compress (arguments) + (multiple-value-bind (arguments options) + (adopt:parse-options *compress-ui* arguments) + (destructuring-bind (in-path out-path) arguments + (with-open-file (in in-path :direction :input :element-type 'u8) + (with-open-file (out out-path :direction :output :element-type 'u8) + (with-bits-output (out out) + (compress% in out))))))) + +(defun extract (arguments) + (multiple-value-bind (arguments options) + (adopt:parse-options *compress-ui* arguments) + (destructuring-bind (in-path out-path) arguments + (with-open-file (in in-path :direction :input :element-type 'u8) + (with-open-file (out out-path :direction :output :element-type 'u8) + (with-bits-input (in in) + (extract% in out))))))) + +(tdcb/main:ensure-compressor "huffman" "simple Huffman algorithm" #'compress) +(tdcb/main:ensure-extractor "huffman" "simple Huffman algorithm" #'extract)