lisp/lines.lisp @ a65fd2691c94 default tip
More
| author | Steve Losh <steve@stevelosh.com> |
|---|---|
| date | Mon, 03 Nov 2025 14:55:17 -0500 |
| parents | 2f06facce49e |
| children | (none) |
(eval-when (:compile-toplevel :load-toplevel :execute) (ql:quickload '(:adopt :cl-ppcre :with-user-abort) :silent t)) (defpackage :lines (:use :cl) (:export :toplevel :*ui*)) (in-package :lines) ;; TODO: Optimize by eliminating collection when all matchers are in order. ;;;; Configuration ------------------------------------------------------------ (defparameter *version* "1.0.0") (defparameter *index-base* 0) (defparameter *include-numbers* nil) (defparameter *limit* nil) ;;;; Functionality ------------------------------------------------------------ (defclass matcher () ((designator :type string :initarg :designator :accessor designator))) (defmethod print-object ((o matcher) stream) (print-unreadable-object (o stream :type t) (format stream "~S" (designator o)))) (defgeneric matches-line-p (matcher line-number)) (defgeneric map-matching-lines (matcher function min max)) (defgeneric maximum (matcher)) (defclass single-line-matcher (matcher) ((n :type (integer 0) :initarg :n :accessor n))) (defmethod matches-line-p ((matcher single-line-matcher) i) (= (n matcher) i)) (defmethod map-matching-lines ((matcher single-line-matcher) function min max) (when (<= min (n matcher) max) (funcall function (n matcher)))) (defmethod maximum ((matcher single-line-matcher)) (n matcher)) (defclass range-matcher (matcher) ((m :type (or null (integer 0)) :initarg :m :accessor m) (n :type (or null (integer 0)) :initarg :n :accessor n))) (defmethod matches-line-p ((matcher range-matcher) i) (let ((m (m matcher)) (n (n matcher))) (cond ((null m) (if (null n) t (<= i n))) ((null n) (<= m i)) (t (<= m i n))))) (defmethod map-matching-lines ((matcher range-matcher) function min max) (loop :with m = (or (m matcher) min) :with n = (or (n matcher) max) :for i :from (max m min) :to (min n max) :do (funcall function i))) (defmethod maximum ((matcher range-matcher)) (n matcher)) (defclass modulo-matcher (matcher) ((n :type (integer 0) :initarg :n :accessor n) (modulus :type (integer 1) :initarg :modulus :accessor modulus))) (defmethod matches-line-p ((matcher modulo-matcher) i) (= (mod i (modulus matcher)) (n matcher))) (defmethod map-matching-lines ((matcher modulo-matcher) function min max) (loop :with n = (n matcher) :with modulus = (modulus matcher) :with start = (if (< n min) (+ n modulus) n) :for i :from start :to max :by modulus :do (funcall function i))) (defmethod maximum ((matcher modulo-matcher)) nil) (defun parse-single-line-designator (designator) (ppcre:register-groups-bind ((#'parse-integer n)) ("^(\\d+)$" designator) (make-instance 'single-line-matcher :designator designator :n n))) (defun parse-range-designator (designator) (ppcre:register-groups-bind ((#'parse-integer m n)) ("^(\\d+)?-(\\d+)?$" designator) (when (and m n) (assert (<= m n) () "Bad line range designator ~S: start (~D) must not be greater than end (~D)." designator m n)) (make-instance 'range-matcher :designator designator :m m :n n))) (defun parse-modulo-designator (designator) (ppcre:register-groups-bind ((#'parse-integer n modulus)) ("^(\\d+)?m(\\d+)$" designator) (when (null n) (setf n 0)) (assert (< n modulus) () "Bad modulo designator ~S: offset (~D) must be less than modulus (~D)." designator n modulus) (make-instance 'modulo-matcher :designator designator :n n :modulus modulus))) (defun parse-line-designator (designator) (or (parse-single-line-designator designator) (parse-range-designator designator) (parse-modulo-designator designator) (error "Could not parse line designator ~S" designator))) (defun parse-line-designators (designators) (mapcar #'parse-line-designator (ppcre:split "," designators))) (defun dbg (&rest args) (apply #'format *error-output* args) (finish-output *error-output*)) (defun compute-limit (matchers) (let ((maxes (mapcar #'maximum matchers))) (unless (some #'null maxes) (reduce #'max maxes)))) (defun collect-lines (matchers) (loop :with limit = (compute-limit matchers) :with min = *index-base* :with max = nil :with lines = (make-hash-table) :for i :from min :for line = (read-line *standard-input* nil) :while line :while (or (null limit) (<= i limit)) :do (progn (setf max i) (dolist (matcher matchers) (when (matches-line-p matcher i) (setf (gethash i lines) line)))) :finally (return (values lines min max)))) (defun output-line (line i) (when i (format *standard-output* "~D " i)) (write-line line)) (defun run-slow (matchers) (let ((include-numbers *include-numbers*)) (multiple-value-bind (lines min max) (collect-lines matchers) (dolist (matcher matchers) (map-matching-lines matcher (lambda (i) (output-line (gethash i lines) (when include-numbers i))) min max))))) (defun run-fast (matcher) (loop :with limit = (when *limit* (+ *index-base* *limit*)) :with include-numbers = *include-numbers* :for i :from *index-base* :for line = (read-line *standard-input* nil) :while (or (null limit) (< i limit)) :while line :when (zerop (mod i 100000)) :do (progn #+sbcl (sb-ext:gc)) :when (matches-line-p matcher i) :do (output-line line (when include-numbers i)))) (defun run (line-designators) (let ((matchers (parse-line-designators line-designators))) (if (= 1 (length matchers)) (run-fast (first matchers)) (run-slow matchers))) (values)) ;;;; User Interface ----------------------------------------------------------- (adopt:defparameters (*option-debug* *option-no-debug*) (adopt:make-boolean-options 'debug :long "debug" :short #\d :help "Enable the Lisp debugger." :help-no "Disable the Lisp debugger (default).")) (defparameter *option-help* (adopt:make-option 'help :help "Display help and exit." :long "help" :short #\h :reduce (constantly t))) (defparameter *option-version* (adopt:make-option 'version :help "Display version information and exit." :long "version" :reduce (constantly t))) (defparameter *option-one-based* (adopt:make-option 'one-based :result-key 'index-base :help "Start numbering lines at 1." :short #\1 :long "one" :reduce (constantly 1))) (defparameter *option-zero-based* (adopt:make-option 'zero-based :result-key 'index-base :help "Start numbering lines at 0 (default)." :short #\0 :long "zero" :reduce (constantly 0))) (defparameter *option-other-base* (adopt:make-option 'other-base :result-key 'index-base :initial-value 0 :help "Start numbering lines at N." :parameter "N" :short #\s :long "start" :key #'parse-integer :reduce #'adopt:last)) (adopt:defparameters (*option-include-numbers* *option-no-include-numbers*) (adopt:make-boolean-options 'include-numbers :long "number" :short #\n :help "Add line numbers to the output." :help-no "Do not add line number to the output (default).")) (defparameter *option-limit* (adopt:make-option 'limit :help "Read at most N lines." :parameter "N" :short #\l :long "limit" :key #'parse-integer :reduce #'adopt:last)) (defparameter *option-no-limit* (adopt:make-option 'no-limit :result-key 'limit :help "Read all lines (default)." :short #\L :long "no-limit" :reduce (constantly nil))) (defparameter *option-input-encoding* (adopt:make-option 'input-encoding :help "Treat input as being encoded with ENC (default utf-8)." :parameter "ENC" :short #\i :long "input-encoding" :initial-value "utf-8" :reduce #'adopt:last)) (defparameter *option-output-encoding* (adopt:make-option 'output-encoding :help "Output text encoded with ENC (default utf-8)." :parameter "ENC" :short #\o :long "output-encoding" :initial-value "utf-8" :reduce #'adopt:last)) (defparameter *option-input-replacement* (adopt:make-option 'input-replacement :help "If an input character is not valid in the selected encoding, replace it with REP." :parameter "REP" :short #\r :long "input-replacement" :reduce #'adopt:last)) (defparameter *option-no-input-replacement* (adopt:make-option 'no-input-replacement :result-key 'input-replacement :help "If an input character is not valid in the selected encoding, return an error (default)." :short #\R :long "no-input-replacement" :reduce (constantly nil))) (defparameter *option-output-replacement* (adopt:make-option 'output-replacement :help "If an output character would not be not valid in the selected encoding, replace it with REP." :parameter "REP" :short #\q :long "output-replacement" :reduce #'adopt:last)) (defparameter *option-no-output-replacement* (adopt:make-option 'no-output-replacement :result-key 'output-replacement :help "If an output character would not be valid in the selected encoding, return an error (default)." :short #\Q :long "no-output-replacement" :reduce (constantly nil))) (adopt:define-string *help-text* "lines takes a string denoting which lines to print, and prints those lines ~ of standard input.") (defparameter *examples* '(("Print line 1234:" . "cat log.txt | lines 1234") ("Print line 2, then line 1:" . "cat log.txt | lines 2,1") ("Print lines 6, 12, 100-200, and 999+ (1-indexed):" . "cat log.txt | lines -1 6,12,100-200,999-") ("Print lines 0, 2, 4, 6, …:" . "cat log.txt | lines m2") ("Print the third line of every set of four lines:" . "cat foo.fastq | lines 3m4"))) (defparameter *ui* (adopt:make-interface :name "lines" :usage "[OPTIONS] LINE-DESIGNATORS" :summary "print designated lines of standard input" :help *help-text* :examples *examples* :contents (list *option-limit* *option-no-limit* *option-help* *option-version* *option-debug* *option-no-debug* (adopt:make-group 'character-encodings :title "Character Encodings" :options (list *option-input-encoding* *option-input-replacement* *option-no-input-replacement* *option-output-encoding* *option-output-replacement* *option-no-output-replacement*)) (adopt:make-group 'line-numbering :title "Line Numbering" :options (list *option-one-based* *option-zero-based* *option-other-base* *option-include-numbers* *option-no-include-numbers*))))) (defmacro without-debugger (&body body) `(multiple-value-prog1 (progn #+sbcl (sb-ext:disable-debugger) (progn ,@body)) (progn #+sbcl (sb-ext:enable-debugger)))) (defmacro exit-on-error (&body body) `(without-debugger (handler-case (progn ,@body) #+sbcl (sb-int:broken-pipe () (adopt:exit)) (error (c) (adopt:print-error-and-exit c))))) (defmacro exit-on-error-unless (expr &body body) `(if ,expr (progn ,@body) (exit-on-error ,@body))) (defmacro exit-on-ctrl-c (&body body) `(handler-case (with-user-abort:with-user-abort (progn ,@body)) (with-user-abort:user-abort () (adopt:exit 130)))) (defun determine-external-format (encoding replacement) (let ((encoding (intern (string-upcase encoding) :keyword))) (if (null replacement) encoding (list encoding :replacement replacement)))) (defun toplevel () (exit-on-ctrl-c (multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*) (exit-on-error-unless (gethash 'debug options) (cond ((gethash 'help options) (adopt:print-help-and-exit *ui*)) ((gethash 'version options) (write-line *version*) (adopt:exit)) ((null arguments) (error "LINE-DESIGNATORS are required.")) (t (destructuring-bind (line-designators . more) arguments (when more (error "Unrecognized command line arguments: ~S" more)) (let ((*index-base* (gethash 'index-base options)) (*include-numbers* (gethash 'include-numbers options)) (*limit* (gethash 'limit options)) (input-format (determine-external-format (gethash 'input-encoding options) (gethash 'input-replacement options))) (output-format (determine-external-format (gethash 'output-encoding options) (gethash 'output-replacement options)))) (with-open-file (*standard-input* "/dev/stdin" :external-format input-format) (with-open-file (*standard-output* "/dev/stdout" :external-format output-format :direction :output :if-exists :append) (run line-designators))))))))))) #; Scratch -------------------------------------------------------------------- (let ((*standard-input* (make-string-input-stream "0 a foo 1 b bar 2 c baz 3 d hello 4 a world 5 b yes 6 c this 7 d is 8 a cat 9 b meow"))) (run "4m4"))