More
author |
Steve Losh <steve@stevelosh.com> |
date |
Sun, 18 Apr 2021 13:50:16 -0400 |
parents |
4139b0e71e08 |
children |
2f06facce49e |
(eval-when (:compile-toplevel :load-toplevel :execute)
(ql:quickload '(:adopt :cl-ppcre :with-user-abort) :silent t))
(defpackage :lines
(:use :cl)
(:export :toplevel :*ui*))
(in-package :lines)
;; TODO: Optimize by eliminating collection when all matchers are in order.
;;;; Configuration ------------------------------------------------------------
(defparameter *version* "1.0.0")
(defparameter *index-base* 0)
(defparameter *include-numbers* nil)
(defparameter *limit* nil)
;;;; Functionality ------------------------------------------------------------
(defclass matcher ()
((designator :type string :initarg :designator :accessor designator)))
(defmethod print-object ((o matcher) stream)
(print-unreadable-object (o stream :type t)
(format stream "~S" (designator o))))
(defgeneric matches-line-p (matcher line-number))
(defgeneric map-matching-lines (matcher function min max))
(defgeneric maximum (matcher))
(defclass single-line-matcher (matcher)
((n :type (integer 0) :initarg :n :accessor n)))
(defmethod matches-line-p ((matcher single-line-matcher) i)
(= (n matcher) i))
(defmethod map-matching-lines ((matcher single-line-matcher) function min max)
(when (<= min (n matcher) max)
(funcall function (n matcher))))
(defmethod maximum ((matcher single-line-matcher))
(n matcher))
(defclass range-matcher (matcher)
((m :type (or null (integer 0)) :initarg :m :accessor m)
(n :type (or null (integer 0)) :initarg :n :accessor n)))
(defmethod matches-line-p ((matcher range-matcher) i)
(let ((m (m matcher))
(n (n matcher)))
(cond ((null m) (if (null n)
t
(<= i n)))
((null n) (<= m i))
(t (<= m i n)))))
(defmethod map-matching-lines ((matcher range-matcher) function min max)
(loop :with m = (or (m matcher) min)
:with n = (or (n matcher) max)
:for i :from (max m min) :to (min n max)
:do (funcall function i)))
(defmethod maximum ((matcher range-matcher))
(n matcher))
(defclass modulo-matcher (matcher)
((n :type (integer 0) :initarg :n :accessor n)
(modulus :type (integer 1) :initarg :modulus :accessor modulus)))
(defmethod matches-line-p ((matcher modulo-matcher) i)
(= (mod i (modulus matcher))
(n matcher)))
(defmethod map-matching-lines ((matcher modulo-matcher) function min max)
(loop
:with n = (n matcher)
:with modulus = (modulus matcher)
:with start = (if (< n min)
(+ n modulus)
n)
:for i :from start :to max :by modulus
:do (funcall function i)))
(defmethod maximum ((matcher modulo-matcher))
nil)
(defun parse-single-line-designator (designator)
(ppcre:register-groups-bind
((#'parse-integer n))
("^(\\d+)$" designator)
(make-instance 'single-line-matcher :designator designator :n n)))
(defun parse-range-designator (designator)
(ppcre:register-groups-bind
((#'parse-integer m n))
("^(\\d+)?-(\\d+)?$" designator)
(when (and m n)
(assert (<= m n) ()
"Bad line range designator ~S: start (~D) must not be greater than end (~D)."
designator m n))
(make-instance 'range-matcher :designator designator :m m :n n)))
(defun parse-modulo-designator (designator)
(ppcre:register-groups-bind
((#'parse-integer n modulus))
("^(\\d+)?m(\\d+)$" designator)
(when (null n)
(setf n 0))
(assert (< n modulus) ()
"Bad modulo designator ~S: offset (~D) must be less than modulus (~D)."
designator n modulus)
(make-instance 'modulo-matcher :designator designator :n n :modulus modulus)))
(defun parse-line-designator (designator)
(or (parse-single-line-designator designator)
(parse-range-designator designator)
(parse-modulo-designator designator)
(error "Could not parse line designator ~S" designator)))
(defun parse-line-designators (designators)
(mapcar #'parse-line-designator (ppcre:split "," designators)))
(defun dbg (&rest args)
(apply #'format *error-output* args)
(finish-output *error-output*))
(defun compute-limit (matchers)
(let ((maxes (mapcar #'maximum matchers)))
(unless (some #'null maxes)
(reduce #'max maxes))))
(defun collect-lines (matchers)
(loop
:with limit = (compute-limit matchers)
:with min = *index-base*
:with max = nil
:with lines = (make-hash-table)
:for i :from min
:for line = (read-line *standard-input* nil)
:while line
:while (or (null limit) (<= i limit))
:do (progn (setf max i)
(dolist (matcher matchers)
(when (matches-line-p matcher i)
(setf (gethash i lines) line))))
:finally (return (values lines min max))))
(defun output-line (line i)
(when i
(format *standard-output* "~D " i))
(write-line line))
(defun run-slow (matchers)
(let ((include-numbers *include-numbers*))
(multiple-value-bind (lines min max) (collect-lines matchers)
(dolist (matcher matchers)
(map-matching-lines
matcher
(lambda (i)
(output-line (gethash i lines)
(when include-numbers i)))
min max)))))
(defun run-fast (matcher)
(loop
:with limit = (when *limit* (+ *index-base* *limit*))
:with include-numbers = *include-numbers*
:for i :from *index-base*
:for line = (read-line *standard-input* nil)
:while (or (null limit) (< i limit))
:while line
:when (zerop (mod i 100000))
:do (progn #+sbcl (sb-ext:gc))
:when (matches-line-p matcher i)
:do (output-line line (when include-numbers i))))
(defun run (line-designators)
(let ((matchers (parse-line-designators line-designators)))
(if (= 1 (length matchers))
(run-fast (first matchers))
(run-slow matchers)))
(values))
;;;; User Interface -----------------------------------------------------------
(adopt:defparameters (*option-debug* *option-no-debug*)
(adopt:make-boolean-options 'debug
:long "debug"
:short #\d
:help "Enable the Lisp debugger."
:help-no "Disable the Lisp debugger (default)."))
(defparameter *option-help*
(adopt:make-option 'help
:help "Display help and exit."
:long "help"
:short #\h
:reduce (constantly t)))
(defparameter *option-version*
(adopt:make-option 'version
:help "Display version information and exit."
:long "version"
:reduce (constantly t)))
(defparameter *option-one-based*
(adopt:make-option 'one-based
:result-key 'index-base
:help "Start numbering lines at 1 (default)."
:short #\1
:long "one"
:reduce (constantly 1)))
(defparameter *option-zero-based*
(adopt:make-option 'zero-based
:result-key 'index-base
:help "Start numbering lines at 0."
:short #\0
:long "zero"
:reduce (constantly 0)))
(defparameter *option-other-base*
(adopt:make-option 'other-base
:result-key 'index-base
:initial-value 0
:help "Start numbering lines at N."
:parameter "N"
:short #\s
:long "start"
:key #'parse-integer
:reduce #'adopt:last))
(adopt:defparameters (*option-include-numbers* *option-no-include-numbers*)
(adopt:make-boolean-options 'include-numbers
:long "number"
:short #\n
:help "Add line numbers to the output."
:help-no "Do not add line number to the output (default)."))
(defparameter *option-limit*
(adopt:make-option 'limit
:help "Read at most N lines."
:parameter "N"
:short #\l
:long "limit"
:key #'parse-integer
:reduce #'adopt:last))
(defparameter *option-no-limit*
(adopt:make-option 'no-limit
:result-key 'limit
:help "Read all lines (default)."
:short #\L
:long "no-limit"
:reduce (constantly nil)))
(defparameter *option-input-encoding*
(adopt:make-option 'input-encoding
:help "Treat input as being encoded with ENC (default utf-8)."
:parameter "ENC"
:short #\i
:long "input-encoding"
:initial-value "utf-8"
:reduce #'adopt:last))
(defparameter *option-output-encoding*
(adopt:make-option 'output-encoding
:help "Output text encoded with ENC (default utf-8)."
:parameter "ENC"
:short #\o
:long "output-encoding"
:initial-value "utf-8"
:reduce #'adopt:last))
(defparameter *option-input-replacement*
(adopt:make-option 'input-replacement
:help "If an input character is not valid in the selected encoding, replace it with REP."
:parameter "REP"
:short #\r
:long "input-replacement"
:reduce #'adopt:last))
(defparameter *option-no-input-replacement*
(adopt:make-option 'no-input-replacement
:result-key 'input-replacement
:help "If an input character is not valid in the selected encoding, return an error (default)."
:short #\R
:long "no-input-replacement"
:reduce (constantly nil)))
(defparameter *option-output-replacement*
(adopt:make-option 'output-replacement
:help "If an output character would not be not valid in the selected encoding, replace it with REP."
:parameter "REP"
:short #\q
:long "output-replacement"
:reduce #'adopt:last))
(defparameter *option-no-output-replacement*
(adopt:make-option 'no-output-replacement
:result-key 'output-replacement
:help "If an output character would not be valid in the selected encoding, return an error (default)."
:short #\Q
:long "no-output-replacement"
:reduce (constantly nil)))
(adopt:define-string *help-text*
"lines takes a string denoting which lines to print, and prints those lines ~
of standard input.")
(defparameter *examples*
'(("Print line 1234:"
. "cat log.txt | lines 1234")
("Print line 2, then line 1:"
. "cat log.txt | lines 2,1")
("Print lines 6, 12, 100-200, and 999+ (1-indexed):"
. "cat log.txt | lines -1 6,12,100-200,999-")
("Print lines 0, 2, 4, 6, …:"
. "cat log.txt | lines m2")
("Print the third line of every set of four lines:"
. "cat foo.fastq | lines 3m4")))
(defparameter *ui*
(adopt:make-interface
:name "lines"
:usage "[OPTIONS] LINE-DESIGNATORS"
:summary "print designated lines of standard input"
:help *help-text*
:examples *examples*
:contents (list
*option-limit*
*option-no-limit*
*option-help*
*option-version*
*option-debug*
*option-no-debug*
(adopt:make-group 'character-encodings
:title "Character Encodings"
:options (list
*option-input-encoding*
*option-input-replacement*
*option-no-input-replacement*
*option-output-encoding*
*option-output-replacement*
*option-no-output-replacement*))
(adopt:make-group 'line-numbering
:title "Line Numbering"
:options (list
*option-one-based*
*option-zero-based*
*option-other-base*
*option-include-numbers*
*option-no-include-numbers*)))))
(defmacro without-debugger (&body body)
`(multiple-value-prog1
(progn
#+sbcl (sb-ext:disable-debugger)
(progn ,@body))
(progn
#+sbcl (sb-ext:enable-debugger))))
(defmacro exit-on-error (&body body)
`(without-debugger
(handler-case (progn ,@body)
#+sbcl (sb-int:broken-pipe () (adopt:exit))
(error (c) (adopt:print-error-and-exit c)))))
(defmacro exit-on-error-unless (expr &body body)
`(if ,expr
(progn ,@body)
(exit-on-error ,@body)))
(defmacro exit-on-ctrl-c (&body body)
`(handler-case
(with-user-abort:with-user-abort (progn ,@body))
(with-user-abort:user-abort () (adopt:exit 130))))
(defun determine-external-format (encoding replacement)
(let ((encoding (intern (string-upcase encoding) :keyword)))
(if (null replacement)
encoding
(list encoding :replacement replacement))))
(defun toplevel ()
(exit-on-ctrl-c
(multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*)
(exit-on-error-unless (gethash 'debug options)
(cond
((gethash 'help options) (adopt:print-help-and-exit *ui*))
((gethash 'version options) (write-line *version*) (adopt:exit))
((null arguments) (error "LINE-DESIGNATORS are required."))
(t (destructuring-bind (line-designators . more) arguments
(when more
(error "Unrecognized command line arguments: ~S" more))
(let ((*index-base* (gethash 'index-base options))
(*include-numbers* (gethash 'include-numbers options))
(*limit* (gethash 'limit options))
(input-format (determine-external-format
(gethash 'input-encoding options)
(gethash 'input-replacement options)))
(output-format (determine-external-format
(gethash 'output-encoding options)
(gethash 'output-replacement options))))
(with-open-file (*standard-input* "/dev/stdin"
:external-format input-format)
(with-open-file (*standard-output* "/dev/stdout"
:external-format output-format
:direction :output
:if-exists :append)
(run line-designators)))))))))))
#; Scratch --------------------------------------------------------------------
(let ((*standard-input* (make-string-input-stream
"0 a foo
1 b bar
2 c baz
3 d hello
4 a world
5 b yes
6 c this
7 d is
8 a cat
9 b meow")))
(run "4m4"))