lisp/lines.lisp @ 619b0d73e6ae

More
author Steve Losh <steve@stevelosh.com>
date Tue, 29 Aug 2023 15:05:19 -0400
parents 2f06facce49e
children (none)
(eval-when (:compile-toplevel :load-toplevel :execute)
  (ql:quickload '(:adopt :cl-ppcre :with-user-abort) :silent t))

(defpackage :lines
  (:use :cl)
  (:export :toplevel :*ui*))

(in-package :lines)

;; TODO: Optimize by eliminating collection when all matchers are in order.

;;;; Configuration ------------------------------------------------------------
(defparameter *version* "1.0.0")
(defparameter *index-base* 0)
(defparameter *include-numbers* nil)
(defparameter *limit* nil)


;;;; Functionality ------------------------------------------------------------
(defclass matcher ()
  ((designator :type string :initarg :designator :accessor designator)))

(defmethod print-object ((o matcher) stream)
  (print-unreadable-object (o stream :type t)
    (format stream "~S" (designator o))))

(defgeneric matches-line-p (matcher line-number))
(defgeneric map-matching-lines (matcher function min max))
(defgeneric maximum (matcher))


(defclass single-line-matcher (matcher)
  ((n :type (integer 0) :initarg :n :accessor n)))

(defmethod matches-line-p ((matcher single-line-matcher) i)
  (= (n matcher) i))

(defmethod map-matching-lines ((matcher single-line-matcher) function min max)
  (when (<= min (n matcher) max)
    (funcall function (n matcher))))

(defmethod maximum ((matcher single-line-matcher))
  (n matcher))


(defclass range-matcher (matcher)
  ((m :type (or null (integer 0)) :initarg :m :accessor m)
   (n :type (or null (integer 0)) :initarg :n :accessor n)))

(defmethod matches-line-p ((matcher range-matcher) i)
  (let ((m (m matcher))
        (n (n matcher)))
    (cond ((null m) (if (null n)
                      t
                      (<= i n)))
          ((null n) (<= m i))
          (t (<= m i n)))))

(defmethod map-matching-lines ((matcher range-matcher) function min max)
  (loop :with m = (or (m matcher) min)
        :with n = (or (n matcher) max)
        :for i :from (max m min) :to (min n max)
        :do (funcall function i)))

(defmethod maximum ((matcher range-matcher))
  (n matcher))


(defclass modulo-matcher (matcher)
  ((n :type (integer 0) :initarg :n :accessor n)
   (modulus :type (integer 1) :initarg :modulus :accessor modulus)))

(defmethod matches-line-p ((matcher modulo-matcher) i)
  (= (mod i (modulus matcher))
     (n matcher)))

(defmethod map-matching-lines ((matcher modulo-matcher) function min max)
  (loop
    :with n = (n matcher)
    :with modulus = (modulus matcher)
    :with start = (if (< n min)
                    (+ n modulus)
                    n)
    :for i :from start :to max :by modulus
    :do (funcall function i)))

(defmethod maximum ((matcher modulo-matcher))
  nil)


(defun parse-single-line-designator (designator)
  (ppcre:register-groups-bind
      ((#'parse-integer n))
      ("^(\\d+)$" designator)
    (make-instance 'single-line-matcher :designator designator :n n)))

(defun parse-range-designator (designator)
  (ppcre:register-groups-bind
      ((#'parse-integer m n))
      ("^(\\d+)?-(\\d+)?$" designator)
    (when (and m n)
      (assert (<= m n) ()
        "Bad line range designator ~S: start (~D) must not be greater than end (~D)."
        designator m n))
    (make-instance 'range-matcher :designator designator :m m :n n)))

(defun parse-modulo-designator (designator)
  (ppcre:register-groups-bind
      ((#'parse-integer n modulus))
      ("^(\\d+)?m(\\d+)$" designator)
    (when (null n)
      (setf n 0))
    (assert (< n modulus) ()
      "Bad modulo designator ~S: offset (~D) must be less than modulus (~D)."
      designator n modulus)
    (make-instance 'modulo-matcher :designator designator :n n :modulus modulus)))

(defun parse-line-designator (designator)
  (or (parse-single-line-designator designator)
      (parse-range-designator designator)
      (parse-modulo-designator designator)
      (error "Could not parse line designator ~S" designator)))

(defun parse-line-designators (designators)
  (mapcar #'parse-line-designator (ppcre:split "," designators)))


(defun dbg (&rest args)
  (apply #'format *error-output* args)
  (finish-output *error-output*))

(defun compute-limit (matchers)
  (let ((maxes (mapcar #'maximum matchers)))
    (unless (some #'null maxes)
      (reduce #'max maxes))))

(defun collect-lines (matchers)
  (loop
    :with limit = (compute-limit matchers)
    :with min = *index-base*
    :with max = nil
    :with lines = (make-hash-table)
    :for i :from min
    :for line = (read-line *standard-input* nil)
    :while line
    :while (or (null limit) (<= i limit))
    :do (progn (setf max i)
               (dolist (matcher matchers)
                 (when (matches-line-p matcher i)
                   (setf (gethash i lines) line))))
    :finally (return (values lines min max))))


(defun output-line (line i)
  (when i
    (format *standard-output* "~D " i))
  (write-line line))

(defun run-slow (matchers)
  (let ((include-numbers *include-numbers*))
    (multiple-value-bind (lines min max) (collect-lines matchers)
      (dolist (matcher matchers)
        (map-matching-lines
          matcher
          (lambda (i)
            (output-line (gethash i lines)
                         (when include-numbers i)))
          min max)))))

(defun run-fast (matcher)
  (loop
    :with limit = (when *limit* (+ *index-base* *limit*))
    :with include-numbers = *include-numbers*
    :for i :from *index-base*
    :for line = (read-line *standard-input* nil)
    :while (or (null limit) (< i limit))
    :while line
    :when (zerop (mod i 100000))
    :do (progn #+sbcl (sb-ext:gc))
    :when (matches-line-p matcher i)
    :do (output-line line (when include-numbers i))))

(defun run (line-designators)
  (let ((matchers (parse-line-designators line-designators)))
    (if (= 1 (length matchers))
      (run-fast (first matchers))
      (run-slow matchers)))
  (values))


;;;; User Interface -----------------------------------------------------------
(adopt:defparameters (*option-debug* *option-no-debug*)
  (adopt:make-boolean-options 'debug
    :long "debug"
    :short #\d
    :help "Enable the Lisp debugger."
    :help-no "Disable the Lisp debugger (default)."))

(defparameter *option-help*
  (adopt:make-option 'help
    :help "Display help and exit."
    :long "help"
    :short #\h
    :reduce (constantly t)))

(defparameter *option-version*
  (adopt:make-option 'version
    :help "Display version information and exit."
    :long "version"
    :reduce (constantly t)))


(defparameter *option-one-based*
  (adopt:make-option 'one-based
    :result-key 'index-base
    :help "Start numbering lines at 1."
    :short #\1
    :long "one"
    :reduce (constantly 1)))

(defparameter *option-zero-based*
  (adopt:make-option 'zero-based
    :result-key 'index-base
    :help "Start numbering lines at 0 (default)."
    :short #\0
    :long "zero"
    :reduce (constantly 0)))

(defparameter *option-other-base*
  (adopt:make-option 'other-base
    :result-key 'index-base
    :initial-value 0
    :help "Start numbering lines at N."
    :parameter "N"
    :short #\s
    :long "start"
    :key #'parse-integer
    :reduce #'adopt:last))

(adopt:defparameters (*option-include-numbers* *option-no-include-numbers*)
  (adopt:make-boolean-options 'include-numbers
    :long "number"
    :short #\n
    :help "Add line numbers to the output."
    :help-no "Do not add line number to the output (default)."))


(defparameter *option-limit*
  (adopt:make-option 'limit
    :help "Read at most N lines."
    :parameter "N"
    :short #\l
    :long "limit"
    :key #'parse-integer
    :reduce #'adopt:last))

(defparameter *option-no-limit*
  (adopt:make-option 'no-limit
    :result-key 'limit
    :help "Read all lines (default)."
    :short #\L
    :long "no-limit"
    :reduce (constantly nil)))


(defparameter *option-input-encoding*
  (adopt:make-option 'input-encoding
    :help "Treat input as being encoded with ENC (default utf-8)."
    :parameter "ENC"
    :short #\i
    :long "input-encoding"
    :initial-value "utf-8"
    :reduce #'adopt:last))

(defparameter *option-output-encoding*
  (adopt:make-option 'output-encoding
    :help "Output text encoded with ENC (default utf-8)."
    :parameter "ENC"
    :short #\o
    :long "output-encoding"
    :initial-value "utf-8"
    :reduce #'adopt:last))

(defparameter *option-input-replacement*
  (adopt:make-option 'input-replacement
    :help "If an input character is not valid in the selected encoding, replace it with REP."
    :parameter "REP"
    :short #\r
    :long "input-replacement"
    :reduce #'adopt:last))

(defparameter *option-no-input-replacement*
  (adopt:make-option 'no-input-replacement
    :result-key 'input-replacement
    :help "If an input character is not valid in the selected encoding, return an error (default)."
    :short #\R
    :long "no-input-replacement"
    :reduce (constantly nil)))

(defparameter *option-output-replacement*
  (adopt:make-option 'output-replacement
    :help "If an output character would not be not valid in the selected encoding, replace it with REP."
    :parameter "REP"
    :short #\q
    :long "output-replacement"
    :reduce #'adopt:last))

(defparameter *option-no-output-replacement*
  (adopt:make-option 'no-output-replacement
    :result-key 'output-replacement
    :help "If an output character would not be valid in the selected encoding, return an error (default)."
    :short #\Q
    :long "no-output-replacement"
    :reduce (constantly nil)))


(adopt:define-string *help-text*
  "lines takes a string denoting which lines to print, and prints those lines ~
   of standard input.")

(defparameter *examples*
  '(("Print line 1234:"
     . "cat log.txt | lines 1234")
    ("Print line 2, then line 1:"
     . "cat log.txt | lines 2,1")
    ("Print lines 6, 12, 100-200, and 999+ (1-indexed):"
     . "cat log.txt | lines -1 6,12,100-200,999-")
    ("Print lines 0, 2, 4, 6, …:"
     . "cat log.txt | lines m2")
    ("Print the third line of every set of four lines:"
     . "cat foo.fastq | lines 3m4")))


(defparameter *ui*
  (adopt:make-interface
    :name "lines"
    :usage "[OPTIONS] LINE-DESIGNATORS"
    :summary "print designated lines of standard input"
    :help *help-text*
    :examples *examples*
    :contents (list
                *option-limit*
                *option-no-limit*
                *option-help*
                *option-version*
                *option-debug*
                *option-no-debug*
                (adopt:make-group 'character-encodings
                                  :title "Character Encodings"
                                  :options (list
                                             *option-input-encoding*
                                             *option-input-replacement*
                                             *option-no-input-replacement*
                                             *option-output-encoding*
                                             *option-output-replacement*
                                             *option-no-output-replacement*))
                (adopt:make-group 'line-numbering
                                  :title "Line Numbering"
                                  :options (list
                                             *option-one-based*
                                             *option-zero-based*
                                             *option-other-base*
                                             *option-include-numbers*
                                             *option-no-include-numbers*)))))


(defmacro without-debugger (&body body)
  `(multiple-value-prog1
     (progn
       #+sbcl (sb-ext:disable-debugger)
       (progn ,@body))
     (progn
       #+sbcl (sb-ext:enable-debugger))))

(defmacro exit-on-error (&body body)
  `(without-debugger
     (handler-case (progn ,@body)
       #+sbcl (sb-int:broken-pipe () (adopt:exit))
       (error (c) (adopt:print-error-and-exit c)))))

(defmacro exit-on-error-unless (expr &body body)
  `(if ,expr
     (progn ,@body)
     (exit-on-error ,@body)))

(defmacro exit-on-ctrl-c (&body body)
  `(handler-case
       (with-user-abort:with-user-abort (progn ,@body))
     (with-user-abort:user-abort () (adopt:exit 130))))


(defun determine-external-format (encoding replacement)
  (let ((encoding (intern (string-upcase encoding) :keyword)))
    (if (null replacement)
      encoding
      (list encoding :replacement replacement))))

(defun toplevel ()
  (exit-on-ctrl-c
    (multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*)
      (exit-on-error-unless (gethash 'debug options)
        (cond
          ((gethash 'help options) (adopt:print-help-and-exit *ui*))
          ((gethash 'version options) (write-line *version*) (adopt:exit))
          ((null arguments) (error "LINE-DESIGNATORS are required."))
          (t (destructuring-bind (line-designators . more) arguments
               (when more
                 (error "Unrecognized command line arguments: ~S" more))
               (let ((*index-base* (gethash 'index-base options))
                     (*include-numbers* (gethash 'include-numbers options))
                     (*limit* (gethash 'limit options))
                     (input-format (determine-external-format
                                     (gethash 'input-encoding options)
                                     (gethash 'input-replacement options)))
                     (output-format (determine-external-format
                                      (gethash 'output-encoding options)
                                      (gethash 'output-replacement options))))
                 (with-open-file (*standard-input* "/dev/stdin"
                                                   :external-format input-format)
                   (with-open-file (*standard-output* "/dev/stdout"
                                                      :external-format output-format
                                                      :direction :output
                                                      :if-exists :append)
                     (run line-designators)))))))))))


#; Scratch --------------------------------------------------------------------

(let ((*standard-input* (make-string-input-stream
"0 a foo
1 b bar
2 c baz
3 d hello
4 a world
5 b yes
6 c this
7 d is
8 a cat
9 b meow")))
  (run "4m4"))