db7650d6c3a4

Clean up and improve batchcolor
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Wed, 23 Dec 2020 09:20:46 -0500
parents 3024550ba049
children 65a592d70211 6a9e64a79dd2
branches/tags (none)
files lisp/batchcolor.lisp

Changes

--- a/lisp/batchcolor.lisp	Mon Dec 14 14:32:21 2020 -0500
+++ b/lisp/batchcolor.lisp	Wed Dec 23 09:20:46 2020 -0500
@@ -19,24 +19,41 @@
      (* b 1)
      16))
 
-(defparameter *colors* (let ((result (make-array 256 :fill-pointer 0)))
-                         (dotimes (r 6)
-                           (dotimes (g 6)
-                             (dotimes (b 6)
-                               (unless (< (+ r g b) 3)
-                                 ;; Don't use dark, hard-to-read colors.
-                                 (vector-push-extend (rgb-code r g b) result)))))
-                         result))
+(defparameter *colors*
+  (let ((result (make-array 256 :fill-pointer 0)))
+    (dotimes (r 6)
+      (dotimes (g 6)
+        (dotimes (b 6)
+          (unless (< (+ r g b) 3)
+            ;; Don't use dark, hard-to-read colors.
+            (vector-push-extend (rgb-code r g b) result)))))
+    result))
+
+(defparameter *explicits* (make-hash-table :test #'equal))
+
+
+;;;; Errors -------------------------------------------------------------------
+(define-condition user-error (error) ())
+
+(define-condition missing-regex (user-error) ()
+  (:report "A regular expression is required."))
+
+(define-condition malformed-regex (user-error)
+  ((underlying-error :initarg :underlying-error))
+  (:report (lambda (c s)
+             (format s "Invalid regex: ~A" (slot-value c 'underlying-error)))))
+
+(define-condition overlapping-groups (user-error) ()
+  (:report "Invalid regex: seems to contain overlapping capturing groups."))
+
+(define-condition malformed-explicit (user-error)
+  ((spec :initarg :spec))
+  (:report (lambda (c s)
+             (format s "Invalid explicit spec ~S, must be of the form \"R,G,B:string\" with colors being 0-5."
+                     (slot-value c 'spec)))))
 
 
 ;;;; Functionality ------------------------------------------------------------
-(define-condition bad-regex (error) ()
-  (:report "Invalid regex."))
-
-(define-condition bad-regex-group-count (bad-regex) ()
-  (:report "Invalid regex: must contain exactly 1 register group, e.g. 'x (fo+) y'."))
-
-
 (defun djb2 (string)
   ;; http://www.cse.yorku.ca/~oz/hash.html
   (reduce (lambda (hash c)
@@ -46,8 +63,9 @@
           :key #'char-code))
 
 (defun find-color (string)
-  (aref *colors* (mod (+ (djb2 string) *start*)
-                      (length *colors*))))
+  (gethash string *explicits*
+           (aref *colors* (mod (+ (djb2 string) *start*)
+                               (length *colors*)))))
 
 (defun ansi-color-start (color)
   (format nil "~C[38;5;~Dm" #\Escape color))
@@ -55,19 +73,26 @@
 (defun ansi-color-end ()
   (format nil "~C[0m" #\Escape))
 
+(defun print-colorized (string)
+  (format *standard-output* "~A~A~A"
+          (ansi-color-start (find-color string))
+          string
+          (ansi-color-end)))
+
 (defun colorize-line (scanner line &aux (start 0))
   (ppcre:do-scans (ms me rs re scanner line)
-    (setf rs (remove nil rs)
-          re (remove nil re))
-    (when (/= 1 (length rs))
-      (error 'bad-regex-group-count))
-    (let* ((word-start (aref rs 0))
-           (word-end (aref re 0))
-           (word (subseq line word-start word-end))
-           (color (find-color word)))
-      (write-string line *standard-output* :start start :end word-start)
-      (format t "~A~A~A" (ansi-color-start color) word (ansi-color-end))
-      (setf start word-end)))
+    ;; If we don't have any register groups, colorize the entire match.
+    ;; Otherwise, colorize each matched capturing group.
+    (let* ((regs? (plusp (length rs)))
+           (starts (if regs? (remove nil rs) (list ms)))
+           (ends   (if regs? (remove nil re) (list me))))
+      (map nil (lambda (word-start word-end)
+                 (unless (<= start word-start)
+                   (error 'overlapping-groups))
+                 (write-string line *standard-output* :start start :end word-start)
+                 (print-colorized (subseq line word-start word-end))
+                 (setf start word-end))
+           starts ends)))
   (write-line line *standard-output* :start start)
   (values))
 
@@ -76,19 +101,13 @@
 (defun run% (scanner stream)
   (loop :for line = (read-line stream nil)
         :while line
-        :do (tagbody retry
-              (restart-case (colorize-line scanner line)
-                (supply-new-regex (v)
-                    :test (lambda (c) (typep c 'bad-regex))
-                    :report "Supply a new regular expression as a string."
-                    :interactive (lambda () (list (read-line *query-io*)))
-                  (setf scanner (ppcre:create-scanner v))
-                  (go retry))))))
+        :do (colorize-line scanner line)))
 
 (defun run (pattern paths)
-  (if (null paths)
-    (setf paths '("-")))
-  (let ((scanner (ppcre:create-scanner pattern)))
+  (let ((scanner (handler-case (ppcre:create-scanner pattern)
+                   (ppcre:ppcre-syntax-error (c)
+                     (error 'malformed-regex :underlying-error c))))
+        (paths (or paths '("-"))))
     (dolist (path paths)
       (if (string= "-" path)
         (run% scanner *standard-input*)
@@ -97,6 +116,13 @@
 
 
 ;;;; User Interface -----------------------------------------------------------
+(defun parse-explicit (spec)
+  (ppcre:register-groups-bind
+      ((#'parse-integer r g b) string)
+      ("([0-5]),([0-5]),([0-5]):(.+)" spec)
+    (return-from parse-explicit (cons string (rgb-code r g b))))
+  (error 'malformed-explicit :spec spec))
+
 (adopt:defparameters (*option-randomize* *option-no-randomize*)
   (adopt:make-boolean-options 'randomize
     :help "Randomize the choice of color each run."
@@ -111,6 +137,20 @@
     :help "Enable the Lisp debugger."
     :help-no "Disable the Lisp debugger (the default)."))
 
+(defparameter *option-explicit*
+  (adopt:make-option 'explicit
+    :parameter "R,G,B:STRING"
+    :help "Highlight STRING in an explicit color.  May be given multiple times."
+    :manual (format nil "~
+      Highlight STRING in an explicit color instead of randomly choosing one.  ~
+      R, G, and B must be 0-5.  STRING is treated as literal string, not a regex.  ~
+      Note that this doesn't automatically add STRING to the overall regex, you ~
+      must do that yourself!  This is a known bug that may be fixed in the future.")
+    :long "explicit"
+    :short #\e
+    :key #'parse-explicit
+    :reduce #'adopt:collect))
+
 (defparameter *option-help*
   (adopt:make-option 'help
     :help "Display help and exit."
@@ -127,14 +167,28 @@
 
 (adopt:define-string *help-text*
   "batchcolor takes a regular expression and matches it against standard ~
-   input one line at a time.  Each unique match is highlighted in a random ~
-   color.")
+   input one line at a time.  Each unique match is highlighted in its own color.~@
+   ~@
+   If the regular expression contains any capturing groups, only those parts of ~
+   the matches will be highlighted.  Otherwise the entire match will be ~
+   highlighted.  Overlapping capturing groups are not supported.")
+
+(adopt:define-string *extra-manual-text*
+  "Overlapping capturing groups are not supported because it's not clear what ~
+  the result should be.  For example: what should ((f)oo|(b)oo) highlight when ~
+  matched against 'foo'?  Should it highlight 'foo' in one color?  The 'f' in ~
+  one color and 'oo' in another color?  Should that 'oo' be the same color as ~
+  the 'oo' in 'boo' even though the overall match was different?  There are too ~
+  many possible behaviors and no clear winner, so batchcolor disallows ~
+  overlapping capturing groups entirely.")
 
 (defparameter *examples*
   '(("Colorize IRC nicknames in a chat log:"
      . "cat channel.log | batchcolor '<(\\\\w+)>'")
     ("Colorize UUIDs in a request log:"
-     . "tail -f /var/log/foo | batchcolor '([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})'")))
+     . "tail -f /var/log/foo | batchcolor '[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'")
+    ("Colorize some keywords explicitly and IPv4 addresses randomly (note that the keywords have to be in the main regex too, not just in the -e options):"
+     . "batchcolor 'WARN|INFO|ERR|(?:[0-9]{1,3}\\\\.){3}[0-9]{1,3}' -e '5,0,0:ERR' -e '5,4,0:WARN' -e '2,2,5:INFO'")))
 
 
 (defparameter *ui*
@@ -143,51 +197,38 @@
     :usage "[OPTIONS] REGEX [FILE...]"
     :summary "colorize regex matches in batches"
     :help *help-text*
+    :manual (concatenate 'string *help-text* (format nil "~2%") *extra-manual-text*)
     :examples *examples*
     :contents (list *option-randomize*
                     *option-no-randomize*
+                    *option-explicit*
                     *option-debug*
                     *option-no-debug*
                     *option-help*
                     *option-version*)))
 
 
-(defmacro without-debugger (&body body)
-  `(multiple-value-prog1
-     (progn
-       #+sbcl (sb-ext:disable-debugger)
-       (progn ,@body))
-     (progn
-       #+sbcl (sb-ext:enable-debugger))))
-
-(defmacro exit-on-error (&body body)
-  `(without-debugger
-     (handler-case (progn ,@body)
-       (error (c) (adopt:print-error-and-exit c)))))
-
-(defmacro exit-on-error-unless (expr &body body)
-  (let ((thunk (gensym "THUNK")))
-    `(flet ((,thunk () ,@body))
-       (if ,expr
-         (,thunk)
-         (exit-on-error (,thunk))))))
-
 (defmacro exit-on-ctrl-c (&body body)
   `(handler-case (with-user-abort:with-user-abort (progn ,@body))
      (with-user-abort:user-abort () (adopt:exit 130))))
 
-
 (defun toplevel ()
+  (sb-ext:disable-debugger)
   (exit-on-ctrl-c
     (multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*)
-      (exit-on-error-unless (gethash 'debug options)
-        (cond
-          ((gethash 'help options) (adopt:print-help-and-exit *ui*))
-          ((gethash 'version options) (write-line *version*) (adopt:exit))
-          ((null arguments) (error "A regular expression is required."))
-          (t (destructuring-bind (pattern . files) arguments
-               (let ((*start* (if (gethash 'randomize options)
-                                (random 256 (make-random-state t))
-                                0)))
-                 (run pattern files)))))))))
+      (when (gethash 'debug options)
+        (sb-ext:enable-debugger))
+      (handler-case
+          (cond
+            ((gethash 'help options) (adopt:print-help-and-exit *ui*))
+            ((gethash 'version options) (write-line *version*) (adopt:exit))
+            ((null arguments) (error 'missing-regex))
+            (t (destructuring-bind (pattern . files) arguments
+                 (loop :for (string . rgb) :in (gethash 'explicit options)
+                       :do (setf (gethash string *explicits*) rgb))
+                 (let ((*start* (if (gethash 'randomize options)
+                                  (random 256 (make-random-state t))
+                                  0)))
+                   (run pattern files)))))
+        (user-error (e) (adopt:print-error-and-exit e))))))