lisp/parsre.lisp @ a65fd2691c94 default tip
More
| author | Steve Losh <steve@stevelosh.com> |
|---|---|
| date | Mon, 03 Nov 2025 14:55:17 -0500 |
| parents | 958358268b7e |
| children | (none) |
(eval-when (:compile-toplevel :load-toplevel :execute) (ql:quickload '(:adopt :cl-ppcre :iterate :with-user-abort :jarl :conserve :losh) :silent t)) (defpackage :parsre (:use :cl :iterate) (:export :toplevel :*ui*)) (in-package :parsre) ;;;; Configuration ------------------------------------------------------------ (defparameter *output-format* :text) (defparameter *case-sensitive* t) ;;;; Errors ------------------------------------------------------------------- (define-condition user-error (error) ()) (define-condition missing-regex (user-error) () (:report "A regular expression is required.")) (define-condition missing-registers (user-error) () (:report "Invalid regex, at least one named register is required.")) (define-condition anonymous-register (user-error) () (:report "Invalid regex, anonymous registers are not supported. Use non-capturing groups (?:…) if you need grouping but not capturing.")) (define-condition malformed-regex (user-error) ((underlying-error :initarg :underlying-error)) (:report (lambda (c s) (format s "Invalid regex: ~A" (slot-value c 'underlying-error))))) ;;;; Functionality ------------------------------------------------------------ (defgeneric header (format names)) (defmethod header (format names) nil) (defmethod header ((format (eql :csv)) names) (conserve:write-row names)) (defmethod output (format names values)) (defmethod output ((format (eql :text)) names values) (iterate (for name :in names) (for value :in values) (format t "~A: ~A~%" name value)) (terpri)) (defmethod output ((format (eql :json)) names values) (jarl:print (alexandria:alist-hash-table (mapcar #'cons names values) :test #'equal)) (terpri)) (defmethod output ((format (eql :csv)) names values) (declare (ignore names)) (conserve:write-row values)) (defun register-values (string register-starts register-ends) (iterate (for start :in-vector register-starts) (for end :in-vector register-ends) (collect (subseq string start end)))) (defun run%% (scanner register-names string) (iterate (with start = 0) (for (values ms me rs re) = (ppcre:scan scanner string :start start)) (while ms) (output *output-format* register-names (register-values string rs re)) (setf start (max (1+ start) me)))) (defun run% (scanner register-names stream) (iterate (for line :in-stream stream :using #'read-line) (run%% scanner register-names line) (force-output))) (defun make-scanner (pattern) (handler-case (ppcre:create-scanner pattern :case-insensitive-mode (not *case-sensitive*) :single-line-mode t) (ppcre:ppcre-syntax-error (c) (error 'malformed-regex :underlying-error c)))) (defun run (pattern paths) (let ((ppcre:*allow-named-registers* t) (ppcre:*use-bmh-matchers* t) (paths (or paths '("-")))) (multiple-value-bind (scanner register-names) (make-scanner pattern) (cond ((null register-names) (error 'missing-registers)) ((member nil register-names) (error 'anonymous-register))) (header *output-format* register-names) (dolist (path paths) (if (string= "-" path) (run% scanner register-names *standard-input*) (with-open-file (stream path :direction :input) (run% scanner register-names stream))))))) ;;;; User Interface ----------------------------------------------------------- (defparameter *examples* '(("Parse some web service logs:" . "parsre '^(?<timestamp>\\\\S+) (?<level>INFO|WARN|ERROR|DEBUG) (?<message>.*)$'") ("Parse wordcount report into JSON:" . "wc **.lisp | parsre --json '^ *(?<lines>\\\\d+) +(?<words>\\\\d+) +(?<bytes>\\\\d+) +(?<path>.+)$'"))) (defparameter *option/help* (adopt:make-option 'help :help "Display help and exit." :long "help" :short #\h :reduce (constantly t))) (defparameter *option/version* (adopt:make-option 'version :help "Display version information and exit." :long "version" :reduce (constantly t))) (adopt:defparameters (*option/debug* *option/no-debug*) (adopt:make-boolean-options 'debug :long "debug" :short #\d :help "Enable the Lisp debugger." :help-no "Disable the Lisp debugger (default).")) (adopt:defparameters (*option/profile* *option/no-profile*) (adopt:make-boolean-options 'profile :long "profile" :help "Profile the run and write results to lisp.prof." :help-no "Do not profile (default).")) (adopt:defparameters (*option/matching/ignore-case* *option/matching/no-ignore-case*) (adopt:make-boolean-options 'ignore-case :long "ignore-case" :short #\i :help "Ignore case (i.e. case-insensitive)." :help-no "Do not ignore case (i.e. case-sensitive) (default)." :initial-value nil)) (defparameter *option/output-format/text* (adopt:make-option 'output-format/text :result-key 'output-format :help "Output results as text (default)." :long "text" :short #\t :initial-value :text :reduce (constantly :text))) (defparameter *option/output-format/json* (adopt:make-option 'output-format/json :result-key 'output-format :help "Output results as JSON." :long "json" :short #\j :reduce (constantly :json))) (defparameter *option/output-format/csv* (adopt:make-option 'output-format/csv :result-key 'output-format :help "Output results as CSV." :long "csv" :short #\c :reduce (constantly :csv))) (adopt:define-string *help-text* "Parsre takes a Perl-compatible regular expression, matches it against input, ~ and outputs the expression's registers in a variety of formats.~@ ~@ The regular expression will be matched against the input line-by-line. ~ Multiple matches per line are supported.~@ ~@ Registers can be named or anonymous. Anonymous registers will be named ~ arbitrarily (the scheme may change in the future).") (defparameter *ui* (adopt:make-interface :name "parsre" :usage "[OPTIONS] REGEX [FILE...]" :summary "almost a parser" :help *help-text* :examples *examples* :contents (list *option/help* *option/version* (adopt:make-group 'matching-options :title "Matching Options" :options (list *option/matching/ignore-case* *option/matching/no-ignore-case*)) (adopt:make-group 'output-options :title "Output Options" :options (list *option/output-format/text* *option/output-format/json* *option/output-format/csv*)) (adopt:make-group 'debugging-options :title "Debugging Options" :options (list *option/debug* *option/no-debug* *option/profile* *option/no-profile*))))) (defun configure (options) (setf *output-format* (gethash 'output-format options) *case-sensitive* (not (gethash 'ignore-case options))) (values)) (defun toplevel () (sb-ext:disable-debugger) (multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*) (when (gethash 'debug options) (sb-ext:enable-debugger)) (handler-case (with-user-abort:with-user-abort (cond ((gethash 'help options) (adopt:print-help-and-exit *ui*)) ((null arguments) (error 'missing-regex)) (t (destructuring-bind (pattern . files) arguments (configure options) (if (gethash 'profile options) (losh:profile (run pattern files)) (run pattern files)))))) (with-user-abort:user-abort () (adopt:exit 130)) (user-error (e) (adopt:print-error-and-exit e)))))