More
author |
Steve Losh <steve@stevelosh.com> |
date |
Sat, 21 Oct 2023 15:01:32 -0400 |
parents |
958358268b7e |
children |
(none) |
(eval-when (:compile-toplevel :load-toplevel :execute)
(ql:quickload '(:adopt :cl-ppcre :iterate :with-user-abort
:jarl :conserve :losh)
:silent t))
(defpackage :parsre
(:use :cl :iterate)
(:export :toplevel :*ui*))
(in-package :parsre)
;;;; Configuration ------------------------------------------------------------
(defparameter *output-format* :text)
(defparameter *case-sensitive* t)
;;;; Errors -------------------------------------------------------------------
(define-condition user-error (error) ())
(define-condition missing-regex (user-error) ()
(:report "A regular expression is required."))
(define-condition missing-registers (user-error) ()
(:report "Invalid regex, at least one named register is required."))
(define-condition anonymous-register (user-error) ()
(:report "Invalid regex, anonymous registers are not supported. Use non-capturing groups (?:…) if you need grouping but not capturing."))
(define-condition malformed-regex (user-error)
((underlying-error :initarg :underlying-error))
(:report (lambda (c s)
(format s "Invalid regex: ~A" (slot-value c 'underlying-error)))))
;;;; Functionality ------------------------------------------------------------
(defgeneric header (format names))
(defmethod header (format names) nil)
(defmethod header ((format (eql :csv)) names) (conserve:write-row names))
(defmethod output (format names values))
(defmethod output ((format (eql :text)) names values)
(iterate (for name :in names)
(for value :in values)
(format t "~A: ~A~%" name value))
(terpri))
(defmethod output ((format (eql :json)) names values)
(jarl:print (alexandria:alist-hash-table (mapcar #'cons names values) :test #'equal))
(terpri))
(defmethod output ((format (eql :csv)) names values)
(declare (ignore names))
(conserve:write-row values))
(defun register-values (string register-starts register-ends)
(iterate
(for start :in-vector register-starts)
(for end :in-vector register-ends)
(collect (subseq string start end))))
(defun run%% (scanner register-names string)
(iterate (with start = 0)
(for (values ms me rs re) = (ppcre:scan scanner string :start start))
(while ms)
(output *output-format* register-names (register-values string rs re))
(setf start (max (1+ start) me))))
(defun run% (scanner register-names stream)
(iterate (for line :in-stream stream :using #'read-line)
(run%% scanner register-names line)
(force-output)))
(defun make-scanner (pattern)
(handler-case
(ppcre:create-scanner pattern
:case-insensitive-mode (not *case-sensitive*)
:single-line-mode t)
(ppcre:ppcre-syntax-error (c) (error 'malformed-regex :underlying-error c))))
(defun run (pattern paths)
(let ((ppcre:*allow-named-registers* t)
(ppcre:*use-bmh-matchers* t)
(paths (or paths '("-"))))
(multiple-value-bind (scanner register-names) (make-scanner pattern)
(cond ((null register-names) (error 'missing-registers))
((member nil register-names) (error 'anonymous-register)))
(header *output-format* register-names)
(dolist (path paths)
(if (string= "-" path)
(run% scanner register-names *standard-input*)
(with-open-file (stream path :direction :input)
(run% scanner register-names stream)))))))
;;;; User Interface -----------------------------------------------------------
(defparameter *examples*
'(("Parse some web service logs:" .
"parsre '^(?<timestamp>\\\\S+) (?<level>INFO|WARN|ERROR|DEBUG) (?<message>.*)$'")
("Parse wordcount report into JSON:" .
"wc **.lisp | parsre --json '^ *(?<lines>\\\\d+) +(?<words>\\\\d+) +(?<bytes>\\\\d+) +(?<path>.+)$'")))
(defparameter *option/help*
(adopt:make-option 'help
:help "Display help and exit."
:long "help"
:short #\h
:reduce (constantly t)))
(defparameter *option/version*
(adopt:make-option 'version
:help "Display version information and exit."
:long "version"
:reduce (constantly t)))
(adopt:defparameters (*option/debug* *option/no-debug*)
(adopt:make-boolean-options 'debug
:long "debug"
:short #\d
:help "Enable the Lisp debugger."
:help-no "Disable the Lisp debugger (default)."))
(adopt:defparameters (*option/profile* *option/no-profile*)
(adopt:make-boolean-options 'profile
:long "profile"
:help "Profile the run and write results to lisp.prof."
:help-no "Do not profile (default)."))
(adopt:defparameters (*option/matching/ignore-case* *option/matching/no-ignore-case*)
(adopt:make-boolean-options 'ignore-case
:long "ignore-case"
:short #\i
:help "Ignore case (i.e. case-insensitive)."
:help-no "Do not ignore case (i.e. case-sensitive) (default)."
:initial-value nil))
(defparameter *option/output-format/text*
(adopt:make-option 'output-format/text
:result-key 'output-format
:help "Output results as text (default)."
:long "text"
:short #\t
:initial-value :text
:reduce (constantly :text)))
(defparameter *option/output-format/json*
(adopt:make-option 'output-format/json
:result-key 'output-format
:help "Output results as JSON."
:long "json"
:short #\j
:reduce (constantly :json)))
(defparameter *option/output-format/csv*
(adopt:make-option 'output-format/csv
:result-key 'output-format
:help "Output results as CSV."
:long "csv"
:short #\c
:reduce (constantly :csv)))
(adopt:define-string *help-text*
"Parsre takes a Perl-compatible regular expression, matches it against input, ~
and outputs the expression's registers in a variety of formats.~@
~@
The regular expression will be matched against the input line-by-line. ~
Multiple matches per line are supported.~@
~@
Registers can be named or anonymous. Anonymous registers will be named ~
arbitrarily (the scheme may change in the future).")
(defparameter *ui*
(adopt:make-interface
:name "parsre"
:usage "[OPTIONS] REGEX [FILE...]"
:summary "almost a parser"
:help *help-text*
:examples *examples*
:contents (list *option/help*
*option/version*
(adopt:make-group
'matching-options
:title "Matching Options"
:options (list *option/matching/ignore-case*
*option/matching/no-ignore-case*))
(adopt:make-group
'output-options
:title "Output Options"
:options (list *option/output-format/text*
*option/output-format/json*
*option/output-format/csv*))
(adopt:make-group
'debugging-options
:title "Debugging Options"
:options (list *option/debug*
*option/no-debug*
*option/profile*
*option/no-profile*)))))
(defun configure (options)
(setf *output-format* (gethash 'output-format options)
*case-sensitive* (not (gethash 'ignore-case options)))
(values))
(defun toplevel ()
(sb-ext:disable-debugger)
(multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*)
(when (gethash 'debug options)
(sb-ext:enable-debugger))
(handler-case
(with-user-abort:with-user-abort
(cond
((gethash 'help options) (adopt:print-help-and-exit *ui*))
((null arguments) (error 'missing-regex))
(t (destructuring-bind (pattern . files) arguments
(configure options)
(if (gethash 'profile options)
(losh:profile (run pattern files))
(run pattern files))))))
(with-user-abort:user-abort () (adopt:exit 130))
(user-error (e) (adopt:print-error-and-exit e)))))