Merge
author |
Steve Losh <steve@stevelosh.com> |
date |
Thu, 13 Jun 2024 12:30:10 -0400 |
parents |
f37e47cda7c4 |
children |
(none) |
(eval-when (:compile-toplevel :load-toplevel :execute)
(ql:quickload '(:adopt :alexandria :cl-ppcre :with-user-abort :local-time)
:silent t))
;; Package for the twizzle script.  Only the entry point TOPLEVEL and the
;; interface object *UI* are exported; everything else is internal.
(defpackage :twizzle
(:use :cl)
(:export :toplevel :*ui*))
(in-package :twizzle)
;;;; Configuration ------------------------------------------------------------
;; Make UTC the default timezone for local-time, so timestamps that are
;; encoded or formatted without an explicit timezone are treated as UTC.
(setf local-time:*default-timezone* local-time:+utc-zone+)
;; Registry of the timestamp formats twizzle knows about.
(defparameter *time-formats*
;; An alist of (name . (parse-regex local-time-format-spec)), or (name . nil)
;; for more complicated formats.
;;
;; Entries that carry a regex are served by the unspecialized MAKE-PARSER and
;; MAKE-FORMATTER methods.  That parser binds the regex registers, in order,
;; to year, month, day, hour, minute, second and timezone, so every regex
;; here must have those seven groups.  :simple has no timezone, hence its
;; trailing empty group.
;;
;; Entries whose value is NIL are served by methods specialized on the format
;; name instead.
`((:rfc-3339 . ("(\\d{4})-(\\d{2})-(\\d{2})[ T](\\d{2}):(\\d{2}):(\\d{2})(?:[.]\\d+)?([+-]\\d{2}:\\d{2}|Z)?"
,local-time:+rfc3339-format+))
(:iso-8601 . ("(\\d{4})-(\\d{2})-(\\d{2})T(\\d{2}):(\\d{2}):(\\d{2})(?:,\\d+)?([+-]\\d{2}:\\d{2}|Z)?"
,local-time:+iso-8601-format+))
(:simple . ("(\\d{4})/(\\d{2})/(\\d{2}) (\\d{2}):(\\d{2}):(\\d{2})()"
((:year 4) #\/ (:month 2) #\/ (:day 2) #\space (:hour 2) #\: (:min 2) #\: (:sec 2))))
(:gnuplot . nil)
(:golang . nil)
(:unix-seconds . nil)
(:unix-milliseconds . nil)))
;;;; Utilities ----------------------------------------------------------------
;; (match ((var ...) (start end) (regex target)) body ...)
;;
;; Scan TARGET with REGEX via PPCRE:SCAN.  When the scan succeeds, evaluate
;; BODY with START and END bound to the first two values returned by the scan
;; (the bounds of the match) and with each name in REGISTER-VARS bound, in
;; order, to the substring of TARGET captured by the corresponding register,
;; or to NIL when that register did not take part in the match.  When the scan
;; fails, BODY is not evaluated and the form returns NIL.
(defmacro match ((register-vars (start end) (regex target)) &body body)
;; RS and RE name the register-start and register-end arrays in the expansion.
(alexandria:with-gensyms (rs re)
;; Evaluate the REGEX and TARGET forms exactly once in the expansion.
(alexandria:once-only (regex target)
`(multiple-value-bind (,start ,end ,rs ,re) (ppcre:scan ,regex ,target)
(when ,start
;; One binding per requested register variable, indexed by position.
(let (,@(loop :for r :from 0
:for var :in register-vars
:collect `(,var (when (aref ,rs ,r)
(subseq ,target (aref ,rs ,r) (aref ,re ,r))))))
,@body))))))
(defun i (s)
  "Parse the string S as a base-10 integer (shorthand for PARSE-INTEGER)."
  (parse-integer s :radix 10))
(defun keywordize (s)
  "Return the keyword named by the upcased string S, e.g. \"simple\" => :SIMPLE."
  (let ((name (string-upcase s)))
    (alexandria:make-keyword name)))
;;;; Time Formats -------------------------------------------------------------
(defun microseconds->nanoseconds (msec)
  "Convert MSEC, a count of microseconds, into nanoseconds."
  (* 1000 msec))
(defun milliseconds->nanoseconds (msec)
  "Convert MSEC, a count of milliseconds, into nanoseconds."
  (* 1000000 msec))
(defun get-format (format)
  "Return the (regex format-spec) entry stored for FORMAT in *TIME-FORMATS*.
Signals an error when FORMAT has no entry or its entry is NIL."
  (let ((spec (cdr (assoc format *time-formats*))))
    (if spec
        spec
        (error "Unknown time format ~S" format))))
(defun get-regex (format)
  "Return the parsing regex (first element) of FORMAT's entry."
  (let ((spec (get-format format)))
    (car spec)))
(defun get-local-time-format (format)
  "Return the local-time format spec (second element) of FORMAT's entry."
  (let ((spec (get-format format)))
    (cadr spec)))
(defun parse-timezone (string)
  "Turn the timezone text captured from a timestamp into a timezone object.
NIL, the empty string and the usual spellings of UTC map to the UTC zone.
Anything else is looked up as a location name; a failed lookup signals an
error."
  (cond
    ((member string '(nil "" "Z" "UTC" "+00:00" "-00:00") :test #'equal)
     local-time:+utc-zone+)
    (t
     (let ((zone (local-time:find-timezone-by-location-name string)))
       (if zone
           zone
           (error "TODO: handle timezone ~S" string))))))
;; The function returned by each method takes one string.  When a timestamp is
;; found in it, the function returns three values: the timestamp, and the
;; start and end positions of the matched text.  Otherwise it returns NIL.
(defgeneric make-parser (format)
(:documentation "Return a parsing function for the given format."))
(defmethod make-parser (format)
  ;; Fallback for formats that are fully described by a regex in
  ;; *TIME-FORMATS*.  The regex registers are taken, in order, as year, month,
  ;; day, hour, minute, second and timezone.  The scanner is compiled once and
  ;; shared by every call of the returned function.
  (let ((scanner (ppcre:create-scanner (get-regex format))))
    (flet ((parse (line)
             (match ((year month day hour minute second timezone)
                     (start end)
                     (scanner line))
               (let ((timestamp (local-time:encode-timestamp
                                  0
                                  (i second) (i minute) (i hour)
                                  (i day) (i month) (i year)
                                  :timezone (parse-timezone timezone))))
                 (values timestamp start end)))))
      #'parse)))
(defmethod make-parser ((format (eql :golang)))
  ;; Parser for timestamps of the form YYYY/MM/DD HH:MM:SS with an optional
  ;; six-digit fractional part (.uuuuuu).  The returned function takes a
  ;; string and returns the timestamp plus the start and end of the matched
  ;; text, or NIL when nothing matches.
  (let ((scanner (ppcre:create-scanner "(\\d{4})/(\\d{2})/(\\d{2}) (\\d{2}):(\\d{2}):(\\d{2})(?:\\.(\\d{6}))?")))
    (lambda (s)
      (match ((year month day hour minute seconds microseconds)
              (start end)
              (scanner s))
        (values (local-time:encode-timestamp
                  ;; The fractional group is optional, so MICROSECONDS is NIL
                  ;; for timestamps without one.  Treat that as zero instead
                  ;; of handing NIL to PARSE-INTEGER.
                  (if microseconds
                      (microseconds->nanoseconds (i microseconds))
                      0)
                  (i seconds) (i minute) (i hour) (i day) (i month) (i year))
                start end)))))
(defmethod make-parser ((format (eql :gnuplot)))
  ;; Parser for DD/MM/YY,HH:MM timestamps.  The format carries no seconds, so
  ;; they are encoded as zero, and the two-digit year is read as 20YY.
  (let ((scanner (ppcre:create-scanner "(\\d{2})/(\\d{2})/(\\d{2}),(\\d{2}):(\\d{2})")))
    (flet ((parse (line)
             (match ((day month year hour minute)
                     (start end)
                     (scanner line))
               (let ((full-year (+ 2000 (i year))))
                 (values (local-time:encode-timestamp
                           0 0 (i minute) (i hour) (i day) (i month) full-year)
                         start
                         end)))))
      #'parse)))
(defmethod make-parser ((format (eql :unix-seconds)))
  ;; Parser for Unix epoch timestamps expressed in whole seconds.
  ;;
  ;; Epoch seconds are currently 10 digits long (11 leaves headroom).  The
  ;; 13-14 digit width belongs to epoch *milliseconds* and would never match a
  ;; real seconds value.  The look-behind and look-ahead keep the scan from
  ;; matching a slice of a longer digit run, such as the first digits of a
  ;; millisecond timestamp.
  (let ((scanner (ppcre:create-scanner "(?<!\\d)(\\d{10,11})(?!\\d)")))
    (lambda (s)
      (match ((unix)
              (start end)
              (scanner s))
        (when unix ; shut up sbcl
          (values (local-time:unix-to-timestamp (i unix)) start end))))))
(defmethod make-parser ((format (eql :unix-milliseconds)))
  ;; Parser for Unix epoch timestamps expressed in milliseconds.
  ;;
  ;; Epoch milliseconds are currently 13 digits long (14 leaves headroom).
  ;; The 10-11 digit width belongs to epoch *seconds* and would misread a
  ;; seconds value as milliseconds.  The look-behind and look-ahead keep the
  ;; scan from matching a slice of a longer digit run.
  (let ((scanner (ppcre:create-scanner "(?<!\\d)(\\d{13,14})(?!\\d)")))
    (lambda (s)
      (match ((unix)
              (start end)
              (scanner s))
        (when unix ; shut up sbcl
          ;; Split into whole seconds and the leftover milliseconds.
          (multiple-value-bind (sec ms) (truncate (i unix) 1000)
            (values (local-time:unix-to-timestamp sec :nsec (milliseconds->nanoseconds ms))
                    start end)))))))
(defun make-predicate (format start end)
  "Return a matching predicate for the user's query.
This predicate will take a line and return four values:
* The timestamp found, if any.
* Whether the timestamp is inside the filtering bounds, if any.
* The start of the timestamp in the string, if any.
* The end of the timestamp in the string, if any.
START and END are inclusive bounds; either may be NIL to leave that side open.
"
  (let ((parser (make-parser format)))
    (flet ((within-bounds (time)
             ;; A missing timestamp is never in bounds.
             (and time
                  (or (null start) (local-time:timestamp<= start time))
                  (or (null end) (local-time:timestamp<= time end)))))
      (lambda (line)
        (multiple-value-bind (line-time match-start match-end)
            (funcall parser line)
          (values line-time
                  (within-bounds line-time)
                  match-start
                  match-end))))))
;; Return a function of two arguments, a timestamp and a stream, that writes
;; the timestamp to the stream in the given format.
(defgeneric make-formatter (format))
(defmethod make-formatter (format)
  ;; Fallback for formats whose *TIME-FORMATS* entry carries a local-time
  ;; format spec.  The spec is looked up once, when the formatter is built.
  (let ((spec (get-local-time-format format)))
    (flet ((emit (time stream)
             (local-time:format-timestring stream time :format spec)))
      #'emit)))
(defmethod make-formatter ((format (eql :golang)))
  ;; Writes YYYY/MM/DD HH:MM:SS.  No fractional seconds are emitted.
  (let ((spec '((:year 4) #\/ (:month 2) #\/ (:day 2) #\space (:hour 2) #\: (:min 2) #\: (:sec 2))))
    (lambda (time stream)
      (local-time:format-timestring stream time :format spec))))
(defmethod make-formatter ((format (eql :gnuplot)))
  ;; Writes DD/MM/YY,HH:MM.  The timestamp is first rendered with a literal Y
  ;; standing in for the year, e.g. "16/07/Y,15:05".  The output is then
  ;; assembled from the text before the placeholder, the year modulo 100
  ;; padded to two digits, and the text after the placeholder.
  (let ((template '((:day 2) #\/ (:month 2) #\/ #\Y #\, (:hour 2) #\: (:min 2))))
    (lambda (time stream)
      (let ((rendered (local-time:format-timestring nil time :format template))
            (short-year (mod (local-time:timestamp-year time) 100)))
        (write-string rendered stream :end 6)      ; "DD/MM/"
        (format stream "~2,'0D" short-year)        ; "YY"
        (write-string rendered stream :start 7))))) ; ",HH:MM"
(defmethod make-formatter ((format (eql :unix-seconds)))
  ;; Writes the timestamp as a decimal count of seconds since the Unix epoch.
  (lambda (time stream)
    (let ((seconds (local-time:timestamp-to-unix time)))
      (format stream "~D" seconds))))
(defmethod make-formatter ((format (eql :unix-milliseconds)))
  ;; Writes the timestamp as a decimal count of milliseconds since the Unix
  ;; epoch: whole seconds scaled by 1000, plus the millisecond component.
  (lambda (time stream)
    (let* ((seconds (local-time:timestamp-to-unix time))
           (millis (local-time:timestamp-millisecond time))
           (total (+ (* 1000 seconds) millis)))
      (format stream "~D" total))))
(defun parse-time-flexibly (string)
  "Parse STRING with the first format in *TIME-FORMATS* that recognizes it.
Formats are tried in table order and the first timestamp found is returned.
Signals an error when no format matches."
  ;; todo optimize this
  (dolist (entry *time-formats*)
    (let* ((parser (make-parser (car entry)))
           (timestamp (funcall parser string)))
      (when timestamp
        (return-from parse-time-flexibly timestamp))))
  (error "Don't know how to parse ~S as a time." string))
;;;; Run ----------------------------------------------------------------------
(defun run% (predicate in out path prefix reformat only)
  "Copy lines from IN to OUT, filtering and rewriting them per the options.
Lines without a timestamp are passed through unless ONLY is true.  Lines with
a timestamp are written only when PREDICATE reports them in bounds; they are
prefixed with PATH and a colon when PREFIX is true, and have the timestamp
text replaced by the output of the REFORMAT function when one is given."
  (flet ((emit-timestamped (line time start end)
           (when prefix
             (write-string path out)
             (write-char #\: out))
           (cond
             (reformat
              ;; Text before the timestamp, the reformatted timestamp, then
              ;; the rest of the line.
              (write-string line out :start 0 :end start)
              (funcall reformat time out)
              (write-line line out :start end))
             (t
              (write-line line out)))))
    (loop
      :for line = (read-line in nil)
      :while line
      ; todo support multiple timestamps per line
      :do (multiple-value-bind (time in-bounds start end) (funcall predicate line)
            (cond
              ((null time)
               (unless only
                 (write-line line out)))
              (in-bounds
               (emit-timestamped line time start end)))))))
(defun run (paths &key format start end prefix reformat only)
  "Process each of PATHS in turn, writing the results to standard output.
The path \"-\" means standard input, which is also used when PATHS is empty.
REFORMAT, when given, names the output format.  Signals an error when both
START and END are given and END precedes START."
  (when (and start end (local-time:timestamp< end start))
    (error "Start ~S is after end ~S." start end))
  (let* ((inputs (or paths '("-")))
         (formatter (and reformat (make-formatter reformat)))
         (pred (make-predicate format start end)))
    (dolist (path inputs)
      (flet ((process (stream)
               (run% pred stream *standard-output* path prefix formatter only)))
        (if (string= "-" path)
            (process *standard-input*)
            (with-open-file (stream path :direction :input)
              (process stream)))))))
;;;; User Interface -----------------------------------------------------------
;; -h / --help: the HELP result becomes T when the flag is present.
(defparameter *option-help*
  (adopt:make-option 'help
    :short #\h
    :long "help"
    :help "Display help and exit."
    :reduce (constantly t)))
;; Boolean option pair controlling the PREFIX result (path prefix on output).
(adopt:defparameters (*option-prefix* *option-no-prefix*)
  (adopt:make-boolean-options 'prefix
    :short #\p
    :long "prefix"
    :help "Prefix output lines with their path."
    :help-no "Do not prefix output lines with their path (default)."))
;; Boolean option pair controlling the ONLY result: -o/--only versus -a/--all.
(adopt:defparameters (*option-only* *option-all*)
  (adopt:make-boolean-options 'only
    :short #\o
    :long "only"
    :help "Only output lines containing a timestamp."
    :short-no #\a
    :long-no "all"
    :help-no "Output all lines, even those without a timestamp (default)."))
;; -f / --format FORMAT: input timestamp format.  The argument is converted to
;; a keyword with KEYWORDIZE and the last occurrence wins.  Defaults to
;; :SIMPLE.
(defparameter *option-format*
  (adopt:make-option 'format
    :short #\f
    :long "format"
    :parameter "FORMAT"
    :help "The time format used to parse times from lines."
    :initial-value :simple
    :key #'keywordize
    :reduce #'adopt:last))
;; -r / --reformat FORMAT: output timestamp format.  The argument is converted
;; to a keyword with KEYWORDIZE and the last occurrence wins.  Defaults to NIL.
(defparameter *option-reformat*
  (adopt:make-option 'reformat
    :short #\r
    :long "reformat"
    :parameter "FORMAT"
    :help "Reformat parsed timestamps into FORMAT before outputting them."
    :initial-value nil
    :key #'keywordize
    :reduce #'adopt:last))
;; -R / --no-reformat: resets the REFORMAT result to NIL.
(defparameter *option-no-reformat*
  (adopt:make-option 'reformat
    :short #\R
    :long "no-reformat"
    :help "Do not reformat parsed timestamps (default)."
    :reduce (constantly nil)))
;; -s / --start START: lower bound of the time filter.  The argument is parsed
;; with PARSE-TIME-FLEXIBLY and the last occurrence wins.  Defaults to NIL.
(defparameter *option-start*
  (adopt:make-option 'start
    :short #\s
    :long "start"
    :parameter "START"
    :help "Only show lines at or after START."
    :initial-value nil
    :key #'parse-time-flexibly
    :reduce #'adopt:last))
;; -e / --end END: upper bound of the time filter.  The argument is parsed
;; with PARSE-TIME-FLEXIBLY and the last occurrence wins.  Defaults to NIL.
(defparameter *option-end*
  (adopt:make-option 'end
    :short #\e
    :long "end"
    :parameter "END"
    :help "Only show lines at or before END."
    :initial-value nil
    :key #'parse-time-flexibly
    :reduce #'adopt:last))
;; Long-form help shown by --help.  The list of formats mirrors the names in
;; *TIME-FORMATS*.  The gnuplot example is written day/month/year because that
;; is the order its parser and formatter use.
(adopt:define-string *help-text*
  "twizzle lets you swizzle timestamps.~@
~@
Use --format to select the incoming timestamp format, and --reformat to ~
swizzle them into a different format if desired. Supported formats:~@
~@
~:
* simple: 2020/11/23 18:55:30 (default)
* rfc-3339: 2020-11-23 18:55:30Z
* iso-8601: 2020-11-23T18:55:30Z
* gnuplot: 23/11/20,18:55
* golang: 2020/11/23 18:55:30.123456
* unix-seconds: 1606157730
* unix-milliseconds: 1606157730000~@
~@
You can additionally filter based on a time range using --start and/or --end. ~
For convenience, these parameters can be given in any supported timestamp ~
format, they don't have to match --format.~@
~@
Currently only the first timestamp per line is considered.")
;; Usage examples shown in the help output, as (description . command) pairs.
;; The commands use only options this program defines: the time bounds are
;; --start and --end, and --only drops lines that have no timestamp.
(defparameter *examples*
  '(("Filter standard input and only print lines with an RFC-3339 time:"
     . "twizzle --only --format rfc-3339")
    ("Print log lines after a particular time, and prefix each output line with its source filename:"
     . "twizzle **.log --prefix --start '2020/06/14 12:22:01'")
    ("Print RFC-3339 log lines starting now, with now given in a different format:"
     . "tail -f foo | twizzle --format rfc-3339 --start \"$(date --utc --iso-8601=sec)\"")))
;; The command-line interface: the help option followed by two option groups,
;; one for timestamp formats and one for output control.
(defparameter *ui*
  (let ((format-group
          (adopt:make-group 'timestamp-formats
            :title "Timestamp Formats"
            :options (list *option-format*
                           *option-reformat*
                           *option-no-reformat*)))
        (output-group
          (adopt:make-group 'output-filtering
            :title "Output Control"
            :options (list *option-only*
                           *option-all*
                           *option-start*
                           *option-end*
                           *option-prefix*
                           *option-no-prefix*))))
    (adopt:make-interface
      :name "twizzle"
      :usage "[OPTIONS] [FILE...]"
      :summary "swizzle timestamps"
      :help *help-text*
      :examples *examples*
      :contents (list *option-help* format-group output-group))))
(defmacro exit-on-error (&body body)
  "Run BODY; if it signals an ERROR, hand the condition to
ADOPT:PRINT-ERROR-AND-EXIT."
  `(handler-case (progn ,@body)
     (error (condition)
       (adopt:print-error-and-exit condition))))
(defmacro exit-on-ctrl-c (&body body)
  "Run BODY; if the user aborts it, exit with status 130."
  `(handler-case
       (with-user-abort:with-user-abort
         ,@body)
     (with-user-abort:user-abort ()
       (adopt:exit 130))))
(defun toplevel ()
  "Program entry point: parse the command line, then print help or run.
Errors and user aborts are turned into process exits by EXIT-ON-ERROR and
EXIT-ON-CTRL-C."
  #+sbcl (sb-ext:disable-debugger)
  (exit-on-error
    (exit-on-ctrl-c
      (multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*)
        (flet ((option (name)
                 (gethash name options)))
          (if (option 'help)
              (adopt:print-help-and-exit *ui*)
              (progn
                (local-time:reread-timezone-repository)
                (run arguments
                     :format (option 'format)
                     :start (option 'start)
                     :end (option 'end)
                     :prefix (option 'prefix)
                     :only (option 'only)
                     :reformat (option 'reformat)))))))))
#; Scratch --------------------------------------------------------------------
;; Ad-hoc forms that look like leftovers from interactive experimentation.
;; NOTE(review): the log path is hard-coded to one machine's home directory.
;; Confirm these forms are never evaluated when the file is loaded or built.
(run
'("/home/sjl/scratch/logs/logs/prod/saas-warehouse-i-06b0ea6fe4e6dd3fd.log")
:format :golang
:prefix nil
:reformat :rfc-3339)
(parse-time-flexibly "2020-07-15 16:08:15.0000Z")
(local-time:find-timezone-by-location-name "EDT")
(local-time:reread-timezone-repository)