lisp/twizzle.lisp @ f37e47cda7c4

More
author Steve Losh <steve@stevelosh.com>
date Wed, 04 May 2022 20:33:37 -0400
parents (none)
children (none)
(eval-when (:compile-toplevel :load-toplevel :execute)
  (ql:quickload '(:adopt :alexandria :cl-ppcre :with-user-abort :local-time)
                :silent t))

;; The package exposes only the binary's entry point and its interface object.
(defpackage #:twizzle
  (:use #:cl)
  (:export #:toplevel
           #:*ui*))

(in-package #:twizzle)

;;;; Configuration ------------------------------------------------------------
;; Make UTC the timezone local-time falls back on when none is given explicitly.
(setf local-time:*default-timezone* local-time:+utc-zone+)

(defparameter *time-formats*
  ;; An alist of (name . (parse-regex local-time-format-spec)), or (name . nil)
  ;; for more complicated formats that have their own specialized MAKE-PARSER
  ;; and MAKE-FORMATTER methods.
  ;;
  ;; Each parse-regex must have exactly seven registers, in this order: year,
  ;; month, day, hour, minute, second, timezone.  The default parser binds them
  ;; positionally, which is why :simple ends in an empty () register.
  `((:rfc-3339 . ("(\\d{4})-(\\d{2})-(\\d{2})[ T](\\d{2}):(\\d{2}):(\\d{2})(?:[.]\\d+)?([+-]\\d{2}:\\d{2}|Z)?"
                  ,local-time:+rfc3339-format+))
    ;; The fractional part may be introduced by either "," or ".".  ISO 8601
    ;; allows both and local-time's own ISO output uses a dot; accepting only a
    ;; comma made the match stop at the seconds and silently drop the timezone.
    (:iso-8601 . ("(\\d{4})-(\\d{2})-(\\d{2})T(\\d{2}):(\\d{2}):(\\d{2})(?:[.,]\\d+)?([+-]\\d{2}:\\d{2}|Z)?"
                  ,local-time:+iso-8601-format+))
    (:simple   . ("(\\d{4})/(\\d{2})/(\\d{2}) (\\d{2}):(\\d{2}):(\\d{2})()"
                  ((:year 4) #\/ (:month 2) #\/ (:day 2) #\space (:hour 2) #\: (:min 2) #\: (:sec 2))))
    (:gnuplot           . nil)
    (:golang            . nil)
    (:unix-seconds      . nil)
    (:unix-milliseconds . nil)))


;;;; Utilities ----------------------------------------------------------------
(defmacro match ((register-vars (start end) (regex target)) &body body)
  "Scan TARGET with REGEX and, if it matches, run BODY with the match bound.

  START and END are bound to the bounds of the whole match.  Each variable in
  REGISTER-VARS is bound, in order, to the substring captured by the
  corresponding register, or to NIL when that register did not participate in
  the match.  Returns NIL without running BODY when there is no match."
  (alexandria:with-gensyms (register-starts register-ends)
    (alexandria:once-only (regex target)
      (let ((bindings
              (loop :for var :in register-vars
                    :for index :from 0
                    :collect `(,var (when (aref ,register-starts ,index)
                                      (subseq ,target
                                              (aref ,register-starts ,index)
                                              (aref ,register-ends ,index)))))))
        `(multiple-value-bind (,start ,end ,register-starts ,register-ends)
             (ppcre:scan ,regex ,target)
           (when ,start
             (let ,bindings
               ,@body)))))))

(defun i (s)
  "Parse the string S as a base-10 integer."
  (parse-integer s :radix 10))

(defun keywordize (s)
  "Return the keyword named by the upcased string S, interning it if needed."
  (let ((name (string-upcase s)))
    (intern name :keyword)))


;;;; Time Formats -------------------------------------------------------------
(defun microseconds->nanoseconds (usec)
  "Convert a count of microseconds to nanoseconds."
  (* 1000 usec))

(defun milliseconds->nanoseconds (msec)
  "Convert a count of milliseconds to nanoseconds."
  (* 1000000 msec))


(defun get-format (format)
  "Return the (regex local-time-format) entry for FORMAT in *TIME-FORMATS*.

  Signals an error when FORMAT is missing or its entry is NIL."
  (let ((entry (cdr (assoc format *time-formats*))))
    (if entry
        entry
        (error "Unknown time format ~S" format))))

(defun get-regex (format)
  "Return the parsing regex string registered for FORMAT."
  (let ((entry (get-format format)))
    (car entry)))

(defun get-local-time-format (format)
  "Return the local-time format spec registered for FORMAT."
  (let ((entry (get-format format)))
    (cadr entry)))


(defun parse-timezone (string)
  "Return the timezone designated by STRING.

  NIL, the empty string and the usual spellings of UTC map to UTC.  Anything
  else is looked up as a location name; an error is signaled when that lookup
  finds nothing."
  (cond
    ((or (null string)
         (member string '("" "Z" "UTC" "+00:00" "-00:00") :test #'equal))
     local-time:+utc-zone+)
    ((local-time:find-timezone-by-location-name string))
    (t (error "TODO: handle timezone ~S" string))))


(defgeneric make-parser (format)
  (:documentation "Return a parsing function for the given format."))

;; Default method, for formats whose *TIME-FORMATS* entry carries a regex.
;;
;; The returned function takes a string and returns three values: the parsed
;; timestamp and the start and end of the match.  It returns NIL when the
;; string contains no match.  Nanoseconds are always 0, so any fractional
;; seconds in the input are not carried into the timestamp.
(defmethod make-parser (format)
  (let ((scanner (ppcre:create-scanner (get-regex format))))
    (lambda (s)
      (match ((yyyy mm dd hh minutes seconds zone)
              (start end)
              (scanner s))
        (let ((timestamp (local-time:encode-timestamp
                           0
                           (i seconds) (i minutes) (i hh)
                           (i dd) (i mm) (i yyyy)
                           :timezone (parse-timezone zone))))
          (values timestamp start end))))))

;; Parser for timestamps shaped like "2020/11/23 18:55:30" with an optional
;; six-digit ".123456" microsecond suffix.
;;
;; The returned function takes a string and returns three values: the parsed
;; timestamp and the start and end of the match.  It returns NIL when the
;; string contains no match.
(defmethod make-parser ((format (eql :golang)))
  (let ((scanner (ppcre:create-scanner "(\\d{4})/(\\d{2})/(\\d{2}) (\\d{2}):(\\d{2}):(\\d{2})(?:\\.(\\d{6}))?")))
    (lambda (s)
      (match ((year month day hour minute seconds microseconds)
              (start end)
              (scanner s))
        (values (local-time:encode-timestamp
                  ;; The microsecond register is optional, so it is NIL when the
                  ;; timestamp has no fractional part; treat that as zero
                  ;; instead of handing NIL to PARSE-INTEGER.
                  (if microseconds
                    (microseconds->nanoseconds (i microseconds))
                    0)
                  (i seconds) (i minute) (i hour) (i day) (i month) (i year))
                start end)))))

;; Parser for "DD/MM/YY,HH:MM" timestamps.  The two-digit year is taken to be
;; in the 2000s, and seconds and nanoseconds are zero.
(defmethod make-parser ((format (eql :gnuplot)))
  (let ((scanner (ppcre:create-scanner "(\\d{2})/(\\d{2})/(\\d{2}),(\\d{2}):(\\d{2})")))
    (lambda (s)
      (match ((dd mm yy hh minutes)
              (start end)
              (scanner s))
        (let* ((full-year (+ 2000 (i yy)))
               (timestamp (local-time:encode-timestamp
                            0 0 (i minutes) (i hh) (i dd) (i mm) full-year)))
          (values timestamp start end))))))

;; Parser for Unix timestamps expressed in whole seconds.
;;
;; The returned function takes a string and returns three values: the parsed
;; timestamp and the start and end of the match.  It returns NIL when the
;; string contains no match.
(defmethod make-parser ((format (eql :unix-seconds)))
  ;; Second-resolution Unix times are 10 digits today (11 leaves headroom).
  ;; The 13-14 digit width used here before is that of millisecond timestamps.
  (let ((scanner (ppcre:create-scanner "(\\d{10,11})")))
    (lambda (s)
      (match ((unix)
              (start end)
              (scanner s))
        (when unix ; shut up sbcl
          (values (local-time:unix-to-timestamp (i unix)) start end))))))

;; Parser for Unix timestamps expressed in milliseconds.
;;
;; The returned function takes a string and returns three values: the parsed
;; timestamp and the start and end of the match.  It returns NIL when the
;; string contains no match.
(defmethod make-parser ((format (eql :unix-milliseconds)))
  ;; Millisecond-resolution Unix times are 13 digits today (14 leaves
  ;; headroom).  The 10-11 digit width used here before is that of
  ;; second-resolution timestamps.
  (let ((scanner (ppcre:create-scanner "(\\d{13,14})")))
    (lambda (s)
      (match ((unix)
              (start end)
              (scanner s))
        (when unix ; shut up sbcl
          ;; Split into whole seconds and the leftover milliseconds.
          (multiple-value-bind (sec ms) (truncate (i unix) 1000)
            (values (local-time:unix-to-timestamp sec :nsec (milliseconds->nanoseconds ms))
                    start end)))))))


(defun make-predicate (format start end)
  "Return a matching predicate for the user's query.

  The predicate takes a line and returns four values:

  * The timestamp found in the line, or NIL.
  * Whether that timestamp lies within the inclusive START/END bounds.  A NIL
    bound is unbounded on that side.
  * The start of the timestamp in the line, if any.
  * The end of the timestamp in the line, if any.

  "
  (let ((parse (make-parser format)))
    (flet ((within-bounds (time)
             (and (or (null start) (local-time:timestamp<= start time))
                  (or (null end) (local-time:timestamp<= time end)))))
      (lambda (line)
        (multiple-value-bind (timestamp match-start match-end) (funcall parse line)
          (values timestamp
                  (and timestamp (within-bounds timestamp))
                  match-start
                  match-end))))))


(defgeneric make-formatter (format)
  (:documentation
   "Return a function of (time stream) that writes TIME to STREAM in FORMAT."))

;; Default method, for formats whose *TIME-FORMATS* entry carries a local-time
;; format spec.  The spec is looked up once, when the formatter is created.
(defmethod make-formatter (format)
  (let ((spec (get-local-time-format format)))
    (lambda (time stream)
      (local-time:format-timestring stream time :format spec))))

;; Writes "YYYY/MM/DD HH:MM:SS"; no fractional seconds are emitted.
(defmethod make-formatter ((format (eql :golang)))
  (let ((spec '((:year 4) #\/ (:month 2) #\/ (:day 2) #\space (:hour 2) #\: (:min 2) #\: (:sec 2))))
    (lambda (time stream)
      (local-time:format-timestring stream time :format spec))))

;; Writes "DD/MM/YY,HH:MM".  The rest of the string is rendered with a
;; literal Y where the year belongs, e.g. "16/07/Y,15:05", and the two-digit
;; year is spliced in at that position.
(defmethod make-formatter ((format (eql :gnuplot)))
  (let ((template '((:day 2) #\/ (:month 2) #\/ #\Y #\, (:hour 2) #\: (:min 2))))
    (lambda (time stream)
      (let ((rendered (local-time:format-timestring nil time :format template))
            (short-year (mod (local-time:timestamp-year time) 100)))
        ;; Characters 0-5 are "DD/MM/", character 6 is the Y placeholder.
        (write-string (subseq rendered 0 6) stream)
        (format stream "~2,'0D" short-year)
        (write-string (subseq rendered 7) stream)))))

;; Writes the timestamp as a decimal count of Unix seconds.
(defmethod make-formatter ((format (eql :unix-seconds)))
  (lambda (time stream)
    (let ((seconds (local-time:timestamp-to-unix time)))
      (format stream "~D" seconds))))

;; Writes the timestamp as a decimal count of Unix milliseconds.
(defmethod make-formatter ((format (eql :unix-milliseconds)))
  (lambda (time stream)
    (let ((seconds (local-time:timestamp-to-unix time))
          (milliseconds (local-time:timestamp-millisecond time)))
      (format stream "~D" (+ (* 1000 seconds) milliseconds)))))


(defun parse-time-flexibly (string)
  "Parse STRING as a timestamp in whichever known format matches first.

  Formats are tried in *TIME-FORMATS* order.  Signals an error when none of
  them matches."
  ;; todo optimize this
  (dolist (entry *time-formats*)
    (let* ((parse (make-parser (car entry)))
           (timestamp (funcall parse string)))
      (when timestamp
        (return-from parse-time-flexibly timestamp))))
  (error "Don't know how to parse ~S as a time." string))


;;;; Run ----------------------------------------------------------------------
(defun run% (predicate in out path prefix reformat only)
  "Copy lines from IN to OUT, filtered and rewritten according to PREDICATE.

  Lines without a timestamp are copied unchanged unless ONLY is true.  Lines
  with a timestamp are written only when PREDICATE reports them in bounds;
  they are preceded by PATH and a colon when PREFIX is true, and have their
  timestamp rewritten by the REFORMAT function when one is given."
  (flet ((emit (line time start end)
           (when prefix
             (write-string path out)
             (write-char #\: out))
           (cond
             (reformat
              ;; Text before the timestamp, the new timestamp, then the rest.
              (write-string line out :start 0 :end start)
              (funcall reformat time out)
              (write-line line out :start end))
             (t (write-line line out)))))
    (do ((line (read-line in nil) (read-line in nil)))
        ((null line))
      ; todo support multiple timestamps per line
      (multiple-value-bind (time in-bounds start end) (funcall predicate line)
        (cond
          ((null time) (unless only (write-line line out)))
          (in-bounds (emit line time start end)))))))

(defun run (paths &key format start end prefix reformat only)
  "Process each of PATHS in turn, writing the result to standard output.

  The path \"-\" means standard input, which is also used when PATHS is empty.
  FORMAT names the timestamp format to parse and REFORMAT, when non-NIL, names
  the format to rewrite timestamps into.  START and END are optional
  timestamp bounds; an error is signaled when END precedes START."
  (when (and start end (local-time:timestamp< end start))
    (error "Start ~S is after end ~S." start end))
  (let ((formatter (and reformat (make-formatter reformat)))
        (predicate (make-predicate format start end)))
    (dolist (path (or paths '("-")))
      (flet ((process (in)
               (run% predicate in *standard-output* path prefix formatter only)))
        (if (string= "-" path)
          (process *standard-input*)
          (with-open-file (stream path :direction :input)
            (process stream)))))))


;;;; User Interface -----------------------------------------------------------
;; -h/--help: stores T under the HELP key when given.
(defparameter *option-help*
  (adopt:make-option 'help
    :short #\h
    :long "help"
    :help "Display help and exit."
    :reduce (constantly t)))


;; A pair of boolean options, both stored under the PREFIX key.
(adopt:defparameters (*option-prefix* *option-no-prefix*)
  (adopt:make-boolean-options 'prefix
    :short #\p
    :long "prefix"
    :help "Prefix output lines with their path."
    :help-no "Do not prefix output lines with their path (default)."))


;; A pair of boolean options, both stored under the ONLY key: -o/--only turns
;; it on and -a/--all turns it off.
(adopt:defparameters (*option-only* *option-all*)
  (adopt:make-boolean-options 'only
    :short #\o
    :long "only"
    :help "Only output lines containing a timestamp."
    :short-no #\a
    :long-no "all"
    :help-no "Output all lines, even those without a timestamp (default)."))


;; -f/--format FORMAT: the argument is turned into a keyword, and the last
;; occurrence wins.  Defaults to :simple.
(defparameter *option-format*
  (adopt:make-option 'format
    :short #\f
    :long "format"
    :parameter "FORMAT"
    :help "The time format used to parse times from lines."
    :key #'keywordize
    :reduce #'adopt:last
    :initial-value :simple))

;; -r/--reformat FORMAT: the argument is turned into a keyword, and the last
;; occurrence wins.  Defaults to NIL, meaning no reformatting.
(defparameter *option-reformat*
  (adopt:make-option 'reformat
    :short #\r
    :long "reformat"
    :parameter "FORMAT"
    :help "Reformat parsed timestamps into FORMAT before outputting them."
    :key #'keywordize
    :reduce #'adopt:last
    :initial-value nil))

;; -R/--no-reformat: shares the REFORMAT key with --reformat and resets it to
;; NIL.
(defparameter *option-no-reformat*
  (adopt:make-option 'reformat
    :short #\R
    :long "no-reformat"
    :help "Do not reformat parsed timestamps (default)."
    :reduce (constantly nil)))


;; -s/--start START: the argument is parsed with PARSE-TIME-FLEXIBLY, so it
;; may be in any known format.  The last occurrence wins; defaults to NIL.
(defparameter *option-start*
  (adopt:make-option 'start
    :short #\s
    :long "start"
    :parameter "START"
    :help "Only show lines at or after START."
    :key #'parse-time-flexibly
    :reduce #'adopt:last
    :initial-value nil))

;; -e/--end END: the argument is parsed with PARSE-TIME-FLEXIBLY, so it may be
;; in any known format.  The last occurrence wins; defaults to NIL.
(defparameter *option-end*
  (adopt:make-option 'end
    :short #\e
    :long "end"
    :parameter "END"
    :help "Only show lines at or before END."
    :key #'parse-time-flexibly
    :reduce #'adopt:last
    :initial-value nil))


;; Long help text.  The string is a FORMAT control string: "~@" at the end of a
;; line keeps the newline, a bare "~" at the end of a line joins it with the
;; next one, and the "~:" line keeps the indentation of the list that follows.
;;
;; The gnuplot example is written day/month/year to match what the :gnuplot
;; parser and formatter actually use.
(adopt:define-string *help-text*
  "twizzle lets you swizzle timestamps.~@
   ~@
   Use --format to select the incoming timestamp format, and --reformat to ~
   swizzle them into a different format if desired.  Supported formats:~@
   ~@
   ~:
   * simple:            2020/11/23 18:55:30 (default)
   * rfc-3339:          2020-11-23 18:55:30Z
   * iso-8601:          2020-11-23T18:55:30Z
   * gnuplot:           23/11/20,18:55
   * golang:            2020/11/23 18:55:30.123456
   * unix-seconds:      integer seconds since the Unix epoch
   * unix-milliseconds: integer milliseconds since the Unix epoch~@
   ~@
   You can additionally filter based on a time range using --start and/or --end. ~
   For convenience, these parameters can be given in any supported timestamp ~
   format, they don't have to match --format.~@
   ~@
   Currently only the first timestamp per line is considered.")

;; An alist of (description . command line) pairs shown in the help output.
;; Only options that the interface actually defines may appear here: the time
;; range options are --start and --end, and restricting output to lines that
;; contain a timestamp requires --only.
(defparameter *examples*
  '(("Filter standard input and only print lines with an RFC-3339 time:"
     . "twizzle --only --format rfc-3339")
    ("Print log lines after a particular time, and prefix each output line with its source filename:"
     . "twizzle **.log --prefix --start '2020/06/14 12:22:01'")
    ("Print RFC-3339 log lines starting now, with now given in a different format:"
     . "tail -f foo | twizzle --format rfc-3339 --start \"$(date --utc --iso-8601=sec)\"")))


;; The command line interface: the help option followed by two option groups.
(defparameter *ui*
  (let ((format-options
          (adopt:make-group 'timestamp-formats
            :title "Timestamp Formats"
            :options (list *option-format*
                           *option-reformat*
                           *option-no-reformat*)))
        (output-options
          (adopt:make-group 'output-filtering
            :title "Output Control"
            :options (list *option-only*
                           *option-all*
                           *option-start*
                           *option-end*
                           *option-prefix*
                           *option-no-prefix*))))
    (adopt:make-interface
      :name "twizzle"
      :summary "swizzle timestamps"
      :usage "[OPTIONS] [FILE...]"
      :help *help-text*
      :examples *examples*
      :contents (list *option-help* format-options output-options))))


(defmacro exit-on-error (&body body)
  "Run BODY, handing any ERROR it signals to ADOPT:PRINT-ERROR-AND-EXIT."
  `(handler-case
       (progn ,@body)
     (error (condition)
       (adopt:print-error-and-exit condition))))

(defmacro exit-on-ctrl-c (&body body)
  "Run BODY, exiting with status 130 if a USER-ABORT condition is signaled."
  `(handler-case
       (with-user-abort:with-user-abort
         (progn ,@body))
     (with-user-abort:user-abort ()
       (adopt:exit 130))))


(defun toplevel ()
  "Entry point: parse the command line, then print help or process the input.

  Errors and user aborts are turned into process exits by EXIT-ON-ERROR and
  EXIT-ON-CTRL-C."
  #+sbcl (sb-ext:disable-debugger)
  (exit-on-error
    (exit-on-ctrl-c
      (multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*)
        (flet ((option (name)
                 (gethash name options)))
          (if (option 'help)
            (adopt:print-help-and-exit *ui*)
            (progn
              (local-time:reread-timezone-repository)
              (run arguments
                   :format (option 'format)
                   :start (option 'start)
                   :end (option 'end)
                   :prefix (option 'prefix)
                   :only (option 'only)
                   :reformat (option 'reformat)))))))))


;; NOTE(review): `#;` is not standard Common Lisp reader syntax.  Presumably the
;; author's environment defines it so that the interactive scratch forms below
;; are skipped when the file is loaded -- confirm before loading this file in
;; an image that lacks that reader macro.
#; Scratch --------------------------------------------------------------------

(run
  '("/home/sjl/scratch/logs/logs/prod/saas-warehouse-i-06b0ea6fe4e6dd3fd.log")
  :format :golang
  :prefix nil
  :reformat :rfc-3339)

(parse-time-flexibly "2020-07-15 16:08:15.0000Z")

(local-time:find-timezone-by-location-name "EDT")

(local-time:reread-timezone-repository)