lisp/gtp.lisp @ 48b460573f0d

Merge
author Steve Losh <steve@stevelosh.com>
date Fri, 06 Nov 2020 11:01:33 -0500
parents 32bdaee0bb8b
children (none)
(eval-when (:compile-toplevel :load-toplevel :execute)
  (ql:quickload '(:adopt :alexandria :cl-ppcre :with-user-abort :local-time)
                :silent t))

(defpackage :gtp
  (:use :cl)
  (:export :toplevel :*ui*))

(in-package :gtp)

;;;; Configuration ------------------------------------------------------------
(defparameter *version* "1.0.0")
(setf local-time:*default-timezone* local-time:+utc-zone+)

(defparameter *time-formats*
  `((:rfc-3339 . ("(\\d{4})-(\\d{2})-(\\d{2})[ T](\\d{2}):(\\d{2}):(\\d{2})(?:[.]\\d{4-})?([+-]\\d{2}:\\d{2}|Z)?"
                  ,local-time:+rfc3339-format+))
    (:iso-8601 . ("(\\d{4})-(\\d{2})-(\\d{2})T(\\d{2}):(\\d{2}):(\\d{2})(?:,\\d{4-})?([+-]\\d{2}:\\d{2}|Z)?"
                  ,local-time:+iso-8601-format+))
    (:simple   . ("(\\d{4})/(\\d{2})/(\\d{2}) (\\d{2}):(\\d{2}):(\\d{2})()"
                  ((:year 4) #\/ (:month 2) #\/ (:day 2) #\space (:hour 2) #\: (:min 2) #\: (:sec 2))))
    (:gnuplot  . ("(\\d{2})/(\\d{2})/(\\d{2}),(\\d{2}):(\\d{2})"
                  ((:day 2) #\/ (:month 2) #\/ #\Y #\, (:hour 2) #\: (:min 2))))
    (:unix-milliseconds . ("(\\d{13,14})" nil))
    (:unix-seconds . ("(\\d{10,11})" nil))))


;;;; Utilities ----------------------------------------------------------------
(defmacro match ((register-vars (start end) (regex target)) &body body)
  (alexandria:with-gensyms (rs re)
    (alexandria:once-only (regex target)
      `(multiple-value-bind (,start ,end ,rs ,re) (ppcre:scan ,regex ,target)
         (when ,start
           (let (,@(loop :for r :from 0
                         :for var :in register-vars
                         :collect `(,var (when (aref ,rs ,r)
                                           (subseq ,target (aref ,rs ,r) (aref ,re ,r))))))
             ,@body))))))

(defun i (s)
  (parse-integer s))

(defun keywordize (s)
  (alexandria:make-keyword (string-upcase s)))


;;;; Time Formats -------------------------------------------------------------

(defun get-format (format)
  (or (alexandria:assoc-value *time-formats* format)
      (error "Unknown time format ~S" format)))

(defun get-regex (format)
  (first (get-format format)))

(defun get-local-time-format (format)
  (second (get-format format)))

(defun parse-timezone (string)
  (if (member string '(nil "" "Z" "UTC" "+00:00" "-00:00") :test #'equal)
    local-time:+utc-zone+
    (or (local-time:find-timezone-by-location-name string)
        (error "TODO: handle timezone ~S" string))))


(defgeneric make-parser (format))

(defmethod make-parser (format)
  (let ((scanner (ppcre:create-scanner (get-regex format))))
    (lambda (s)
      (match ((year month day hour minute second timezone)
              (start end)
              (scanner s))
        (values
          (local-time:encode-timestamp
            0 (i second) (i minute) (i hour) (i day) (i month) (i year)
            :timezone (parse-timezone timezone))
          start end)))))

(defmethod make-parser ((format (eql :gnuplot)))
  (let ((scanner (ppcre:create-scanner (get-regex format))))
    (lambda (s)
      (match ((day month year hour minute)
              (start end)
              (scanner s))
        (values (local-time:encode-timestamp
                  0 0 (i minute) (i hour) (i day) (i month) (+ 2000 (i year)))
                start end)))))

(defmethod make-parser ((format (eql :unix-seconds)))
  (let ((scanner (ppcre:create-scanner (get-regex format))))
    (lambda (s)
      (match ((unix)
              (start end)
              (scanner s))
        (when unix ; shut up sbcl
          (values (local-time:unix-to-timestamp (parse-integer unix)) start end))))))

(defmethod make-parser ((format (eql :unix-milliseconds)))
  (let ((scanner (ppcre:create-scanner (get-regex format))))
    (lambda (s)
      (match ((unix)
              (start end)
              (scanner s))
        (when unix ; shut up sbcl
          (multiple-value-bind (sec ms) (truncate (parse-integer unix) 1000)
            (values (local-time:unix-to-timestamp sec :nsec (* ms 1000 1000))
                    start end)))))))


(defun make-predicate (format start end)
  (let ((parser (make-parser format)))
    (lambda (line)
      (multiple-value-bind (line-time s e) (funcall parser line)
        (when (and line-time
                   (or (null start) (local-time:timestamp<= start line-time))
                   (or (null end) (local-time:timestamp<= line-time end)))
          (values line-time s e))))))


(defgeneric make-formatter (format))

(defmethod make-formatter (format)
  (let ((local-time-format (get-local-time-format format)))
    (lambda (time stream)
      (local-time:format-timestring stream time :format local-time-format))))

(defmethod make-formatter ((format (eql :gnuplot)))
  (let ((local-time-format (get-local-time-format :gnuplot)))
    (lambda (time stream)
      (let ((s (local-time:format-timestring nil time :format local-time-format)))
        ;; "16/07/Y,15:05"
        (write-string s stream :start 0 :end 6)
        (format stream "~2,'0D" (mod (local-time:timestamp-year time) 100))
        (write-string s stream :start 7)))))

(defmethod make-formatter ((format (eql :unix-seconds)))
  (lambda (time stream)
    (format stream "~D" (local-time:timestamp-to-unix time))))

(defmethod make-formatter ((format (eql :unix-milliseconds)))
  (lambda (time stream)
    (format stream "~D" (+ (* 1000 (local-time:timestamp-to-unix time))
                           (local-time:timestamp-millisecond time)))))


(defun parse-time-flexibly (string)
  ;; todo optimize this
  (loop :for format :in *time-formats*
        :for parser = (make-parser (car format))
        :for result = (funcall parser string)
        :when result :do (return-from parse-time-flexibly result))
  (error "Don't know how to parse ~S as a time." string))


;;;; Run ----------------------------------------------------------------------
(defun run% (predicate in out path prefix reformat)
  (loop
    :for line = (read-line in nil)
    :while line
    :do (multiple-value-bind (time start end) (funcall predicate line)
          (when time
            (when prefix
              (write-string path out)
              (write-char #\: out))
            (if reformat
              (progn (write-string line out :start 0 :end start)
                     (funcall reformat time out)
                     (write-line line out :start end))
              (write-line line out))))))

(defun run (paths &key format start end prefix reformat)
  (when (null paths)
    (setf paths '("-")))
  (when (and start end (local-time:timestamp< end start))
    (error "Start ~S is after end ~S." start end))
  (when reformat
    (setf reformat (make-formatter reformat)))
  (let ((pred (make-predicate format start end)))
    (dolist (path paths)
      (if (string= "-" path)
        (run% pred *standard-input* *standard-output* path prefix reformat)
        (with-open-file (stream path :direction :input)
          (run% pred stream *standard-output* path prefix reformat))))))


;;;; User Interface -----------------------------------------------------------
(defparameter *option-help*
  (adopt:make-option 'help
    :help "Display help and exit."
    :long "help"
    :short #\h
    :reduce (constantly t)))

(defparameter *option-version*
  (adopt:make-option 'version
    :help "Display version information and exit."
    :long "version"
    :reduce (constantly t)))


(defparameter *option-prefix*
  (adopt:make-option 'prefix
    :help "Prefix output lines with their path."
    :short #\p
    :long "prefix"
    :reduce (constantly t)))

(defparameter *option-no-prefix*
  (adopt:make-option 'no-prefix
    :result-key 'prefix
    :help "Do not prefix output lines with their path (default)."
    :short #\P
    :long "no-prefix"
    :reduce (constantly nil)))


(defparameter *option-format*
  (adopt:make-option 'format
    :help "The time format used to parse times from lines."
    :parameter "FORMAT"
    :long "format"
    :short #\f
    :initial-value :simple
    :key #'keywordize
    :reduce #'adopt:last))

(defparameter *option-reformat*
  (adopt:make-option 'reformat
    :help "Reformat parsed timestamps into FORMAT before outputting them."
    :parameter "FORMAT"
    :long "reformat"
    :short #\r
    :initial-value nil
    :key #'keywordize
    :reduce #'adopt:last))

(defparameter *option-no-reformat*
  (adopt:make-option 'reformat
    :help "Do not reformat parsed timestamps (default)."
    :long "no-reformat"
    :short #\R
    :reduce (constantly nil)))


(defparameter *option-start*
  (adopt:make-option 'start
    :help "Only show lines at or after START."
    :parameter "START"
    :long "start"
    :short #\s
    :initial-value nil
    :key #'parse-time-flexibly
    :reduce #'adopt:last))

(defparameter *option-end*
  (adopt:make-option 'end
    :help "Only show lines at or before END."
    :parameter "END"
    :long "end"
    :short #\e
    :initial-value nil
    :key #'parse-time-flexibly
    :reduce #'adopt:last))


(adopt:define-string *help-text*
  "gtp filters lines by time.  Instead of g/re/p it's g/time/p.~@
   ~@
   gtp will only print lines that have a timestamp somewhere in them.  Use ~
   --format to select the timestamp format.  Supported formats:~@
   ~@
   ~:
   * simple:   2020/11/23 18:55:30 (default)
   * rfc-3339: 2020-11-23 18:55:30Z
   * iso-8601: 2020-11-23T18:55:30Z
   * gnuplot:  11/23/20,18:55~@
   ~@
   You can additionally filter based on a time range using --start and/or --end. ~
   For convenience, these parameters can be given in any supported timestamp ~
   format, they don't have to match --format.")

(defparameter *examples*
  '(("Filter standard input and only print lines with an RFC-3339 time:"
     . "gtp --format rfc-3339")
    ("Print log lines after a particular time, and prefix each output line with its source filename:"
     . "gtp **.log --prefix --after '2020/06/14 12:22:01'")
    ("Print RFC-3339 log lines starting now, with now given in a different format:"
     . "tail -f foo | gtp --format rfc-3339 --after \"$(date --utc --iso-8601=sec)\"")))


(defparameter *ui*
  (adopt:make-interface
    :name "gtp"
    :usage "[OPTIONS] [FILE...]"
    :summary "filter lines by timestamp"
    :help *help-text*
    :examples *examples*
    :contents (list *option-help*
                    *option-version*
                    *option-format*
                    *option-reformat*
                    *option-no-reformat*
                    *option-start*
                    *option-end*
                    *option-prefix*
                    *option-no-prefix*)))


(defmacro exit-on-error (&body body)
  `(handler-case (progn ,@body)
     (error (c) (adopt:print-error-and-exit c))))

(defmacro exit-on-ctrl-c (&body body)
  `(handler-case
       (with-user-abort:with-user-abort (progn ,@body))
     (with-user-abort:user-abort () (adopt:exit 130))))


(defun toplevel ()
  #+sbcl (sb-ext:disable-debugger)
  (exit-on-error
    (exit-on-ctrl-c
      (multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*)
        (cond
          ((gethash 'help options) (adopt:print-help-and-exit *ui*))
          ((gethash 'version options) (write-line *version*) (adopt:exit))
          (t (progn (local-time:reread-timezone-repository)
                    (run arguments
                         :format (gethash 'format options)
                         :start (gethash 'start options)
                         :end (gethash 'end options)
                         :prefix (gethash 'prefix options)
                         :reformat (gethash 'reformat options)))))))))


#; Scratch --------------------------------------------------------------------

(run
  '("/home/sjl/scratch/logs/test/passport/passport.172.24.20.49.log")
  :format :simple
  :start (parse-time-flexibly "2020-07-15T17:31:00.000000")
  :end (parse-time-flexibly "2020-07-15T17:31:55")
  :prefix nil
  :reformat :iso-8601)

(parse-time-flexibly "2020-07-15 16:08:15.0000Z")

(local-time:find-timezone-by-location-name "EDT")

(local-time:reread-timezone-repository)