lisp/bucket.lisp @ a65fd2691c94 default tip
More
| author | Steve Losh <steve@stevelosh.com> |
|---|---|
| date | Mon, 03 Nov 2025 14:55:17 -0500 |
| parents | f37e47cda7c4 |
| children | (none) |
(eval-when (:compile-toplevel :load-toplevel :execute) (ql:quickload '(:adopt :alexandria :cl-ppcre :with-user-abort :local-time) :silent t)) (defpackage :bucket (:use :cl) (:export :toplevel :*ui*)) (in-package :bucket) ;;;; Configuration ------------------------------------------------------------ (setf local-time:*default-timezone* local-time:+utc-zone+) ;;;; Functionality ------------------------------------------------------------ (defun collect-values (time-mode path) (flet ((collect (stream) (loop :for line = (read-line stream nil nil) :while line :for value = (if time-mode (local-time:parse-rfc3339-timestring line) (parse-integer line :junk-allowed t)) :when value :collect value))) (if (string= path "-") (collect *standard-input*) (with-open-file (s path) (collect s))))) (defun floor-to-nearest (n divisor) (* divisor (floor n divisor))) (declaim (inline in-range-p in-time-p)) (defun in-range-p (val start end) (and (<= start val) (< val end))) (defun in-time-p (val start end) (and (local-time:timestamp<= start val) (local-time:timestamp< val end))) (defun bucket-numeric (data width skip-empty output) (loop :with data = (sort (coerce data 'vector) #'<) :with len = (length data) :with i = 0 :for bs :from (floor-to-nearest (aref data 0) width) :by width :for be = (+ bs width) :while (< i len) :for count = (loop :while (< i len) :while (in-range-p (aref data i) bs be) :sum 1 :do (incf i)) :do (unless (and skip-empty (zerop count)) (format t "~12F ~10D~%" (ecase output (:lower bs) (:upper be) (:mid (/ (+ bs bs width) 2.0d0))) count)))) (defun bucket-temporal (data width skip-empty output) (loop :with data = (sort (coerce data 'vector) #'local-time:timestamp<) :with len = (length data) :with i = 0 :with bs = (local-time:timestamp-minimize-part (aref data 0) :sec) :for be = (local-time:timestamp+ bs width :sec) :while (< i len) :for count = (loop :while (< i len) :while (in-time-p (aref data i) bs be) :sum 1 :do (incf i)) :do (unless (and skip-empty (zerop count)) (format t "~A ~10D~%" (local-time:format-rfc3339-timestring nil (ecase output (:lower bs) (:upper be) (:mid (local-time:timestamp+ bs (truncate (* width 1000000000) 2) :nsec)))) ; hack count)) :do (setf bs (local-time:timestamp+ bs width :sec)))) (defun run (paths &key time-mode width skip-empty (output :mid)) (when (null width) (error "Bucket width must be specified.")) (let ((data (mapcan (alexandria:curry #'collect-values time-mode) (or paths '("-"))))) (when (null data) (error "No data found.")) (if time-mode (bucket-temporal data width skip-empty output) (bucket-numeric data width skip-empty output)))) ;;;; User Interface ----------------------------------------------------------- (defparameter *option/help* (adopt:make-option 'help :help "Display help and exit." :long "help" :short #\h :reduce (constantly t))) (adopt:defparameters (*option/time* *option/no-time*) (adopt:make-boolean-options 'time :help "Bucket in timestamp (RFC3339) mode." :help-no "Bucket in numeric mode (the default)." :long "time" :short #\t)) (adopt:defparameters (*option/skip-empty* *option/no-skip-empty*) (adopt:make-boolean-options 'skip-empty :help "Skip outputting empty buckets." :help-no "Include empty buckets (the default)." :long "skip-empty" :short #\e)) (defparameter *option/output/lower* (adopt:make-option 'output/lower :result-key 'output :help "Output the lower bound of the bucket." :long "lower" :short #\l :reduce (constantly :lower))) (defparameter *option/output/mid* (adopt:make-option 'output/mid :result-key 'output :help "Output the midpoint of the bucket (the default)." :long "mid" :short #\m :initial-value :mid :reduce (constantly :mid))) (defparameter *option/output/upper* (adopt:make-option 'output/upper :result-key 'output :help "Output the upper bound of the bucket." :long "upper" :short #\u :reduce (constantly :upper))) (defparameter *option/width* (adopt:make-option 'width :result-key 'width :help "Set bucket width to N." :parameter "N" :long "width" :short #\W :initial-value nil :key #'parse-integer :reduce #'adopt:last)) (defparameter *option/width/seconds* (adopt:make-option 'width/seconds :result-key 'width :help "Set bucket width to N seconds." :parameter "N" :long "seconds" :short #\S :key (lambda (n) (parse-integer n)) :reduce #'adopt:last)) (defparameter *option/width/minutes* (adopt:make-option 'width/minutes :result-key 'width :help "Set bucket width to N minutes." :parameter "N" :long "minutes" :short #\M :key (lambda (n) (* 60 (parse-integer n))) :reduce #'adopt:last)) (defparameter *option/width/hours* (adopt:make-option 'width/hours :result-key 'width :help "Set bucket width to N hours." :parameter "N" :long "hours" :short #\H :key (lambda (n) (* 60 60 (parse-integer n))) :reduce #'adopt:last)) (adopt:define-string *help-text* "bucket groups lines into histogrammy buckets.~@ ~@ This is handy if you have some non-bucketed data that you want to graph as ~ a histogram with gnuplot, because gnuplot unbelievably does not have built-in ~ histogramming.") (defparameter *examples* '()) (defparameter *ui* (adopt:make-interface :name "bucket" :usage "[OPTIONS] [FILE...]" :summary "bucket things for easier histogramming" :help *help-text* :examples *examples* :contents (list *option/help* *option/time* *option/no-time* *option/skip-empty* *option/no-skip-empty* (adopt:make-group 'bucket-width :title "Bucket Widths" :options (list *option/width* *option/width/seconds* *option/width/minutes* *option/width/hours*)) (adopt:make-group 'bucket-labels :title "Bucket Labels" :options (list *option/output/lower* *option/output/mid* *option/output/upper*))))) (defun toplevel () ;; #+sbcl (sb-ext:disable-debugger) (handler-case (adopt::quit-on-ctrl-c () (multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*) (cond ((gethash 'help options) (adopt:print-help-and-exit *ui*)) (t (progn (local-time:reread-timezone-repository) (run arguments :time-mode (gethash 'time options) :width (gethash 'width options) :skip-empty (gethash 'skip-empty options) :output (gethash 'output options))))))) (error (c) (adopt:print-error-and-exit c)))) #; Scratch -------------------------------------------------------------------- (ppcre:scan "(\\d{4})-(\\d{2})-(\\d{2})[ T](\\d{2}):(\\d{2}):(\\d{2})(?:\\.\\d+)?([+-]\\d{2}:\\d{2}|Z)?" "2022-04-25T17:00:41.289049Z")