More
author |
Steve Losh <steve@stevelosh.com> |
date |
Fri, 16 Feb 2024 14:12:21 -0500 |
parents |
f37e47cda7c4 |
children |
(none) |
(eval-when (:compile-toplevel :load-toplevel :execute)
(ql:quickload '(:adopt :alexandria :cl-ppcre :with-user-abort :local-time)
:silent t))
(defpackage :bucket
(:use :cl)
(:export :toplevel :*ui*))
(in-package :bucket)
;;;; Configuration ------------------------------------------------------------
(setf local-time:*default-timezone* local-time:+utc-zone+)
;;;; Functionality ------------------------------------------------------------
(defun collect-values (time-mode path)
(flet ((collect (stream)
(loop :for line = (read-line stream nil nil)
:while line
:for value = (if time-mode
(local-time:parse-rfc3339-timestring line)
(parse-integer line :junk-allowed t))
:when value :collect value)))
(if (string= path "-")
(collect *standard-input*)
(with-open-file (s path)
(collect s)))))
(defun floor-to-nearest (n divisor)
(* divisor (floor n divisor)))
(declaim (inline in-range-p in-time-p))
(defun in-range-p (val start end)
(and (<= start val)
(< val end)))
(defun in-time-p (val start end)
(and (local-time:timestamp<= start val)
(local-time:timestamp< val end)))
(defun bucket-numeric (data width skip-empty output)
(loop :with data = (sort (coerce data 'vector) #'<)
:with len = (length data)
:with i = 0
:for bs :from (floor-to-nearest (aref data 0) width) :by width
:for be = (+ bs width)
:while (< i len)
:for count = (loop :while (< i len)
:while (in-range-p (aref data i) bs be)
:sum 1
:do (incf i))
:do (unless (and skip-empty (zerop count))
(format t "~12F ~10D~%"
(ecase output
(:lower bs)
(:upper be)
(:mid (/ (+ bs bs width) 2.0d0)))
count))))
(defun bucket-temporal (data width skip-empty output)
(loop :with data = (sort (coerce data 'vector) #'local-time:timestamp<)
:with len = (length data)
:with i = 0
:with bs = (local-time:timestamp-minimize-part (aref data 0) :sec)
:for be = (local-time:timestamp+ bs width :sec)
:while (< i len)
:for count = (loop :while (< i len)
:while (in-time-p (aref data i) bs be)
:sum 1
:do (incf i))
:do (unless (and skip-empty (zerop count))
(format t "~A ~10D~%"
(local-time:format-rfc3339-timestring
nil
(ecase output
(:lower bs)
(:upper be)
(:mid (local-time:timestamp+ bs (truncate (* width 1000000000) 2) :nsec)))) ; hack
count))
:do (setf bs (local-time:timestamp+ bs width :sec))))
(defun run (paths &key time-mode width skip-empty (output :mid))
(when (null width)
(error "Bucket width must be specified."))
(let ((data (mapcan (alexandria:curry #'collect-values time-mode)
(or paths '("-")))))
(when (null data)
(error "No data found."))
(if time-mode
(bucket-temporal data width skip-empty output)
(bucket-numeric data width skip-empty output))))
;;;; User Interface -----------------------------------------------------------
(defparameter *option/help*
(adopt:make-option 'help
:help "Display help and exit."
:long "help"
:short #\h
:reduce (constantly t)))
(adopt:defparameters (*option/time* *option/no-time*)
(adopt:make-boolean-options 'time
:help "Bucket in timestamp (RFC3339) mode."
:help-no "Bucket in numeric mode (the default)."
:long "time"
:short #\t))
(adopt:defparameters (*option/skip-empty* *option/no-skip-empty*)
(adopt:make-boolean-options 'skip-empty
:help "Skip outputting empty buckets."
:help-no "Include empty buckets (the default)."
:long "skip-empty"
:short #\e))
(defparameter *option/output/lower*
(adopt:make-option 'output/lower
:result-key 'output
:help "Output the lower bound of the bucket."
:long "lower"
:short #\l
:reduce (constantly :lower)))
(defparameter *option/output/mid*
(adopt:make-option 'output/mid
:result-key 'output
:help "Output the midpoint of the bucket (the default)."
:long "mid"
:short #\m
:initial-value :mid
:reduce (constantly :mid)))
(defparameter *option/output/upper*
(adopt:make-option 'output/upper
:result-key 'output
:help "Output the upper bound of the bucket."
:long "upper"
:short #\u
:reduce (constantly :upper)))
(defparameter *option/width*
(adopt:make-option 'width
:result-key 'width
:help "Set bucket width to N."
:parameter "N"
:long "width"
:short #\W
:initial-value nil
:key #'parse-integer
:reduce #'adopt:last))
(defparameter *option/width/seconds*
(adopt:make-option 'width/seconds
:result-key 'width
:help "Set bucket width to N seconds."
:parameter "N"
:long "seconds"
:short #\S
:key (lambda (n)
(parse-integer n))
:reduce #'adopt:last))
(defparameter *option/width/minutes*
(adopt:make-option 'width/minutes
:result-key 'width
:help "Set bucket width to N minutes."
:parameter "N"
:long "minutes"
:short #\M
:key (lambda (n)
(* 60 (parse-integer n)))
:reduce #'adopt:last))
(defparameter *option/width/hours*
(adopt:make-option 'width/hours
:result-key 'width
:help "Set bucket width to N hours."
:parameter "N"
:long "hours"
:short #\H
:key (lambda (n)
(* 60 60 (parse-integer n)))
:reduce #'adopt:last))
(adopt:define-string *help-text*
"bucket groups lines into histogrammy buckets.~@
~@
This is handy if you have some non-bucketed data that you want to graph as ~
a histogram with gnuplot, because gnuplot unbelievably does not have built-in ~
histogramming.")
(defparameter *examples*
'())
(defparameter *ui*
(adopt:make-interface
:name "bucket"
:usage "[OPTIONS] [FILE...]"
:summary "bucket things for easier histogramming"
:help *help-text*
:examples *examples*
:contents (list *option/help*
*option/time*
*option/no-time*
*option/skip-empty*
*option/no-skip-empty*
(adopt:make-group 'bucket-width
:title "Bucket Widths"
:options (list
*option/width*
*option/width/seconds*
*option/width/minutes*
*option/width/hours*))
(adopt:make-group 'bucket-labels
:title "Bucket Labels"
:options (list
*option/output/lower*
*option/output/mid*
*option/output/upper*)))))
(defun toplevel ()
;; #+sbcl (sb-ext:disable-debugger)
(handler-case
(adopt::quit-on-ctrl-c ()
(multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*)
(cond
((gethash 'help options) (adopt:print-help-and-exit *ui*))
(t (progn (local-time:reread-timezone-repository)
(run arguments
:time-mode (gethash 'time options)
:width (gethash 'width options)
:skip-empty (gethash 'skip-empty options)
:output (gethash 'output options)))))))
(error (c) (adopt:print-error-and-exit c))))
#; Scratch --------------------------------------------------------------------
(ppcre:scan "(\\d{4})-(\\d{2})-(\\d{2})[ T](\\d{2}):(\\d{2}):(\\d{2})(?:\\.\\d+)?([+-]\\d{2}:\\d{2}|Z)?"
"2022-04-25T17:00:41.289049Z")