Add opaque-json parsing
This doesn't actually seem to save that much as implemented — turns out
`write-char` is not cheap. I wonder if it's even worth implementing this, or if
just using the vanilla parsing is enough. I may remove all this and just use
`t` to preserve extra fields in the future.
author |
Steve Losh <steve@stevelosh.com> |
date |
Sat, 15 Aug 2020 00:27:02 -0400 |
parents |
6d9143f80df3 |
children |
700d5c649245 |
(in-package :jarl)
;;;; Input --------------------------------------------------------------------
(defparameter *read-size-limit* (expt 2 30)
"The maximum number of characters to read in a single `jarl:read` call.
If more than this number of characters would have to be read to finish parsing
a single object, a `json-size-limit-exceeded-error` will be signaled.
If both the size and depth limits are exceeded by exactly the same character,
it is unspecified which of the two errors will be signaled.")
(defparameter *read-depth-limit* 100
"The maximum depth of nested objects and vectors to allow in a single `jarl:read` call.
If parsing a single object would require descending into more than this number
of JSON objects and/or arrays, a `json-depth-limit-exceeded-error` error will
be signaled.
If both the size and depth limits are exceeded by exactly the same character,
it is unspecified which of the two errors will be signaled.")
(defstruct input
(stream nil :type stream)
(line 1 :type (and fixnum (integer 0)))
(column 0 :type (and fixnum (integer 0)))
(depth 0 :type (and fixnum (integer 0)))
(depth-limit *read-depth-limit* :type (and fixnum (integer 0)))
(size 0 :type (and fixnum (integer 0)))
(size-limit *read-size-limit* :type (and fixnum (integer 0)))
(string-buffer nil :type (or null stream)))
(defun p (input &optional (eof :eof)) ; peek
(declare (type input input)
(optimize (speed 3) (safety 1) (debug 1)))
(peek-char nil (input-stream input) nil eof))
(defun r (input) ; read
(declare (type input input)
(optimize (speed 3) (safety 1) (debug 1)))
(let ((character (read-char (input-stream input) nil :eof)))
(case character
(#\newline (progn
(incf (input-line input))
(setf (input-column input) 0)))
(#\tab (incf (input-column input) 8))
(t (incf (input-column input))))
(when (> (incf (input-size input))
(input-size-limit input))
(error 'json-size-limit-exceeded-error
:line (input-line input)
:column (input-column input)
:limit (input-size-limit input)))
character))
;;;; Errors -------------------------------------------------------------------
(define-condition json-error (error)
((line :accessor line :initarg :line)
(column :accessor column :initarg :column)))
(define-condition json-parsing-error (json-error)
((class-designator :accessor class-designator :initarg :class-designator)
(message :accessor message :initarg :message))
(:report
(lambda (c stream)
(format stream "Error parsing JSON~@[ into ~S~] at line ~D column ~D: ~A"
(class-designator c)
(line c)
(column c)
(message c)))))
(define-condition json-limit-exceeded-error (json-parsing-error)
((limit :accessor limit :initarg :limit)
(limit-name :allocation :class))
(:report
(lambda (c stream)
(format stream "~:(~A~) limit (~D) exceeded while parsing JSON at line ~D column ~D."
(slot-value c 'limit-name)
(limit c)
(line c)
(column c)))))
(define-condition json-size-limit-exceeded-error (json-limit-exceeded-error)
((limit-name :initform "size")))
(define-condition json-depth-limit-exceeded-error (json-limit-exceeded-error)
((limit-name :initform "depth")))
(defun e (class input format-string &rest args) ; error
(error 'json-parsing-error
:class-designator class
:line (input-line input)
:column (input-column input)
:message (apply #'format nil format-string args)))
(defun incf-depth (input)
(declare (type input input)
(optimize (speed 3) (safety 1) (debug 1)))
(when (> (incf (input-depth input))
(input-depth-limit input))
(error 'json-depth-limit-exceeded-error
:line (input-line input)
:column (input-column input)
:limit (input-depth-limit input))))
(defun decf-depth (input)
(declare (type input input)
(optimize (speed 3) (safety 1) (debug 1)))
(decf (input-depth input)))
;;;; Parsing Utilities --------------------------------------------------------
(defun skip-whitespace (input)
(loop :for ch = (p input)
:while (member ch '(#\space #\tab #\newline #\linefeed #\return))
:do (r input)))
(defun requires-escape-p (character)
(or (char= #\" character)
(char= #\\ character)
(<= (char-code character) #x1F)))
(defun parse-literal (input literal remaining-characters)
(loop :for next :across remaining-characters
:for char = (r input)
:unless (eql next char)
:do (e 'keyword input "expected ~S when parsing ~S but got ~S" next literal char))
literal)
(defun parse-hex-digit (input)
(let ((ch (r input)))
(if (eql :eof ch)
(e nil input "cannot parse \\u escape sequence, got ~S" :eof)
(or (digit-char-p ch 16)
(e nil input "cannot parse \\u escape sequence, ~S is not a hexadecimal digit" ch)))))
(defun parse-escaped-character (input)
(let ((ch (r input)))
(case ch
(#\" #\")
(#\\ #\\)
(#\/ #\/)
(#\b #\backspace)
(#\f (code-char #x0C))
(#\n #\linefeed)
(#\r #\return)
(#\t #\tab)
(#\u (let ((a (parse-hex-digit input)) ; todo handle surrogate pairs
(b (parse-hex-digit input))
(c (parse-hex-digit input))
(d (parse-hex-digit input)))
(code-char (+ (* a (expt 16 3)) ; todo maybe do this more portably
(* b (expt 16 2))
(* c (expt 16 1))
(* d (expt 16 0))))))
(t (e nil input "bad escape sequence ~S ~S" #\\ ch)))))
(defun parse-int (input &optional (allow-leading-zero t))
(loop :with i = 0
:with n = 0
:for ch = (p input #\e)
:for digit = (digit-char-p ch)
:while digit
:do (progn (r input)
(when (and (not allow-leading-zero)
(zerop n) ; leading
(zerop digit) ; zero
(digit-char-p (p input #\e))) ; but not a bare zero
(e nil input "bad leading zero"))
(incf n)
(setf i (+ (* 10 i) digit)))
:finally (if (zerop n)
(e nil input "expected an integer")
(return (values i n)))))
(defun parse-exponent (input)
(let* ((char (p input))
(sign (if (member char '(#\+ #\-))
(progn (r input)
(case char
(#\+ 1)
(#\- -1)))
1)))
(* sign (parse-int input))))
(defun parse-number (input)
(let ((sign 1) integer
(fractional 0) (fractional-length 0)
(exponent 0) has-exponent)
(when (eql #\- (p input))
(r input)
(setf sign -1))
(setf integer (parse-int input nil))
(when (eql #\. (p input))
(r input)
(setf (values fractional fractional-length) (parse-int input)))
(when (member (p input) '(#\e #\E))
(r input)
(setf exponent (parse-exponent input)
has-exponent t))
(if (and (zerop fractional-length) (not has-exponent))
(* sign integer)
(values
(coerce ;; todo make this less horrifying
(read-from-string (format nil "~A~D.~V,'0Dd~D"
(if (= -1 sign) #\- #\+)
integer
fractional-length
fractional
exponent))
'double-float)))))
(defun parse-kv-separator (class input)
(skip-whitespace input)
(let ((ch (r input)))
(unless (eql #\: ch)
(e class input "expected separator ~S but got ~S" #\: ch))
(skip-whitespace input)))
;;;; Readers ------------------------------------------------------------------
(defgeneric read% (class contained-class input))
(defmethod read% ((class (eql 'keyword)) contained-class input)
(let ((ch (r input)))
(case ch
(#\t (parse-literal input :true "rue"))
(#\f (parse-literal input :false "alse"))
(t (e 'keyword input "expected ~S or ~S but got ~S" #\t #\f ch)))))
(defmethod read% ((class (eql 'null)) contained-class input)
(let ((ch (r input)))
(if (eql ch #\n)
(parse-literal input nil "ull")
(e 'null input "expected ~S but got ~S" #\n ch))))
(defmethod read% ((class (eql 'vector)) contained-class input)
(let ((ch (r input)))
(unless (eql ch #\[)
(e 'vector input "expected ~S but got ~S" #\[ ch)))
(incf-depth input)
(skip-whitespace input)
;; todo allow specialized vectors?
(if (eql (p input) #\])
(progn (r input)
(decf-depth input)
(vector))
(coerce
(loop
:with c = (car contained-class)
:with cc = (cadr contained-class)
:collect (read% c cc input)
:do (progn
(skip-whitespace input)
(let ((ch (r input)))
(case ch
(#\] (decf-depth input) (loop-finish))
(#\, (skip-whitespace input))
(t (e 'vector input "expected ~S or ~S but got ~S." #\] #\, ch))))))
'vector)))
(defmethod read% ((class (eql 'string)) contained-class input)
(let ((ch (r input)))
(unless (eql ch #\")
(e 'string input "expected opening ~S but got ~S" #\" ch)))
(loop :with s = (or (input-string-buffer input)
(setf (input-string-buffer input)
(make-string-output-stream)))
:for ch = (r input)
:do (case ch
(:eof (e 'string input "got ~S" :eof))
(#\\ (write-char (parse-escaped-character input) s))
(#\" (return (get-output-stream-string s)))
(t (if (requires-escape-p ch)
(e 'string input "bad unescaped character ~S" ch)
(write-char ch s))))))
(defmethod read% ((class (eql 'hash-table)) contained-class input)
(let ((ch (r input)))
(unless (eql ch #\{)
(e 'hash-table input "expected ~S but got ~S" #\{ ch)))
(incf-depth input)
(skip-whitespace input)
(let ((result (make-hash-table :test #'equal)))
(if (eql (p input) #\})
(progn (r input)
(decf-depth input))
(loop
:with c = (car contained-class)
:with cc = (cadr contained-class)
:for name = (read% 'string nil input)
:for sep = (parse-kv-separator 'hash-table input)
:for value = (progn (skip-whitespace input)
(read% c cc input))
:do (progn
(setf (gethash name result) value)
(skip-whitespace input)
(let ((ch (r input)))
(case ch
(#\} (decf-depth input) (loop-finish))
(#\, (skip-whitespace input))
(t (e 'hash-table input "expected ~S or ~S but got ~S" #\} #\, ch)))))))
result))
(defmethod read% ((class (eql 'number)) contained-class input)
(let ((ch (p input)))
(unless (member ch '(#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))
(r input) ; chomp to ensure accurate column count
(e 'number input "expected digit or ~S but got ~S" #\- ch)))
(parse-number input))
(defmethod read% ((class (eql 'nullable)) contained-class input)
(case (p input)
(:eof (e `(or null ,input) input "got ~S" :eof))
(#\n (read% 'null nil input))
(t (read% (first contained-class) (second contained-class) input))))
(defmethod read% ((class (eql t)) contained-class input)
(skip-whitespace input)
(case (p input)
(:eof (r input) (e 't input "got ~S" :eof))
((#\t #\f) (read% 'keyword nil input))
(#\n (read% 'null nil input))
(#\" (read% 'string nil input))
(#\{ (read% 'hash-table '(t) input))
(#\[ (read% 'vector '(t) input))
((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (read% 'number nil input))
(t (e 't input "unexpected character ~S" (r input)))))
;;;; Printing -----------------------------------------------------------------
(defun write-escaped-char (char stream)
(case char
((#\newline #\linefeed) (write-string "\\n" stream))
(#\return (write-string "\\r" stream))
(#\\ (write-string "\\\\" stream))
(t (format stream "\\u~4,'0X" (char-code char)))))
(defgeneric print% (thing stream))
(defmethod print% ((thing null) stream)
(write-string "null" stream))
(defmethod print% ((thing string) stream)
(write-char #\" stream)
(loop :for char :across thing
:do (if (requires-escape-p char)
(write-escaped-char char stream)
(write-char char stream)))
(write-char #\" stream))
(defmethod print% ((thing vector) stream)
(write-char #\[ stream)
(loop :with first = t
:for object :across thing
:do (progn (if first
(setf first nil)
(write-char #\, stream))
(print% object stream)))
(write-char #\] stream))
(defmethod print% ((thing hash-table) stream)
(write-char #\{ stream)
(loop :with first = t
:for name :being :the hash-keys :in thing :using (hash-value value)
:do (progn (if first
(setf first nil)
(write-char #\, stream))
(assert (stringp name))
(print% name stream)
(write-char #\: stream)
(print% value stream)))
(write-char #\} stream))
(defmethod print% ((thing single-float) stream)
(let ((*read-default-float-format* 'single-float))
(princ thing stream)))
(defmethod print% ((thing double-float) stream)
(let ((*read-default-float-format* 'double-float))
(princ thing stream)))
(defmethod print% ((thing integer) stream)
(format stream "~D" thing))
(defmethod print% ((thing (eql :false)) stream)
(write-string "false" stream))
(defmethod print% ((thing (eql :true)) stream)
(write-string "true" stream))
;;;; API ----------------------------------------------------------------------
(defun canonicalize-class-designator (class-designator)
(flet ((fail () (error "Malformed class designator ~S" class-designator)))
(etypecase class-designator
(cons (destructuring-bind (head a &optional (b nil b?)) class-designator
(ecase head
; e.g. (vector foo)
((hash-table vector)
(progn (when b? (fail))
(list head (canonicalize-class-designator a))))
; e.g. (or null foo)
(or (progn (unless b? (fail)) ; must have a second option
(when (eql 'null b) (rotatef a b)) ; sort a/b
(unless (eql 'null a) (fail)) ; no arbitrary ors
(list 'nullable (canonicalize-class-designator b)))))))
(symbol (case class-designator
(vector '(vector (t)))
(hash-table '(hash-table (t)))
(t (list class-designator)))))))
(defun ensure-stream (stream-or-string)
(etypecase stream-or-string
(stream stream-or-string)
(string (make-string-input-stream stream-or-string))))
(defun read (class-designator stream-or-string &optional (eof-error-p t) eof)
(let ((input (make-input :stream (ensure-stream stream-or-string))))
(skip-whitespace input)
(if (eql :eof (p input))
(if eof-error-p
(error 'end-of-file)
eof)
(destructuring-bind (class &optional contained)
(canonicalize-class-designator class-designator)
(read% class contained input)))))
(defun print (object &optional (stream *standard-output*))
(let ((*read-default-float-format* 'double-float)
(*print-base* 10))
(etypecase stream
(stream (print% object stream) (values))
((eql t) (print% object *standard-output*) (values))
(null (with-output-to-string (s) (print% object s))))))