src/opaque.lisp @ 7419c99f464c

Add opaque-json parsing

This doesn't actually seem to save that much as implemented — turns out
`write-char` is not cheap.  I wonder if it's even worth implementing this, or if
just using the vanilla parsing is enough.  I may remove all this and just use
`t` to preserve extra fields in the future.
author Steve Losh <steve@stevelosh.com>
date Sat, 15 Aug 2020 00:27:02 -0400
parents (none)
children (none)
(in-package :jarl)

;; Optimized readers for cases where you just want to make sure the JSON parses
;; and preserve it for later, but don't want to allocate all the internal
;; objects.
;;
;; Parsing into opaque-json stores the JSON in a string so it can be emitted
;; later, but doesn't bother allocating all the internal objects  It's similar
;; to Golang's json.RawMessage.
;;
;; Parsing into nil parses the JSON to make sure it's well formed, but discards
;; the characters entirely.
;;
;; TODO: Opaque JSON doesn't seem to save all that much over vanilla parsing.
;; Investigate why, and whether we should bother with this at all.


(defclass opaque-json ()
  ((data :accessor data :initarg :data)))

(defmethod print-object ((o opaque-json) s)
  (print-unreadable-object (o s :type t)
    (format s "~S" (if (> (length (data o)) 10)
                     (concatenate 'string (subseq (data o) 0 10) "…")
                     (data o)))))

(defun w (input output) ; write through (and read)
  (declare (type input input) (optimize (speed 3) (safety 1) (debug 1)))
  (let ((ch (r input)))
    (unless (or (null output) (eql :eof ch))
      (write-char ch output))
    ch))


(defun opaque-hex-digit (input output)
  (let ((ch (w input output)))
    (if (eql :eof ch)
      (e nil input "cannot parse \\u escape sequence, got ~S" :eof)
      (or (digit-char-p ch 16)
          (e nil input "cannot parse \\u escape sequence, ~S is not a hexadecimal digit" ch)))))

(defun opaque-escaped-character (input output)
  (let ((ch (w input output)))
    (case ch
      ((#\" #\\ #\/ #\b #\f #\n #\r #\t) nil)
      (#\u (loop :repeat 4 :do (opaque-hex-digit input output)))
      (t (e nil input "bad escape sequence ~S ~S" #\\ ch))))
  nil)

(defun opaque-literal (string input output)
  (loop :for next :across string
        :for char = (w input output)
        :unless (eql next char)
        :do (e nil input "expected ~S when parsing ~S but got ~S" next string char)))

(defun opaque-array (input output)
  (w input output) ; [
  (incf-depth input)
  (skip-whitespace input)
  (if (eql (p input) #\])
    (progn (decf-depth input)
           (w input output))
    (loop (opaque-any input output)
          (skip-whitespace input)
          (let ((ch (w input output)))
            (case ch
              (#\] (decf-depth input) (return))
              (#\, (skip-whitespace input))
              (t (e nil input "expected ~S or ~S but got ~S." #\] #\, ch)))))))

(defun opaque-object (input output)
  (w input output) ; {
  (incf-depth input)
  (skip-whitespace input)
  (if (eql (p input) #\})
    (progn (decf-depth input)
           (w input output))
    (loop (opaque-string input output)
          (parse-kv-separator nil input)
          (when output (write-char #\: output))
          (opaque-any input output)
          (skip-whitespace input)
          (let ((ch (w input output)))
            (case ch
              (#\} (decf-depth input) (return))
              (#\, (skip-whitespace input))
              (t (e nil input "expected ~S or ~S but got ~S" #\} #\, ch)))))))

(defun opaque-string (input output)
  (let ((ch (w input output)))
    (unless (eql ch #\")
      (e nil input "expected opening ~S but got ~S" #\" ch)))
  (loop :for ch = (w input output)
        :do (case ch
              (:eof (e nil input "got ~S" :eof))
              (#\\ (opaque-escaped-character input output))
              (#\" (return))
              (t (if (requires-escape-p ch)
                   (e nil input "bad unescaped character ~S" ch)
                   nil)))))

(defun opaque-int (input output &optional (allow-leading-zero t))
  (loop :for n :from 0
        :for ch = (p input #\e)
        :for digit = (digit-char-p ch)
        :while digit
        :do (progn (w input output)
                   (when (and (not allow-leading-zero)
                              (zerop n) ; leading
                              (zerop digit) ; zero
                              (digit-char-p (p input #\e))) ; but not a bare zero
                     (e nil input "bad leading zero")))
        :finally (when (zerop n)
                   (e nil input "expected an integer"))))

(defun opaque-exponent (input output)
  (when (member (p input) '(#\+ #\-))
    (w input output))
  (opaque-int input output))

(defun opaque-number (input output)
  (when (eql #\- (p input))
    (w input output))
  (opaque-int input output nil)
  (when (eql #\. (p input))
    (w input output)
    (opaque-int input output))
  (when (member (p input) '(#\e #\E))
    (w input output)
    (opaque-exponent input output)))

(defun opaque-any (input output)
  (case (p input)
    (:eof (w input output) (e 'opaque-json input "got ~S" :eof))
    (#\n (opaque-literal "null" input output))
    (#\t (opaque-literal "true" input output))
    (#\f (opaque-literal "false" input output))
    (#\" (opaque-string input output))
    (#\{ (opaque-object input output))
    (#\[ (opaque-array input output))
    ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (opaque-number input output))
    (t (e nil input "unexpected character ~S" (w input output)))))


(defmethod read% ((class (eql 'opaque-json)) contained-class input)
  (skip-whitespace input)
  (make-instance 'opaque-json
    :data (with-output-to-string (s)
            (opaque-any input s))))

(defmethod print% ((thing opaque-json) stream)
  (write-string (data thing) stream))

(defmethod read% ((class (eql 'nil)) contained-class input)
  (skip-whitespace input)
  (opaque-any input nil)
  (values))