# HG changeset patch # User Steve Losh # Date 1597465622 14400 # Node ID 7419c99f464caf5277e934fb0299b692bfe41702 # Parent f9f5fbd2e8bc6b564a16f23617546b0ccb6d373b Add opaque-json parsing This doesn't actually seem to save that much as implemented — turns out `write-char` is not cheap. I wonder if it's even worth implementing this, or if just using the vanilla parsing is enough. I may remove all this and just use `t` to preserve extra fields in the future. diff -r f9f5fbd2e8bc -r 7419c99f464c jarl.asd --- a/jarl.asd Fri Aug 14 23:08:54 2020 -0400 +++ b/jarl.asd Sat Aug 15 00:27:02 2020 -0400 @@ -14,7 +14,7 @@ :components ((:module "src" :serial t :components ((:file "package") (:file "basic") - (:file "discard") + (:file "opaque") (:file "mop"))))) diff -r f9f5fbd2e8bc -r 7419c99f464c src/discard.lisp --- a/src/discard.lisp Fri Aug 14 23:08:54 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,76 +0,0 @@ -(in-package :jarl) - -;; Optimized reader for cases where you don't actually care about the value and -;; just need to parse over it without allocating anything. - -(defun discard-literal (string input) - (loop :for next :across string - :for char = (r input) - :unless (eql next char) - :do (e nil input "expected ~S when parsing ~S but got ~S" next string char))) - -(defun discard-array (input) - (r input) ; [ - (incf-depth input) - (skip-whitespace input) - (if (eql (p input) #\]) - (progn (decf-depth input) - (r input)) - (loop (discard-any input) - (skip-whitespace input) - (let ((ch (r input))) - (case ch - (#\] (decf-depth input) (return)) - (#\, (skip-whitespace input)) - (t (e nil input "expected ~S or ~S but got ~S." #\] #\, ch))))))) - -(defun discard-object (input) - (r input) ; { - (incf-depth input) - (skip-whitespace input) - (if (eql (p input) #\}) - (progn (decf-depth input) - (r input)) - (loop (discard-string input) - (parse-kv-separator nil input) - (discard-any input) - (skip-whitespace input) - (let ((ch (r input))) - (case ch - (#\} (decf-depth input) (return)) - (#\, (skip-whitespace input)) - (t (e nil input "expected ~S or ~S but got ~S" #\} #\, ch))))))) - -(defun discard-string (input) - (let ((ch (r input))) - (unless (eql ch #\") - (e nil input "expected opening ~S but got ~S" #\" ch))) - (loop :for ch = (r input) - :do (cond - ((eql ch :eof) (e nil input "got ~S" :eof)) - ((eql ch #\\) (parse-escaped-character input)) ; TODO: Optimize this too. - ((eql ch #\") (return)) - ((requires-escape-p ch) (e nil input "bad unescaped character ~S" ch)) - (t nil)))) - -(defun discard-number (input) - ;; TODO: Optimize this too. Not a huge priority since fixnums don't cons. - (parse-number input)) - -(defun discard-any (input) - (case (p input) - (:eof (r input) (e nil input "got ~S" :eof)) - (#\n (discard-literal "null" input)) - (#\t (discard-literal "true" input)) - (#\f (discard-literal "false" input)) - (#\" (discard-string input)) - (#\{ (discard-object input)) - (#\[ (discard-array input)) - ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (discard-number input)) - (t (e nil input "unexpected character ~S" (r input))))) - - -(defmethod read% ((class (eql 'nil)) contained-class input) - (skip-whitespace input) - (discard-any input) - (values)) diff -r f9f5fbd2e8bc -r 7419c99f464c src/opaque.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/opaque.lisp Sat Aug 15 00:27:02 2020 -0400 @@ -0,0 +1,157 @@ +(in-package :jarl) + +;; Optimized readers for cases where you just want to make sure the JSON parses +;; and preserve it for later, but don't want to allocate all the internal +;; objects. +;; +;; Parsing into opaque-json stores the JSON in a string so it can be emitted +;; later, but doesn't bother allocating all the internal objects It's similar +;; to Golang's json.RawMessage. +;; +;; Parsing into nil parses the JSON to make sure it's well formed, but discards +;; the characters entirely. +;; +;; TODO: Opaque JSON doesn't seem to save all that much over vanilla parsing. +;; Investigate why, and whether we should bother with this at all. + + +(defclass opaque-json () + ((data :accessor data :initarg :data))) + +(defmethod print-object ((o opaque-json) s) + (print-unreadable-object (o s :type t) + (format s "~S" (if (> (length (data o)) 10) + (concatenate 'string (subseq (data o) 0 10) "…") + (data o))))) + +(defun w (input output) ; write through (and read) + (declare (type input input) (optimize (speed 3) (safety 1) (debug 1))) + (let ((ch (r input))) + (unless (or (null output) (eql :eof ch)) + (write-char ch output)) + ch)) + + +(defun opaque-hex-digit (input output) + (let ((ch (w input output))) + (if (eql :eof ch) + (e nil input "cannot parse \\u escape sequence, got ~S" :eof) + (or (digit-char-p ch 16) + (e nil input "cannot parse \\u escape sequence, ~S is not a hexadecimal digit" ch))))) + +(defun opaque-escaped-character (input output) + (let ((ch (w input output))) + (case ch + ((#\" #\\ #\/ #\b #\f #\n #\r #\t) nil) + (#\u (loop :repeat 4 :do (opaque-hex-digit input output))) + (t (e nil input "bad escape sequence ~S ~S" #\\ ch)))) + nil) + +(defun opaque-literal (string input output) + (loop :for next :across string + :for char = (w input output) + :unless (eql next char) + :do (e nil input "expected ~S when parsing ~S but got ~S" next string char))) + +(defun opaque-array (input output) + (w input output) ; [ + (incf-depth input) + (skip-whitespace input) + (if (eql (p input) #\]) + (progn (decf-depth input) + (w input output)) + (loop (opaque-any input output) + (skip-whitespace input) + (let ((ch (w input output))) + (case ch + (#\] (decf-depth input) (return)) + (#\, (skip-whitespace input)) + (t (e nil input "expected ~S or ~S but got ~S." #\] #\, ch))))))) + +(defun opaque-object (input output) + (w input output) ; { + (incf-depth input) + (skip-whitespace input) + (if (eql (p input) #\}) + (progn (decf-depth input) + (w input output)) + (loop (opaque-string input output) + (parse-kv-separator nil input) + (when output (write-char #\: output)) + (opaque-any input output) + (skip-whitespace input) + (let ((ch (w input output))) + (case ch + (#\} (decf-depth input) (return)) + (#\, (skip-whitespace input)) + (t (e nil input "expected ~S or ~S but got ~S" #\} #\, ch))))))) + +(defun opaque-string (input output) + (let ((ch (w input output))) + (unless (eql ch #\") + (e nil input "expected opening ~S but got ~S" #\" ch))) + (loop :for ch = (w input output) + :do (case ch + (:eof (e nil input "got ~S" :eof)) + (#\\ (opaque-escaped-character input output)) + (#\" (return)) + (t (if (requires-escape-p ch) + (e nil input "bad unescaped character ~S" ch) + nil))))) + +(defun opaque-int (input output &optional (allow-leading-zero t)) + (loop :for n :from 0 + :for ch = (p input #\e) + :for digit = (digit-char-p ch) + :while digit + :do (progn (w input output) + (when (and (not allow-leading-zero) + (zerop n) ; leading + (zerop digit) ; zero + (digit-char-p (p input #\e))) ; but not a bare zero + (e nil input "bad leading zero"))) + :finally (when (zerop n) + (e nil input "expected an integer")))) + +(defun opaque-exponent (input output) + (when (member (p input) '(#\+ #\-)) + (w input output)) + (opaque-int input output)) + +(defun opaque-number (input output) + (when (eql #\- (p input)) + (w input output)) + (opaque-int input output nil) + (when (eql #\. (p input)) + (w input output) + (opaque-int input output)) + (when (member (p input) '(#\e #\E)) + (w input output) + (opaque-exponent input output))) + +(defun opaque-any (input output) + (case (p input) + (:eof (w input output) (e 'opaque-json input "got ~S" :eof)) + (#\n (opaque-literal "null" input output)) + (#\t (opaque-literal "true" input output)) + (#\f (opaque-literal "false" input output)) + (#\" (opaque-string input output)) + (#\{ (opaque-object input output)) + (#\[ (opaque-array input output)) + ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (opaque-number input output)) + (t (e nil input "unexpected character ~S" (w input output))))) + + +(defmethod read% ((class (eql 'opaque-json)) contained-class input) + (skip-whitespace input) + (make-instance 'opaque-json + :data (with-output-to-string (s) + (opaque-any input s)))) + +(defmethod print% ((thing opaque-json) stream) + (write-string (data thing) stream)) + +(defmethod read% ((class (eql 'nil)) contained-class input) + (skip-whitespace input) + (opaque-any input nil) + (values)) diff -r f9f5fbd2e8bc -r 7419c99f464c test/tests.lisp --- a/test/tests.lisp Fri Aug 14 23:08:54 2020 -0400 +++ b/test/tests.lisp Sat Aug 15 00:27:02 2020 -0400 @@ -58,6 +58,15 @@ (is (null (multiple-value-list (jarl:read nil s)))) (signals end-of-file (jarl:read t s)))) +(defun check-opaques-one-object (object string) + (with-input-from-string (s string) + ;; Make sure we can read it as opaque JSON. + (let ((oj (jarl:read 'jarl::opaque-json s))) + ;; Roundtrip it through print and read again and make sure it doesn't get + ;; mangled. + (is (same object (jarl:read t (jarl:print oj nil))))) + (signals end-of-file (jarl:read t s)))) + (defun check-roundtrips (object) (is (same object (jarl:read t (jarl:print object nil))))) @@ -82,7 +91,9 @@ ; Check that the entire string deserializes to the expected form. (check-reads-one-object ,object ,string) ; Check that we can roundtrip the form reliably. - (is (same ,object (jarl:read t (jarl:print ,object nil)))) + (check-roundtrips ,object) + ; Check that we can parse it opaquely too. + (check-opaques-one-object ,object ,string) ; Check that we can discard it safely. (check-discards-one-object ,string))))))