7419c99f464c

Add opaque-json parsing

This doesn't actually seem to save that much as implemented — turns out
`write-char` is not cheap.  I wonder if it's even worth implementing this, or if
just using the vanilla parsing is enough.  I may remove all this and just use
`t` to preserve extra fields in the future.
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Sat, 15 Aug 2020 00:27:02 -0400
parents f9f5fbd2e8bc
children 24d3163b1f64
branches/tags (none)
files jarl.asd src/discard.lisp src/opaque.lisp test/tests.lisp

Changes

--- a/jarl.asd	Fri Aug 14 23:08:54 2020 -0400
+++ b/jarl.asd	Sat Aug 15 00:27:02 2020 -0400
@@ -14,7 +14,7 @@
   :components ((:module "src" :serial t
                 :components ((:file "package")
                              (:file "basic")
-                             (:file "discard")
+                             (:file "opaque")
                              (:file "mop")))))
 
 
--- a/src/discard.lisp	Fri Aug 14 23:08:54 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-(in-package :jarl)
-
-;; Optimized reader for cases where you don't actually care about the value and
-;; just need to parse over it without allocating anything.
-
-(defun discard-literal (string input)
-  (loop :for next :across string
-        :for char = (r input)
-        :unless (eql next char)
-        :do (e nil input "expected ~S when parsing ~S but got ~S" next string char)))
-
-(defun discard-array (input)
-  (r input) ; [
-  (incf-depth input)
-  (skip-whitespace input)
-  (if (eql (p input) #\])
-    (progn (decf-depth input)
-           (r input))
-    (loop (discard-any input)
-          (skip-whitespace input)
-          (let ((ch (r input)))
-            (case ch
-              (#\] (decf-depth input) (return))
-              (#\, (skip-whitespace input))
-              (t (e nil input "expected ~S or ~S but got ~S." #\] #\, ch)))))))
-
-(defun discard-object (input)
-  (r input) ; {
-  (incf-depth input)
-  (skip-whitespace input)
-  (if (eql (p input) #\})
-    (progn (decf-depth input)
-           (r input))
-    (loop (discard-string input)
-          (parse-kv-separator nil input)
-          (discard-any input)
-          (skip-whitespace input)
-          (let ((ch (r input)))
-            (case ch
-              (#\} (decf-depth input) (return))
-              (#\, (skip-whitespace input))
-              (t (e nil input "expected ~S or ~S but got ~S" #\} #\, ch)))))))
-
-(defun discard-string (input)
-  (let ((ch (r input)))
-    (unless (eql ch #\")
-      (e nil input "expected opening ~S but got ~S" #\" ch)))
-  (loop :for ch = (r input)
-        :do (cond
-              ((eql ch :eof) (e nil input "got ~S" :eof))
-              ((eql ch #\\) (parse-escaped-character input)) ; TODO: Optimize this too.
-              ((eql ch #\") (return))
-              ((requires-escape-p ch) (e nil input "bad unescaped character ~S" ch))
-              (t nil))))
-
-(defun discard-number (input)
-  ;; TODO: Optimize this too.  Not a huge priority since fixnums don't cons.
-  (parse-number input))
-
-(defun discard-any (input)
-  (case (p input)
-    (:eof (r input) (e nil input "got ~S" :eof))
-    (#\n (discard-literal "null" input))
-    (#\t (discard-literal "true" input))
-    (#\f (discard-literal "false" input))
-    (#\" (discard-string input))
-    (#\{ (discard-object input))
-    (#\[ (discard-array input))
-    ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (discard-number input))
-    (t (e nil input "unexpected character ~S" (r input)))))
-
-
-(defmethod read% ((class (eql 'nil)) contained-class input)
-  (skip-whitespace input)
-  (discard-any input)
-  (values))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/opaque.lisp	Sat Aug 15 00:27:02 2020 -0400
@@ -0,0 +1,157 @@
+(in-package :jarl)
+
+;; Optimized readers for cases where you just want to make sure the JSON parses
+;; and preserve it for later, but don't want to allocate all the internal
+;; objects.
+;;
+;; Parsing into opaque-json stores the JSON in a string so it can be emitted
+;; later, but doesn't bother allocating all the internal objects  It's similar
+;; to Golang's json.RawMessage.
+;;
+;; Parsing into nil parses the JSON to make sure it's well formed, but discards
+;; the characters entirely.
+;;
+;; TODO: Opaque JSON doesn't seem to save all that much over vanilla parsing.
+;; Investigate why, and whether we should bother with this at all.
+
+
+(defclass opaque-json ()
+  ((data :accessor data :initarg :data)))
+
+(defmethod print-object ((o opaque-json) s)
+  (print-unreadable-object (o s :type t)
+    (format s "~S" (if (> (length (data o)) 10)
+                     (concatenate 'string (subseq (data o) 0 10) "…")
+                     (data o)))))
+
+(defun w (input output) ; write through (and read)
+  (declare (type input input) (optimize (speed 3) (safety 1) (debug 1)))
+  (let ((ch (r input)))
+    (unless (or (null output) (eql :eof ch))
+      (write-char ch output))
+    ch))
+
+
+(defun opaque-hex-digit (input output)
+  (let ((ch (w input output)))
+    (if (eql :eof ch)
+      (e nil input "cannot parse \\u escape sequence, got ~S" :eof)
+      (or (digit-char-p ch 16)
+          (e nil input "cannot parse \\u escape sequence, ~S is not a hexadecimal digit" ch)))))
+
+(defun opaque-escaped-character (input output)
+  (let ((ch (w input output)))
+    (case ch
+      ((#\" #\\ #\/ #\b #\f #\n #\r #\t) nil)
+      (#\u (loop :repeat 4 :do (opaque-hex-digit input output)))
+      (t (e nil input "bad escape sequence ~S ~S" #\\ ch))))
+  nil)
+
+(defun opaque-literal (string input output)
+  (loop :for next :across string
+        :for char = (w input output)
+        :unless (eql next char)
+        :do (e nil input "expected ~S when parsing ~S but got ~S" next string char)))
+
+(defun opaque-array (input output)
+  (w input output) ; [
+  (incf-depth input)
+  (skip-whitespace input)
+  (if (eql (p input) #\])
+    (progn (decf-depth input)
+           (w input output))
+    (loop (opaque-any input output)
+          (skip-whitespace input)
+          (let ((ch (w input output)))
+            (case ch
+              (#\] (decf-depth input) (return))
+              (#\, (skip-whitespace input))
+              (t (e nil input "expected ~S or ~S but got ~S." #\] #\, ch)))))))
+
+(defun opaque-object (input output)
+  (w input output) ; {
+  (incf-depth input)
+  (skip-whitespace input)
+  (if (eql (p input) #\})
+    (progn (decf-depth input)
+           (w input output))
+    (loop (opaque-string input output)
+          (parse-kv-separator nil input)
+          (when output (write-char #\: output))
+          (opaque-any input output)
+          (skip-whitespace input)
+          (let ((ch (w input output)))
+            (case ch
+              (#\} (decf-depth input) (return))
+              (#\, (skip-whitespace input))
+              (t (e nil input "expected ~S or ~S but got ~S" #\} #\, ch)))))))
+
+(defun opaque-string (input output)
+  (let ((ch (w input output)))
+    (unless (eql ch #\")
+      (e nil input "expected opening ~S but got ~S" #\" ch)))
+  (loop :for ch = (w input output)
+        :do (case ch
+              (:eof (e nil input "got ~S" :eof))
+              (#\\ (opaque-escaped-character input output))
+              (#\" (return))
+              (t (if (requires-escape-p ch)
+                   (e nil input "bad unescaped character ~S" ch)
+                   nil)))))
+
+(defun opaque-int (input output &optional (allow-leading-zero t))
+  (loop :for n :from 0
+        :for ch = (p input #\e)
+        :for digit = (digit-char-p ch)
+        :while digit
+        :do (progn (w input output)
+                   (when (and (not allow-leading-zero)
+                              (zerop n) ; leading
+                              (zerop digit) ; zero
+                              (digit-char-p (p input #\e))) ; but not a bare zero
+                     (e nil input "bad leading zero")))
+        :finally (when (zerop n)
+                   (e nil input "expected an integer"))))
+
+(defun opaque-exponent (input output)
+  (when (member (p input) '(#\+ #\-))
+    (w input output))
+  (opaque-int input output))
+
+(defun opaque-number (input output)
+  (when (eql #\- (p input))
+    (w input output))
+  (opaque-int input output nil)
+  (when (eql #\. (p input))
+    (w input output)
+    (opaque-int input output))
+  (when (member (p input) '(#\e #\E))
+    (w input output)
+    (opaque-exponent input output)))
+
+(defun opaque-any (input output)
+  (case (p input)
+    (:eof (w input output) (e 'opaque-json input "got ~S" :eof))
+    (#\n (opaque-literal "null" input output))
+    (#\t (opaque-literal "true" input output))
+    (#\f (opaque-literal "false" input output))
+    (#\" (opaque-string input output))
+    (#\{ (opaque-object input output))
+    (#\[ (opaque-array input output))
+    ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (opaque-number input output))
+    (t (e nil input "unexpected character ~S" (w input output)))))
+
+
+(defmethod read% ((class (eql 'opaque-json)) contained-class input)
+  (skip-whitespace input)
+  (make-instance 'opaque-json
+    :data (with-output-to-string (s)
+            (opaque-any input s))))
+
+(defmethod print% ((thing opaque-json) stream)
+  (write-string (data thing) stream))
+
+(defmethod read% ((class (eql 'nil)) contained-class input)
+  (skip-whitespace input)
+  (opaque-any input nil)
+  (values))
--- a/test/tests.lisp	Fri Aug 14 23:08:54 2020 -0400
+++ b/test/tests.lisp	Sat Aug 15 00:27:02 2020 -0400
@@ -58,6 +58,15 @@
     (is (null (multiple-value-list (jarl:read nil s))))
     (signals end-of-file (jarl:read t s))))
 
+(defun check-opaques-one-object (object string)
+  (with-input-from-string (s string)
+    ;; Make sure we can read it as opaque JSON.
+    (let ((oj (jarl:read 'jarl::opaque-json s)))
+      ;; Roundtrip it through print and read again and make sure it doesn't get
+      ;; mangled.
+      (is (same object (jarl:read t (jarl:print oj nil)))))
+    (signals end-of-file (jarl:read t s))))
+
 (defun check-roundtrips (object)
   (is (same object (jarl:read t (jarl:print object nil)))))
 
@@ -82,7 +91,9 @@
                   ; Check that the entire string deserializes to the expected form.
                   (check-reads-one-object ,object ,string)
                   ; Check that we can roundtrip the form reliably.
-                  (is (same ,object (jarl:read t (jarl:print ,object nil))))
+                  (check-roundtrips ,object)
+                  ; Check that we can parse it opaquely too.
+                  (check-opaques-one-object ,object ,string)
                   ; Check that we can discard it safely.
                   (check-discards-one-object ,string))))))