Add opaque-json parsing
This doesn't actually seem to save that much as implemented — turns out
`write-char` is not cheap. I wonder if it's even worth implementing this, or if
just using the vanilla parsing is enough. I may remove all this and just use
`t` to preserve extra fields in the future.
author |
Steve Losh <steve@stevelosh.com> |
date |
Sat, 15 Aug 2020 00:27:02 -0400 |
parents |
f9f5fbd2e8bc
|
children |
24d3163b1f64
|
branches/tags |
(none) |
files |
jarl.asd src/discard.lisp src/opaque.lisp test/tests.lisp |
Changes
--- a/jarl.asd Fri Aug 14 23:08:54 2020 -0400
+++ b/jarl.asd Sat Aug 15 00:27:02 2020 -0400
@@ -14,7 +14,7 @@
:components ((:module "src" :serial t
:components ((:file "package")
(:file "basic")
- (:file "discard")
+ (:file "opaque")
(:file "mop")))))
--- a/src/discard.lisp Fri Aug 14 23:08:54 2020 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-(in-package :jarl)
-
-;; Optimized reader for cases where you don't actually care about the value and
-;; just need to parse over it without allocating anything.
-
-(defun discard-literal (string input)
- (loop :for next :across string
- :for char = (r input)
- :unless (eql next char)
- :do (e nil input "expected ~S when parsing ~S but got ~S" next string char)))
-
-(defun discard-array (input)
- (r input) ; [
- (incf-depth input)
- (skip-whitespace input)
- (if (eql (p input) #\])
- (progn (decf-depth input)
- (r input))
- (loop (discard-any input)
- (skip-whitespace input)
- (let ((ch (r input)))
- (case ch
- (#\] (decf-depth input) (return))
- (#\, (skip-whitespace input))
- (t (e nil input "expected ~S or ~S but got ~S." #\] #\, ch)))))))
-
-(defun discard-object (input)
- (r input) ; {
- (incf-depth input)
- (skip-whitespace input)
- (if (eql (p input) #\})
- (progn (decf-depth input)
- (r input))
- (loop (discard-string input)
- (parse-kv-separator nil input)
- (discard-any input)
- (skip-whitespace input)
- (let ((ch (r input)))
- (case ch
- (#\} (decf-depth input) (return))
- (#\, (skip-whitespace input))
- (t (e nil input "expected ~S or ~S but got ~S" #\} #\, ch)))))))
-
-(defun discard-string (input)
- (let ((ch (r input)))
- (unless (eql ch #\")
- (e nil input "expected opening ~S but got ~S" #\" ch)))
- (loop :for ch = (r input)
- :do (cond
- ((eql ch :eof) (e nil input "got ~S" :eof))
- ((eql ch #\\) (parse-escaped-character input)) ; TODO: Optimize this too.
- ((eql ch #\") (return))
- ((requires-escape-p ch) (e nil input "bad unescaped character ~S" ch))
- (t nil))))
-
-(defun discard-number (input)
- ;; TODO: Optimize this too. Not a huge priority since fixnums don't cons.
- (parse-number input))
-
-(defun discard-any (input)
- (case (p input)
- (:eof (r input) (e nil input "got ~S" :eof))
- (#\n (discard-literal "null" input))
- (#\t (discard-literal "true" input))
- (#\f (discard-literal "false" input))
- (#\" (discard-string input))
- (#\{ (discard-object input))
- (#\[ (discard-array input))
- ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (discard-number input))
- (t (e nil input "unexpected character ~S" (r input)))))
-
-
-(defmethod read% ((class (eql 'nil)) contained-class input)
- (skip-whitespace input)
- (discard-any input)
- (values))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/opaque.lisp Sat Aug 15 00:27:02 2020 -0400
@@ -0,0 +1,157 @@
+(in-package :jarl)
+
+;; Optimized readers for cases where you just want to make sure the JSON parses
+;; and preserve it for later, but don't want to allocate all the internal
+;; objects.
+;;
+;; Parsing into opaque-json stores the JSON in a string so it can be emitted
+;; later, but doesn't bother allocating all the internal objects It's similar
+;; to Golang's json.RawMessage.
+;;
+;; Parsing into nil parses the JSON to make sure it's well formed, but discards
+;; the characters entirely.
+;;
+;; TODO: Opaque JSON doesn't seem to save all that much over vanilla parsing.
+;; Investigate why, and whether we should bother with this at all.
+
+
+(defclass opaque-json ()
+ ((data :accessor data :initarg :data)))
+
+(defmethod print-object ((o opaque-json) s)
+ (print-unreadable-object (o s :type t)
+ (format s "~S" (if (> (length (data o)) 10)
+ (concatenate 'string (subseq (data o) 0 10) "…")
+ (data o)))))
+
+(defun w (input output) ; write through (and read)
+ (declare (type input input) (optimize (speed 3) (safety 1) (debug 1)))
+ (let ((ch (r input)))
+ (unless (or (null output) (eql :eof ch))
+ (write-char ch output))
+ ch))
+
+
+(defun opaque-hex-digit (input output)
+ (let ((ch (w input output)))
+ (if (eql :eof ch)
+ (e nil input "cannot parse \\u escape sequence, got ~S" :eof)
+ (or (digit-char-p ch 16)
+ (e nil input "cannot parse \\u escape sequence, ~S is not a hexadecimal digit" ch)))))
+
+(defun opaque-escaped-character (input output)
+ (let ((ch (w input output)))
+ (case ch
+ ((#\" #\\ #\/ #\b #\f #\n #\r #\t) nil)
+ (#\u (loop :repeat 4 :do (opaque-hex-digit input output)))
+ (t (e nil input "bad escape sequence ~S ~S" #\\ ch))))
+ nil)
+
+(defun opaque-literal (string input output)
+ (loop :for next :across string
+ :for char = (w input output)
+ :unless (eql next char)
+ :do (e nil input "expected ~S when parsing ~S but got ~S" next string char)))
+
+(defun opaque-array (input output)
+ (w input output) ; [
+ (incf-depth input)
+ (skip-whitespace input)
+ (if (eql (p input) #\])
+ (progn (decf-depth input)
+ (w input output))
+ (loop (opaque-any input output)
+ (skip-whitespace input)
+ (let ((ch (w input output)))
+ (case ch
+ (#\] (decf-depth input) (return))
+ (#\, (skip-whitespace input))
+ (t (e nil input "expected ~S or ~S but got ~S." #\] #\, ch)))))))
+
+(defun opaque-object (input output)
+ (w input output) ; {
+ (incf-depth input)
+ (skip-whitespace input)
+ (if (eql (p input) #\})
+ (progn (decf-depth input)
+ (w input output))
+ (loop (opaque-string input output)
+ (parse-kv-separator nil input)
+ (when output (write-char #\: output))
+ (opaque-any input output)
+ (skip-whitespace input)
+ (let ((ch (w input output)))
+ (case ch
+ (#\} (decf-depth input) (return))
+ (#\, (skip-whitespace input))
+ (t (e nil input "expected ~S or ~S but got ~S" #\} #\, ch)))))))
+
+(defun opaque-string (input output)
+ (let ((ch (w input output)))
+ (unless (eql ch #\")
+ (e nil input "expected opening ~S but got ~S" #\" ch)))
+ (loop :for ch = (w input output)
+ :do (case ch
+ (:eof (e nil input "got ~S" :eof))
+ (#\\ (opaque-escaped-character input output))
+ (#\" (return))
+ (t (if (requires-escape-p ch)
+ (e nil input "bad unescaped character ~S" ch)
+ nil)))))
+
+(defun opaque-int (input output &optional (allow-leading-zero t))
+ (loop :for n :from 0
+ :for ch = (p input #\e)
+ :for digit = (digit-char-p ch)
+ :while digit
+ :do (progn (w input output)
+ (when (and (not allow-leading-zero)
+ (zerop n) ; leading
+ (zerop digit) ; zero
+ (digit-char-p (p input #\e))) ; but not a bare zero
+ (e nil input "bad leading zero")))
+ :finally (when (zerop n)
+ (e nil input "expected an integer"))))
+
+(defun opaque-exponent (input output)
+ (when (member (p input) '(#\+ #\-))
+ (w input output))
+ (opaque-int input output))
+
+(defun opaque-number (input output)
+ (when (eql #\- (p input))
+ (w input output))
+ (opaque-int input output nil)
+ (when (eql #\. (p input))
+ (w input output)
+ (opaque-int input output))
+ (when (member (p input) '(#\e #\E))
+ (w input output)
+ (opaque-exponent input output)))
+
+(defun opaque-any (input output)
+ (case (p input)
+ (:eof (w input output) (e 'opaque-json input "got ~S" :eof))
+ (#\n (opaque-literal "null" input output))
+ (#\t (opaque-literal "true" input output))
+ (#\f (opaque-literal "false" input output))
+ (#\" (opaque-string input output))
+ (#\{ (opaque-object input output))
+ (#\[ (opaque-array input output))
+ ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (opaque-number input output))
+ (t (e nil input "unexpected character ~S" (w input output)))))
+
+
+(defmethod read% ((class (eql 'opaque-json)) contained-class input)
+ (skip-whitespace input)
+ (make-instance 'opaque-json
+ :data (with-output-to-string (s)
+ (opaque-any input s))))
+
+(defmethod print% ((thing opaque-json) stream)
+ (write-string (data thing) stream))
+
+(defmethod read% ((class (eql 'nil)) contained-class input)
+ (skip-whitespace input)
+ (opaque-any input nil)
+ (values))
--- a/test/tests.lisp Fri Aug 14 23:08:54 2020 -0400
+++ b/test/tests.lisp Sat Aug 15 00:27:02 2020 -0400
@@ -58,6 +58,15 @@
(is (null (multiple-value-list (jarl:read nil s))))
(signals end-of-file (jarl:read t s))))
+(defun check-opaques-one-object (object string)
+ (with-input-from-string (s string)
+ ;; Make sure we can read it as opaque JSON.
+ (let ((oj (jarl:read 'jarl::opaque-json s)))
+ ;; Roundtrip it through print and read again and make sure it doesn't get
+ ;; mangled.
+ (is (same object (jarl:read t (jarl:print oj nil)))))
+ (signals end-of-file (jarl:read t s))))
+
(defun check-roundtrips (object)
(is (same object (jarl:read t (jarl:print object nil)))))
@@ -82,7 +91,9 @@
; Check that the entire string deserializes to the expected form.
(check-reads-one-object ,object ,string)
; Check that we can roundtrip the form reliably.
- (is (same ,object (jarl:read t (jarl:print ,object nil))))
+ (check-roundtrips ,object)
+ ; Check that we can parse it opaquely too.
+ (check-opaques-one-object ,object ,string)
; Check that we can discard it safely.
(check-discards-one-object ,string))))))