author |
Steve Losh <steve@stevelosh.com> |
date |
Sat, 18 Jul 2020 13:53:29 -0400 |
parents |
a2712b3d3b16 |
children |
e3c35d5a9681 |
(in-package :jarl)
;;;; Notes --------------------------------------------------------------------
;;;
;;; Reading is implemented as a generic function read% of three arguments:
;;;
;;; 1. The name of the class to read.
;;; 2. Any contained class designator, for things like (vector foo) or (nullable foo).
;;; 3. The input struct.
;;;
;;; Printing is just a simple generic function of the object and a stream.
;;;; Input --------------------------------------------------------------------
(defstruct input
(stream nil :type stream)
(line 1 :type (and fixnum (integer 0)))
(column 0 :type (and fixnum (integer 0))))
(defun p (input &optional (eof :eof)) ; peek
(peek-char nil (input-stream input) nil eof))
(defun r (input) ; read
(let ((character (read-char (input-stream input) nil :eof)))
(case character
(#\newline (progn
(incf (input-line input))
(setf (input-column input) 0)))
(#\tab (incf (input-column input) 8))
(t (incf (input-column input))))
character))
;;;; Errors -------------------------------------------------------------------
(define-condition json-parsing-error (error)
((line :accessor line :initarg :line)
(column :accessor column :initarg :column)
(message :accessor message :initarg :message))
(:report (lambda (c stream)
(format stream "Error parsing JSON at line ~D column ~D: ~A"
(line c)
(column c)
(message c)))))
(defun e (input format-string &rest args) ; error
(error 'json-parsing-error
:line (input-line input)
:column (input-column input)
:message (apply #'format nil format-string args)))
;;;; Parsing Utilities --------------------------------------------------------
(defun skip-whitespace (input)
(loop :for ch = (p input)
:while (member ch '(#\space #\tab #\newline #\linefeed #\return))
:do (r input)))
(defun requires-escape-p (character)
(or (char= #\" character)
(char= #\\ character)
(<= (char-code character) #x1F)))
(defun parse-literal (input literal remaining-characters)
(loop :for next :across remaining-characters
:for char = (r input)
:unless (eql next char)
:do (e input "Cannot parse literal ~S, expected ~S but got ~S" literal next char))
literal)
(defun parse-hex-digit (input)
(let ((ch (r input)))
(if (eql :eof ch)
(e input "Cannot parse \\u escape sequence, got ~S." :eof)
(or (digit-char-p ch 16)
(e input "Cannot parse \\u escape sequence, ~S is not a hexadecimal digit." ch)))))
(defun parse-escaped-character (input)
(let ((ch (r input)))
(case ch
(#\" #\")
(#\\ #\\)
(#\/ #\/)
(#\b #\backspace)
(#\f (code-char #x0C))
(#\n #\linefeed)
(#\r #\return)
(#\t #\tab)
(#\u (let ((a (parse-hex-digit input)) ; todo handle surrogate pairs
(b (parse-hex-digit input))
(c (parse-hex-digit input))
(d (parse-hex-digit input)))
(code-char (+ (* a (expt 16 3)) ; todo maybe do this more portably
(* b (expt 16 2))
(* c (expt 16 1))
(* d (expt 16 0))))))
(t (e input "Cannot parse string, bad escape sequence ~S ~S." #\\ ch)))))
(defun parse-int (input &optional (allow-leading-zero t))
(loop :with i = 0
:with n = 0
:for ch = (p input #\e)
:for digit = (digit-char-p ch)
:while digit
:do (progn (r input)
(when (and (not allow-leading-zero)
(zerop n) ; leading
(zerop digit) ; zero
(digit-char-p (p input #\e))) ; but not a bare zero
(e input "Cannot parse integer, bad leading zero."))
(incf n)
(setf i (+ (* 10 i) digit)))
:finally (if (zerop n)
(e input "Expected an integer.")
(return (values i n)))))
(defun parse-exponent (input)
(let* ((char (p input))
(sign (if (member char '(#\+ #\-))
(progn (r input)
(case char
(#\+ 1)
(#\- -1)))
1)))
(* sign (parse-int input))))
(defun parse-number (input)
(let ((sign 1) integer
(fractional 0) (fractional-length 0)
(exponent 0) has-exponent)
(when (eql #\- (p input))
(r input)
(setf sign -1))
(setf integer (parse-int input nil))
(when (eql #\. (p input))
(r input)
(setf (values fractional fractional-length) (parse-int input)))
(when (member (p input) '(#\e #\E))
(r input)
(setf exponent (parse-exponent input)
has-exponent t))
(if (and (zerop fractional-length) (not has-exponent))
(* sign integer)
(values
(coerce ;; todo make this less horrifying
(read-from-string (format nil "~A~D.~V,'0Dd~D"
(if (= -1 sign) #\- #\+)
integer
fractional-length
fractional
exponent))
'double-float)))))
(defun parse-kv-separator (class input)
(skip-whitespace input)
(let ((ch (r input)))
(unless (eql #\: ch)
(e input "Cannot parse ~S, expected ~S but got ~S" class #\: ch))
(skip-whitespace input)))
(defun json-type-of (object)
(typecase object
(integer 'integer)
(float 'float)
(t (type-of object))))
;;;; Primitive Readers --------------------------------------------------------
(defgeneric read% (class contained-class input))
(defmacro defreader (class (input &optional contained-class) &body body)
`(defmethod read% ((class (eql ',class)) ,(or contained-class 'contained-class) (,input input))
(declare (ignore class ,@(unless contained-class '(contained-class))))
,@body))
(defreader keyword (input)
(let ((ch (r input)))
(case ch
(#\t (parse-literal input :true "rue"))
(#\f (parse-literal input :false "alse"))
(t (e input "Cannot parse keyword, expected ~S or ~S but got ~S." #\t #\f ch)))))
(defreader null (input)
(let ((ch (r input)))
(if (eql ch #\n)
(parse-literal input nil "ull")
(e input "Cannot parse null, expected ~S but got ~S." #\n ch))))
(defreader vector (input contained)
(let ((ch (r input)))
(unless (eql ch #\[)
(e input "Cannot parse vector, expected ~S but got ~S." #\[ ch)))
(skip-whitespace input)
;; todo allow specialized vectors?
(if (eql (p input) #\])
(progn (r input) (vector))
(coerce
(loop
:with (c cc) = contained
:collect (read% c cc input)
:do (progn
(skip-whitespace input)
(let ((ch (r input)))
(case ch
(#\] (loop-finish))
(#\, (skip-whitespace input))
(t (e input "Cannot parse vector, expected ~S or ~S but got ~S." #\] #\, ch))))))
'vector)))
(defreader string (input)
(let ((ch (r input)))
(unless (eql ch #\")
(e input "Cannot parse string, expected opening ~S but got ~S." #\" ch)))
(with-output-to-string (s)
(loop :for ch = (r input)
:collect (cond
((eql ch :eof) (e input "Cannot parse string, got ~S." :eof))
((eql ch #\\) (write-char (parse-escaped-character input) s))
((eql ch #\") (loop-finish))
((requires-escape-p ch) (e input "Cannot parse string, bad unescaped character ~S." ch))
(t (write-char ch s))))))
(defreader hash-table (input contained)
(let ((ch (r input)))
(unless (eql ch #\{)
(e input "Cannot parse hash table, expected ~S but got ~S." #\{ ch)))
(skip-whitespace input)
(let ((result (make-hash-table :test #'equal)))
(if (eql (p input) #\})
(r input)
(loop
:with (c cc) = contained
:for name = (read% 'string nil input)
:for sep = (parse-kv-separator 'hash-table input)
:for value = (progn (skip-whitespace input)
(read% c cc input))
:do (progn
(setf (gethash name result) value)
(skip-whitespace input)
(let ((ch (r input)))
(case ch
(#\} (loop-finish))
(#\, (skip-whitespace input))
(t (e input "Cannot parse hash table, expected ~S or ~S but got ~S." #\} #\, ch)))))))
result))
(defreader number (input)
(let ((ch (p input)))
(unless (member ch '(#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))
(r input) ; chomp to ensure accurate column count
(e input "Cannot parse number, expected digit or ~S but got ~S." #\- ch)))
(parse-number input))
(defreader nullable (input contained)
(case (p input)
(:eof (e "Cannot parse ~S, got ~S." `(or null ,input) :eof))
(#\n (read% 'null nil input))
(t (read% (first contained) (second contained) input))))
(defreader t (input)
(skip-whitespace input)
(ecase (p input)
(:eof (r input) (e input "Cannot parse JSON, got ~S." :eof))
((#\t #\f) (read% 'keyword nil input))
(#\n (read% 'null nil input))
(#\" (read% 'string nil input))
(#\{ (read% 'hash-table '(t) input))
(#\[ (read% 'vector '(t) input))
((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (read% 'number nil input))))
;;;; Object Parsers -----------------------------------------------------------
(defclass json-class (standard-class)
((slot-name-to-json-name :accessor slot-name-to-json-name
:initarg :slot-name-to-json-name
:initform 'string-downcase)
(unknown-slots :accessor unknown-slots
:initarg :unknown-slots
:initform :discard)
(name-initarg-map :accessor name-initarg-map)))
(defclass json-direct-slot-definition (c2mop:standard-direct-slot-definition)
((json-name :initarg :json/name :accessor json-name)
(json-class :initarg :json/class :accessor json-class)))
(defclass json-effective-slot-definition (c2mop:standard-effective-slot-definition)
((json-name :initarg :json/name :accessor json-name)
(json-class :initarg :json/class :accessor json-class)
(json-initarg :accessor json-initarg)))
(defmethod c2mop:direct-slot-definition-class ((class json-class) &rest initargs)
(declare (ignore initargs))
(find-class 'json-direct-slot-definition))
(defmethod c2mop:effective-slot-definition-class ((class json-class) &rest initargs)
(declare (ignore initargs))
(find-class 'json-effective-slot-definition))
(defmethod c2mop:compute-effective-slot-definition ((class json-class) name direct-slots)
(let ((eslot (call-next-method))
(dslot (first direct-slots))) ; todo be smarter about coalescing these
(setf (json-name eslot) (if (slot-boundp dslot 'json-name)
(json-name dslot)
(funcall (slot-name-to-json-name class) name)) ; todo make this less shitty
(json-class eslot) (if (slot-boundp dslot 'json-class)
(canonicalize-class-designator (json-class dslot))
'(t))
(json-initarg eslot) (gensym (format nil "json-initarg-~A" name))) ; todo nicer name
eslot))
(defun json-slots (class)
(remove-if-not (lambda (slot) (typep slot 'json-effective-slot-definition))
(c2mop:class-slots class)))
(defun make-name-initarg-map (class)
(let* ((slots (json-slots class))
(result (make-hash-table :test #'equal :size (length slots))))
(dolist (slot slots)
(setf (gethash (json-name slot) result)
(json-initarg slot)))
result))
(defmethod shared-initialize ((instance json-class) slot-names
&rest initargs
&key slot-name-to-json-name unknown-slots
&allow-other-keys)
(apply #'call-next-method instance slot-names
(append
(when slot-name-to-json-name ; todo assert length = 1
(list :slot-name-to-json-name (first slot-name-to-json-name)))
(when unknown-slots ; todo assert length = 1
(list :unknown-slots (first unknown-slots)))
initargs))
instance)
(defun build-reader-lambda (class)
;; todo consider whether compiling a separate method for every class is REALLY
;; worth it, or whether we should just do things the reflecty way like
;; everyone else in the world.
(let ((slots (c2mop:class-slots class))
(class-name (class-name class)))
`(lambda (class contained-class input)
(let ((result (make-instance ',(class-name class))))
(let ((ch (r input)))
(unless (eql ch #\{)
(e input "Cannot parse ~S, expected ~S but got ~S." ',class-name #\{ ch)))
(skip-whitespace input)
(if (eql (p input) #\})
(r input)
(loop
:for name = (read% 'string nil input)
:for sep = (parse-kv-separator ',class-name input)
:do (progn
(cond
,@(loop
:for slot :in slots
:for name = (json-name slot)
:for (c cc) = (json-class slot)
:collect `((string= name ,name)
(setf (slot-value result ',(c2mop:slot-definition-name slot))
(read% ',c ',cc input)))) ; todo types
(t ,(ecase (unknown-slots class)
(:discard `(read% t nil input)) ; todo: handle discarded keys, skip more efficiently
(:error `(e input "Cannot parse ~S, got unknown object attribute ~S." ',class-name name)))))
(skip-whitespace input)
(let ((ch (r input)))
(case ch
(#\} (loop-finish))
(#\, (skip-whitespace input))
(t (e input "Cannot parse ~S, expected ~S or ~S but got ~S." ',class-name #\} #\, ch)))))))
result))))
(defun make-read%-method (class)
(multiple-value-bind (lambda-form initargs)
(c2mop:make-method-lambda #'read%
(first (c2mop:generic-function-methods #'read%))
(build-reader-lambda class)
nil)
(apply #'make-instance 'standard-method
:lambda-list '(class contained-class input)
:specializers (list (c2mop:intern-eql-specializer (class-name class))
(find-class t)
(find-class 'input))
:function (compile nil lambda-form)
initargs)))
(defmethod c2mop:finalize-inheritance :after ((class json-class))
(setf (name-initarg-map class) (make-name-initarg-map class))
nil
; todo: we may need to do things a bit differently here. I think the MOP spec
; says that the class doesn't need to be finalized until we're ready to
; allocate the first instance. but we need the read% method to be ready
; *before* that, because that method is what allocates the instance! So we
; might need to do something like making the default method on read% allocate
; a result instance and pass it to a call on a separate method read%%. Sigh.
#+no(add-method #'read% (make-read%-method class)))
(defmethod c2mop:validate-superclass ((class json-class) (superclass standard-class))
t)
(defclass post ()
((id :json/class number)
(title :json/class string)
(body :json/name "pingus")
(author :json/class user))
(:metaclass json-class)
(:unknown-slots :discard))
(defclass user ()
((id :type integer)
(name :type string)
(friends :type (vector integer)))
(:metaclass json-class))
;; (make-instance 'post)
;; (build-reader-lambda (find-class 'post))
;; (find-class 'user)
(find-class 'post)
;; (make-instance 'post)
;; (setf (find-class 'post) nil (find-class 'user) nil)
;; (read '(vector post) (substitute #\" #\' "[{
;; 'id': 69,
;; 'pingus': 'oh my christ, it works!',
;; 'author': {
;; 'id': 101,
;; 'name': 'sjl'
;; },
;; 'wat': 1
;; }, {
;; 'id': 420,
;; 'title': 'hello, world!',
;; 'pingus': 'incredible',
;; 'author': {
;; 'id': 101,
;; 'name': 'sjl',
;; 'friends': [1,2,3]
;; }
;; }]"))
(defun parse-json-class (class-name class input)
(let ((ch (r input)))
(unless (eql ch #\{)
(e input "Cannot parse ~S, expected ~S but got ~S." class-name #\{ ch)))
(skip-whitespace input)
(if (eql (p input) #\})
(progn
(r input)
(make-instance class))
(loop
:with unknown = (first (unknown-slots class))
:with map = (name-initarg-map class)
:with init = (list)
:for name = (read% 'string nil input)
:for sep = (parse-kv-separator class-name input)
:for (initarg c cc) = (gethash name map)
:do (progn
(if (null initarg)
(ecase unknown
(:discard (read% t nil input))
(:error (e input "Cannot parse ~S, got unknown object attribute ~S." class-name name)))
(progn
(push (read% c cc input) init)
(push initarg init)))
(skip-whitespace input)
(let ((ch (r input)))
(case ch
(#\} (loop-finish))
(#\, (skip-whitespace input))
(t (e input "Cannot parse ~S, expected ~S or ~S but got ~S." class-name #\} #\, ch)))))
:finally (return (apply #'make-instance class init)))))
(defmethod read% ((class-name symbol) (contained-class null) (input input))
(let ((class (find-class class-name nil)))
(typecase class
(json-class (parse-json-class class-name class input))
(null (error "Cannot find class ~S to parse JSON into." class-name))
(t (error "Cannot parse JSON into class ~S because it is not a ~S" class-name 'json-class)))))
;;;; Printing -----------------------------------------------------------------
(defun write-escaped-char (char stream)
(case char
((#\newline #\linefeed) (write-string "\\n" stream))
(#\return (write-string "\\r" stream))
(#\\ (write-string "\\\\" stream))
(t (format stream "\\u~4,'0X" (char-code char)))))
(defgeneric print% (thing stream))
(defmethod print% ((thing null) stream)
(write-string "null" stream))
(defmethod print% ((thing string) stream)
(write-char #\" stream)
(loop :for char :across thing
:do (if (requires-escape-p char)
(write-escaped-char char stream)
(write-char char stream)))
(write-char #\" stream))
(defmethod print% ((thing vector) stream)
(write-char #\[ stream)
(loop :with first = t
:for object :across thing
:do (progn (if first
(setf first nil)
(write-char #\, stream))
(print% object stream)))
(write-char #\] stream))
(defmethod print% ((thing hash-table) stream)
(write-char #\{ stream)
(loop :with first = t
:for name :being :the hash-keys :in thing :using (hash-value value)
:do (progn (if first
(setf first nil)
(write-char #\, stream))
(assert (stringp name))
(print% name stream)
(write-char #\: stream)
(print% value stream)))
(write-char #\} stream))
(defmethod print% ((thing single-float) stream)
(let ((*read-default-float-format* 'single-float))
(princ thing stream)))
(defmethod print% ((thing double-float) stream)
(let ((*read-default-float-format* 'double-float))
(princ thing stream)))
(defmethod print% ((thing integer) stream)
(format stream "~D" thing))
(defmethod print% ((thing (eql :false)) stream)
(write-string "false" stream))
(defmethod print% ((thing (eql :true)) stream)
(write-string "true" stream))
(defun print (object &optional (stream *standard-output*))
(let ((*read-default-float-format* 'double-float)
(*print-base* 10))
(etypecase stream
((or stream (eql t)) (progn (print% object stream)
(values)))
(null (with-output-to-string (s)
(print% object s))))))
;;;; API ----------------------------------------------------------------------
(defun canonicalize-class-designator (class-designator)
(flet ((fail () (error "Malformed class designator ~S" class-designator)))
(etypecase class-designator
(cons (destructuring-bind (head a &optional (b nil b?)) class-designator
(ecase head
; (vector foo)
((hash-table vector)
(progn (when b? (fail))
(list head (canonicalize-class-designator a))))
; (or null foo)
(or (progn
(unless b? (fail)) ; must have a second option
(when (eql 'null b) (rotatef a b)) ; sort a/b
(unless (eql 'null a) (fail)) ; no arbitrary ors
(list 'nullable (canonicalize-class-designator b)))))))
(symbol (case class-designator
(vector '(vector (t)))
(hash-table '(hash-table (t)))
(t (list class-designator)))))))
(defun ensure-stream (stream-or-string)
(etypecase stream-or-string
(stream stream-or-string)
(string (make-string-input-stream stream-or-string))))
(defun read (class-designator stream-or-string &optional (eof-error-p t) eof)
(let ((input (make-input :stream (ensure-stream stream-or-string))))
(skip-whitespace input)
(if (eql :eof (p input))
(if eof-error-p
(error 'end-of-file)
eof)
(destructuring-bind (class &optional contained)
(canonicalize-class-designator class-designator)
(read% class contained input)))))
#; Scratch --------------------------------------------------------------------
(read '1 "[true, null, false, true]")
(canonicalize-class-designator 'keyword)
(defclass post ()
((title :type string)
(id :type integer)
(body :type string))
(:metaclass json-class))
(defclass user ()
((name :type string)
(id :type integer)
(posts :type (vector post)))
(:metaclass json-class))
(canonicalize-type-designator '(vector keyword))