src/basic.lisp @ 37efd8463e96

Create separate error types for malformed JSON and unknown slots
author Steve Losh <steve@stevelosh.com>
date Wed, 02 Dec 2020 23:40:36 -0500
parents 6d4f34a78d74
children (none)
(in-package :jarl)

;;;; Input --------------------------------------------------------------------
(defparameter *read-size-limit* (expt 2 30)
  "The maximum number of characters to read in a single `jarl:read` call.

  If more than this number of characters would have to be read to finish reading
  a single object, a `json-size-limit-exceeded-error` will be signaled.

  If both the size and depth limits are exceeded by exactly the same character,
  it is unspecified which of the two errors will be signaled.")

(defparameter *read-depth-limit* 100
  "The maximum depth of nested objects and vectors to allow in a single `jarl:read` call.

  If reading a single object would require descending into more than this number
  of JSON objects and/or arrays, a `json-depth-limit-exceeded-error` error will
  be signaled.

  If both the size and depth limits are exceeded by exactly the same character,
  it is unspecified which of the two errors will be signaled.")

(defparameter *indent* nil)


(defstruct (input (:constructor make-input%))
  (stream nil :type stream)
  (line 1 :type (and fixnum (integer 0)))
  (column 0 :type (and fixnum (integer 0)))
  (depth 0 :type (and fixnum (integer 0)))
  (depth-limit *read-depth-limit* :type (and fixnum (integer 0)))
  (size 0 :type (and fixnum (integer 0)))
  (size-limit *read-size-limit* :type (and fixnum (integer 0)))
  (string-buffer nil :type (or null stream)))

(defun ensure-stream (stream-or-string)
  (etypecase stream-or-string
    (stream stream-or-string)
    (string (make-string-input-stream stream-or-string))))

(defun make-input (stream-or-string)
  (make-input% :stream (ensure-stream stream-or-string)))

(defun reset-limits (input)
  (setf (input-depth input) 0
        (input-size input) 0
        (input-depth-limit input) *read-depth-limit*
        (input-size-limit input) *read-size-limit*))

(defun reset-position (input)
  (setf (input-line input) 0
        (input-column input) 0))


(defun p (input &optional (eof :eof)) ; peek
  (declare (type input input)
           (optimize (speed 3) (safety 1) (debug 1)))
  (peek-char nil (input-stream input) nil eof))

(defun r (input) ; read
  (declare (type input input)
           (optimize (speed 3) (safety 1) (debug 1)))
  (let ((character (read-char (input-stream input) nil :eof)))
    (case character
      (#\newline (progn
                   (incf (input-line input))
                   (setf (input-column input) 0)))
      (#\tab (incf (input-column input) 8))
      (t (incf (input-column input))))
    (when (> (incf (input-size input))
             (input-size-limit input))
      (error 'json-size-limit-exceeded-error
             :line (input-line input)
             :column (input-column input)
             :limit (input-size-limit input)))
    character))


;;;; Errors -------------------------------------------------------------------
(define-condition json-error (error)
  ((line :accessor line :initarg :line)
   (column :accessor column :initarg :column)))

(define-condition json-reading-error (json-error)
  ((class-designator :accessor class-designator :initarg :class-designator)
   (message :accessor message :initarg :message))
  (:report
   (lambda (c stream)
     (format stream "Error reading JSON~@[ into ~S~] at line ~D column ~D: ~A"
             (class-designator c)
             (line c)
             (column c)
             (message c)))))

(define-condition malformed-json-error (json-reading-error)
  ())

(define-condition unknown-json-slot-error (json-reading-error)
  ((name :accessor name :initarg :name)))

(define-condition json-limit-exceeded-error (json-reading-error)
  ((limit :accessor limit :initarg :limit)
   (limit-name :allocation :class))
  (:report
   (lambda (c stream)
     (format stream "~:(~A~) limit (~D) exceeded while reading JSON at line ~D column ~D."
             (slot-value c 'limit-name)
             (limit c)
             (line c)
             (column c)))))

(define-condition json-size-limit-exceeded-error (json-limit-exceeded-error)
  ((limit-name :initform "size")))

(define-condition json-depth-limit-exceeded-error (json-limit-exceeded-error)
  ((limit-name :initform "depth")))


(defun e (class input format-string &rest args) ; error
  (error 'malformed-json-error
         :class-designator class
         :line (input-line input)
         :column (input-column input)
         :message (apply #'format nil format-string args)))


(defun incf-depth (input)
  (declare (type input input)
           (optimize (speed 3) (safety 1) (debug 1)))
  (when (> (incf (input-depth input))
           (input-depth-limit input))
    (error 'json-depth-limit-exceeded-error
           :line (input-line input)
           :column (input-column input)
           :limit (input-depth-limit input))))

(defun decf-depth (input)
  (declare (type input input)
           (optimize (speed 3) (safety 1) (debug 1)))
  (decf (input-depth input)))


;;;; Parsing Utilities --------------------------------------------------------
(defun skip-whitespace (input)
  (loop :for ch = (p input)
        :while (member ch '(#\space #\tab #\newline #\linefeed #\return))
        :do (r input)))

(defun requires-escape-p (character)
  (or (char= #\" character)
      (char= #\\ character)
      (<= (char-code character) #x1F))) ; TODO: Technically this isn't portable.

(defun parse-literal (input literal remaining-characters)
  (loop :for next :across remaining-characters
        :for char = (r input)
        :unless (eql next char)
        :do (e 'keyword input "expected ~S when reading ~S but got ~S" next literal char))
  literal)

(defun parse-hex-digit (input)
  (let ((ch (r input)))
    (if (eql :eof ch)
      (e nil input "cannot parse \\u escape sequence, got ~S" :eof)
      (or (digit-char-p ch 16)
          (e nil input "cannot parse \\u escape sequence, ~S is not a hexadecimal digit" ch)))))

(defun parse-escaped-character (input)
  (let ((ch (r input)))
    (case ch
      (#\" #\")
      (#\\ #\\)
      (#\/ #\/)
      (#\b #\backspace)
      (#\f (code-char #x0C))
      (#\n #\linefeed)
      (#\r #\return)
      (#\t #\tab)
      (#\u (let ((a (parse-hex-digit input)) ; todo handle surrogate pairs
                 (b (parse-hex-digit input))
                 (c (parse-hex-digit input))
                 (d (parse-hex-digit input)))
             (code-char (+ (* a (expt 16 3)) ; todo maybe do this more portably
                           (* b (expt 16 2))
                           (* c (expt 16 1))
                           (* d (expt 16 0))))))
      (t (e nil input "bad escape sequence ~S ~S" #\\ ch)))))

(defun parse-int (input &optional (allow-leading-zero t))
  (loop :with i = 0
        :with n = 0
        :for ch = (p input #\e)
        :for digit = (digit-char-p ch)
        :while digit
        :do (progn (r input)
                   (when (and (not allow-leading-zero)
                              (zerop n) ; leading
                              (zerop digit) ; zero
                              (digit-char-p (p input #\e))) ; but not a bare zero
                     (e nil input "bad leading zero"))
                   (incf n)
                   (setf i (+ (* 10 i) digit)))
        :finally (if (zerop n)
                   (e nil input "expected an integer")
                   (return (values i n)))))

(defun parse-exponent (input)
  (let* ((char (p input))
         (sign (if (member char '(#\+ #\-))
                 (progn (r input)
                        (case char
                          (#\+ 1)
                          (#\- -1)))
                 1)))
    (* sign (parse-int input))))

(defun parse-number (input)
  (let ((sign 1) integer
        (fractional 0) (fractional-length 0)
        (exponent 0) has-exponent)
    (when (eql #\- (p input))
      (r input)
      (setf sign -1))
    (setf integer (parse-int input nil))
    (when (eql #\. (p input))
      (r input)
      (setf (values fractional fractional-length) (parse-int input)))
    (when (member (p input) '(#\e #\E))
      (r input)
      (setf exponent (parse-exponent input)
            has-exponent t))
    (if (and (zerop fractional-length) (not has-exponent))
      (* sign integer)
      (values
        (coerce ;; todo make this less horrifying
          (read-from-string (format nil "~A~D.~V,'0Dd~D"
                                    (if (= -1 sign) #\- #\+)
                                    integer
                                    fractional-length
                                    fractional
                                    exponent))
          'double-float)))))

(defun parse-kv-separator (class input)
  (skip-whitespace input)
  (let ((ch (r input)))
    (unless (eql #\: ch)
      (e class input "expected separator ~S but got ~S" #\: ch))
    (skip-whitespace input)))


;;;; Readers ------------------------------------------------------------------
(defgeneric read% (class contained-class input))


(defmethod read% ((class (eql 'keyword)) contained-class input)
  (declare (ignore contained-class))
  (let ((ch (r input)))
    (case ch
      (#\t (parse-literal input :true "rue"))
      (#\f (parse-literal input :false "alse"))
      (t (e 'keyword input "expected ~S or ~S but got ~S" #\t #\f ch)))))

(defmethod read% ((class (eql 'null)) contained-class input)
  (declare (ignore contained-class))
  (let ((ch (r input)))
    (if (eql ch #\n)
      (parse-literal input nil "ull")
      (e 'null input "expected ~S but got ~S" #\n ch))))

(defmethod read% ((class (eql 'vector)) contained-class input)
  (let ((ch (r input)))
    (unless (eql ch #\[)
      (e 'vector input "expected ~S but got ~S" #\[ ch)))
  (incf-depth input)
  (skip-whitespace input)
  ;; todo allow specialized vectors?
  (if (eql (p input) #\])
    (progn (r input)
           (decf-depth input)
           (vector))
    (coerce
      (loop
        :with c = (car contained-class)
        :with cc = (cadr contained-class)
        :collect (read% c cc input)
        :do (progn
              (skip-whitespace input)
              (let ((ch (r input)))
                (case ch
                  (#\] (decf-depth input) (loop-finish))
                  (#\, (skip-whitespace input))
                  (t (e 'vector input "expected ~S or ~S but got ~S." #\] #\, ch))))))
      'vector)))

(defmethod read% ((class (eql 'string)) contained-class input)
  (declare (ignore contained-class))
  (let ((ch (r input)))
    (unless (eql ch #\")
      (e 'string input "expected opening ~S but got ~S" #\" ch)))
  (loop :with s = (or (input-string-buffer input)
                      (setf (input-string-buffer input)
                            (make-string-output-stream)))
        :for ch = (r input)
        :do (case ch
              (:eof (e 'string input "got ~S" :eof))
              (#\\ (write-char (parse-escaped-character input) s))
              (#\" (return (get-output-stream-string s)))
              (t (if (requires-escape-p ch)
                   (e 'string input "bad unescaped character ~S" ch)
                   (write-char ch s))))))

(defmethod read% ((class (eql 'hash-table)) contained-class input)
  (let ((ch (r input)))
    (unless (eql ch #\{)
      (e 'hash-table input "expected ~S but got ~S" #\{ ch)))
  (incf-depth input)
  (skip-whitespace input)
  (let ((result (make-hash-table :test #'equal)))
    (if (eql (p input) #\})
      (progn (r input)
             (decf-depth input))
      (loop
        :with c = (car contained-class)
        :with cc = (cadr contained-class)
        :for name = (read% 'string nil input)
        :for value = (progn (parse-kv-separator 'hash-table input)
                            (skip-whitespace input)
                            (read% c cc input))
        :do (progn
              (setf (gethash name result) value)
              (skip-whitespace input)
              (let ((ch (r input)))
                (case ch
                  (#\} (decf-depth input) (loop-finish))
                  (#\, (skip-whitespace input))
                  (t (e 'hash-table input "expected ~S or ~S but got ~S" #\} #\, ch)))))))
    result))

(defmethod read% ((class (eql 'number)) contained-class input)
  (declare (ignore contained-class))
  (let ((ch (p input)))
    (unless (member ch '(#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))
      (r input) ; chomp to ensure accurate column count
      (e 'number input "expected digit or ~S but got ~S" #\- ch)))
  (parse-number input))

(defmethod read% ((class (eql 'nullable)) contained-class input)
  (case (p input)
    (:eof (e `(or null ,input) input "got ~S"  :eof))
    (#\n (read% 'null nil input))
    (t (read% (first contained-class) (second contained-class) input))))

(defmethod read% ((class (eql t)) contained-class input)
  (declare (ignore contained-class))
  (skip-whitespace input)
  (case (p input)
    (:eof (r input) (e 't input "got ~S" :eof))
    ((#\t #\f) (read% 'keyword nil input))
    (#\n (read% 'null nil input))
    (#\" (read% 'string nil input))
    (#\{ (read% 'hash-table '(t) input))
    (#\[ (read% 'vector '(t) input))
    ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (read% 'number nil input))
    (t (e 't input "unexpected character ~S" (r input)))))


;;;; Printing -----------------------------------------------------------------
(defun write-escaped-char (char stream)
  (case char
    ((#\newline #\linefeed) (write-string "\\n" stream))
    (#\return (write-string "\\r" stream))
    (#\\ (write-string "\\\\" stream))
    (t (format stream "\\u~4,'0X" (char-code char)))))


(declaim (inline indent))

(defun indent (i stream)
  (when i
    (format stream "~%~v@T" i)))

(defgeneric print% (thing stream indent))

(defmethod print% ((thing null) stream indent)
  (declare (ignore indent))
  (write-string "null" stream))

(defmethod print% ((thing string) stream indent)
  (declare (ignore indent))
  (write-char #\" stream)
  (loop :for char :across thing
        :do (if (requires-escape-p char)
              (write-escaped-char char stream)
              (write-char char stream)))
  (write-char #\" stream))

(defmethod print% ((thing vector) stream indent &aux (first t))
  (write-char #\[ stream)
  (when indent (incf indent 2))
  (loop :for object :across thing
        :do (progn (if first
                     (setf first nil)
                     (write-char #\, stream))
                   (indent indent stream)
                   (print% object stream indent)))
  (when (and indent (not first))
    (indent (- indent 2) stream))
  (write-char #\] stream))

(defmethod print% ((thing hash-table) stream indent &aux (first t))
  (write-char #\{ stream)
  (when indent (incf indent 2))
  (maphash (lambda (name value)
             (if first
               (setf first nil)
               (write-char #\, stream))
             (indent indent stream)
             (assert (stringp name))
             (print% name stream indent)
             (write-char #\: stream)
             (when indent (write-char #\space stream))
             (print% value stream indent))
           thing)
  (when (and indent (not first))
    (indent (- indent 2) stream))
  (write-char #\} stream))

(defmethod print% ((thing single-float) stream indent)
  (declare (ignore indent))
  (let ((*read-default-float-format* 'single-float))
    (princ thing stream)))

(defmethod print% ((thing double-float) stream indent)
  (declare (ignore indent))
  (let ((*read-default-float-format* 'double-float))
    (princ thing stream)))

(defmethod print% ((thing integer) stream indent)
  (declare (ignore indent))
  (format stream "~D" thing))

(defmethod print% ((thing (eql :false)) stream indent)
  (declare (ignore indent))
  (write-string "false" stream))

(defmethod print% ((thing (eql :true)) stream indent)
  (declare (ignore indent))
  (write-string "true" stream))


;;;; API ----------------------------------------------------------------------
(defun canonicalize-class-designator (class-designator)
  (flet ((fail () (error "Malformed class designator ~S" class-designator)))
    (etypecase class-designator
      (cons (destructuring-bind (head a &optional (b nil b?)) class-designator
              (ecase head
                ; e.g. (vector foo)
                ((hash-table vector)
                 (progn (when b? (fail))
                        (list head (canonicalize-class-designator a))))
                ; e.g. (or null foo)
                (or (progn (unless b? (fail)) ; must have a second option
                           (when (eql 'null b) (rotatef a b)) ; sort a/b
                           (unless (eql 'null a) (fail)) ; no arbitrary ors
                           (list 'nullable (canonicalize-class-designator b)))))))
      (symbol (case class-designator
                (vector '(vector (t)))
                (hash-table '(hash-table (t)))
                (t (list class-designator)))))))


(defun read (class-designator input &optional (eof-error-p t) eof)
  (etypecase input
    (input (reset-limits input))
    ((or stream string) (setf input (make-input input))))
  (skip-whitespace input)
  (if (eql :eof (p input))
    (if eof-error-p
      (error 'end-of-file)
      eof)
    (destructuring-bind (class &optional contained)
        (canonicalize-class-designator class-designator)
      (read% class contained input))))

(defun print (object &optional (stream *standard-output*))
  (let ((*read-default-float-format* 'double-float)
        (*print-base* 10)
        (indent (if *indent* 0 nil)))
    (etypecase stream
      (stream (print% object stream indent) (values))
      ((eql t) (print% object *standard-output* indent) (values))
      (null (with-output-to-string (s) (print% object s indent))))))