--- a/src/main.lisp Tue Dec 25 23:18:43 2018 -0500
+++ b/src/main.lisp Wed Dec 26 21:24:37 2018 -0500
@@ -100,8 +100,14 @@
;;;; API ----------------------------------------------------------------------
-(defun read-row (&optional (stream *standard-input*) (eof-error-p t) eof-value)
- "Read and return a row of fields from the CSV data in `stream`.
+(defun ensure-input-stream (stream-or-string)
+ (etypecase stream-or-string
+ (stream stream-or-string)
+ (string (make-string-input-stream stream-or-string))))
+
+(defun read-row
+ (&optional (stream-or-string *standard-input*) (eof-error-p t) eof-value)
+ "Read and return a row of fields from the CSV data in `stream-or-string`.
The result will be completely fresh.
@@ -109,43 +115,72 @@
signaled unless `eof-error-p` is false, in which case `eof-value` is returned.
"
- (check-type stream stream)
- (assert (input-stream-p stream) (stream)
- "Stream ~S is not an input stream." stream)
(check-delimiter)
- (read-row% stream *delimiter* eof-error-p eof-value))
+ (let ((stream (ensure-input-stream stream-or-string)))
+ (assert (input-stream-p stream) (stream)
+ "Stream ~S is not an input stream." stream)
+ (read-row% stream *delimiter* eof-error-p eof-value)))
-(defun read-rows (&optional (stream *standard-input*))
- "Read and return all CSV rows from the CSV data in `stream`.
+(defun read-rows (&optional (stream-or-string *standard-input*))
+ "Read and return all CSV rows from the CSV data in `stream-or-string`.
The result will be completely fresh.
"
- (check-type stream stream)
- (assert (input-stream-p stream) (stream)
- "Stream ~S is not an input stream." stream)
(check-delimiter)
- (loop :with delimiter = *delimiter*
- :for row = (read-row% stream delimiter nil :eof)
- :until (eql row :eof)
- :collect row))
+ (let ((stream (ensure-input-stream stream-or-string)))
+ (assert (input-stream-p stream) ()
+ "Stream ~S is not an input stream." stream)
+ (loop :with delimiter = *delimiter*
+ :for row = (read-row% stream delimiter nil :eof)
+ :until (eql row :eof)
+ :collect row)))
+
+
+(defmacro with-output-to-stream-or-string
+ ((symbol &optional (stream-or-null symbol)) &body body)
+ "Bind `symbol` to an output stream, run `body`, and return appropriately.
+
+ If `stream-or-null` is a stream, `symbol` will be bound to it and nothing will
+ be returned.
+
+ If `stream-or-null` is `nil`, `symbol` will be bound to a string output stream
+ and the resulting string will be returned.
+
+ "
+ (let ((want-string (gensym "WANT-STRING")))
+ `(let* ((,symbol ,stream-or-null)
+ (,want-string (null ,symbol))
+ (,symbol (or ,symbol (make-string-output-stream))))
+ ,@body
+ (if ,want-string
+ (get-output-stream-string ,symbol)
+ (values)))))
(defun write-row (row &optional (stream *standard-output*))
- "Write `row` to `stream` as CSV data."
- (check-type stream stream)
- (assert (output-stream-p stream) (stream)
- "Stream ~S is not an output stream." stream)
+ "Write `row` to `stream` as CSV data.
+
+ If `stream` is `nil`, the data will be returned as a fresh string instead.
+
+ "
(check-delimiter)
- (write-row% row stream *delimiter*)
- (values))
+ (check-type stream (or null stream))
+ (with-output-to-stream-or-string (stream)
+ (assert (output-stream-p stream) (stream)
+ "Stream ~S is not an output stream." stream)
+ (write-row% row stream *delimiter*)))
(defun write-rows (rows &optional (stream *standard-output*))
- "Write `rows` to `stream` as CSV data."
- (check-type stream stream)
- (assert (output-stream-p stream) (stream)
- "Stream ~S is not an output stream." stream)
+ "Write `rows` to `stream` as CSV data.
+
+ If `stream` is `nil`, the data will be returned as a fresh string instead.
+
+ "
(check-delimiter)
- (loop :with delimiter = *delimiter*
- :for row :in rows
- :do (write-row% row stream delimiter)))
-
+ (check-type stream (or null stream))
+ (with-output-to-stream-or-string (stream)
+ (assert (output-stream-p stream) (stream)
+ "Stream ~S is not an output stream." stream)
+ (loop :with delimiter = *delimiter*
+ :for row :in rows
+ :do (write-row% row stream delimiter))))
--- a/test/tests.lisp Tue Dec 25 23:18:43 2018 -0500
+++ b/test/tests.lisp Wed Dec 26 21:24:37 2018 -0500
@@ -1,163 +1,338 @@
-(in-package :trivial-csv/test)
+(in-package :conserve/test)
;;;; Utils --------------------------------------------------------------------
+(defun symb (&rest args)
+ (intern (apply #'concatenate 'string (mapcar #'princ-to-string args))))
+
(defmacro define-test (name &body body)
`(test ,(symb 'test- name)
- (let ((*package* ,*package*)
- (r #(255 0 0))
- (g #(0 255 0))
- (b #(0 0 255))
- (k #(0 0 0))
- (w #(255 255 255)))
- (declare (ignorable r g b k w))
+ (let ((*package* ,*package*))
,@body)))
-(defun make-image-array (initial-data)
- (make-array (list (length (elt initial-data 0))
- (length initial-data))
- :initial-contents (transpose initial-data)))
-
-(defmacro check (form expected-data expected-format expected-bit-depth)
- (with-gensyms (data format bit-depth)
- `(multiple-value-bind (,data ,format ,bit-depth) ,form
- (is (equalp (make-image-array ,expected-data) ,data))
- (is (eql ,expected-format ,format))
- (is (= ,expected-bit-depth ,bit-depth)))))
+(defmacro define-csv-test (name csv data &optional bindings)
+ `(define-test ,name
+ (let ,bindings
+ (let ((data ,data)
+ (csv (format nil ,csv)))
+ (is (equalp data (conserve:read-rows csv)))
+ (is (equalp csv (conserve:write-rows data nil)))))))
(defun run-tests ()
(1am:run))
-;;;; Tests --------------------------------------------------------------------
-(define-test 1x1-black-ascii-pbm
- (check (trivial-ppm:read-from-file "test/data/1x1-black.ascii.pbm")
- '((0))
- :pbm
- 1))
+;;;; Compatibility Layer ------------------------------------------------------
+(defun cl-csv-write (data stream)
+ (cl-csv:write-csv data :stream stream :newline (string #\newline)))
+
+(defun cl-csv-write-string (data)
+ (cl-csv:write-csv data :stream nil :newline (string #\newline)))
-(define-test 1x1-black-ascii-pgm
- (check (trivial-ppm:read-from-file "test/data/1x1-black.ascii.pgm")
- '((0))
- :pgm
- 255))
+(defun cl-csv-read (stream)
+ (cl-csv:read-csv stream
+ :newline (string #\newline)
+ :trim-outer-whitespace nil))
+
+(defun cl-csv-read-string (string)
+ (cl-csv:read-csv string
+ :newline (string #\newline)
+ :trim-outer-whitespace nil))
+
-(define-test 1x1-black-ascii-ppm
- (check (trivial-ppm:read-from-file "test/data/1x1-black.ascii.ppm")
- `((,k))
- :ppm
- 255))
+(defun fare-csv-write (data stream)
+ (fare-csv:with-rfc4180-csv-syntax ()
+ (fare-csv:write-csv-lines data stream)))
+
+(defun fare-csv-write-string (data)
+ (with-output-to-string (s)
+ (fare-csv-write data s)))
-(define-test 4x3-rgb.ascii-ppm
- (check (trivial-ppm:read-from-file "test/data/4x3-rgb.ascii.ppm")
- `((,r ,r ,r ,r)
- (,g ,g ,g ,g)
- (,b ,b ,b ,b))
- :ppm
- 255))
+(defun fix-fare-csv-empty-lines (rows)
+ (mapcar (lambda (row)
+ (if (null row)
+ (list "")
+ row))
+ rows))
+
+(defun fare-csv-read (stream)
+ (fare-csv:with-rfc4180-csv-syntax ()
+ (fare-csv:read-csv-stream stream)))
+
+(defun fare-csv-read-string (string)
+ (fare-csv-read (make-string-input-stream string)))
-(define-test 1x1-black-binary-pbm
- (check (trivial-ppm:read-from-file "test/data/1x1-black.binary.pbm")
- '((0))
- :pbm
- 1))
+;;;; Basic Tests --------------------------------------------------------------
+(define-csv-test simple-csv
+ "a,b,c~@
+ d,e,f~%"
+ '(("a" "b" "c")
+ ("d" "e" "f")))
+
+(define-csv-test empty
+ ""
+ '())
-(define-test 1x1-black-binary-pgm
- (check (trivial-ppm:read-from-file "test/data/1x1-black.binary.pgm")
- '((0))
- :pgm
- 255))
+(define-csv-test blank-lines
+ "a~%~%c~%"
+ '(("a")
+ ("")
+ ("c")))
+
+(define-csv-test empty-fields
+ "a,,b~@
+ ,a,b~@
+ a,b,~@
+ ,,a,,~%"
+ '(("a" "" "b")
+ ("" "a" "b")
+ ("a" "b" "")
+ ("" "" "a" "" "")))
+
+(define-csv-test spaces
+ "a b, c,d ~%"
+ '(("a b"
+ " c"
+ "d ")))
-(define-test 1x1-black-binary-ppm
- (check (trivial-ppm:read-from-file "test/data/1x1-black.binary.ppm")
- `((,k))
- :ppm
- 255))
+(define-csv-test basic-quoting
+ "a,b,c~@
+ \"a,b\",c~%"
+ '(("a" "b" "c")
+ ("a,b" "c")))
+
+(define-csv-test quote-escaping
+ "foo,\"x\"\"y\",baz~@
+ \"\"\"start\",\"end\"\"\"~%"
+ '(("foo" "x\"y" "baz")
+ ("\"start" "end\"")))
+
+(define-csv-test quoted-newlines
+ "a,\"foo~%bar\",b~%"
+ `(("a" ,(format nil "foo~%bar") "b")))
-(define-test 4x3-rgb.binary-ppm
- (check (trivial-ppm:read-from-file "test/data/4x3-rgb.binary.ppm")
- `((,r ,r ,r ,r)
- (,g ,g ,g ,g)
- (,b ,b ,b ,b))
- :ppm
- 255))
+(define-csv-test other-delimiter
+ "a_b,c_\"foo_bar\"~%"
+ '(("a"
+ "b,c"
+ "foo_bar"))
+ ((conserve:*delimiter* #\_)))
+
+(define-test no-trailing-newline
+ (is (equal '(("a" "b")) (conserve:read-rows "a,b"))))
+
+(define-test read-single-row
+ (is (equal '("a" "b")
+ (conserve:read-row (format nil "a,b~%c,d") nil nil))))
+
+(define-test read-row-eof-value
+ (is (equal :eof (conserve:read-row "" nil :eof))))
+
+(define-test read-row-eof-error
+ (signals end-of-file (conserve:read-row "" t)))
-;;;; Fuzzer -------------------------------------------------------------------
-(defparameter *fuzz-test-count* 500)
+;;;; Fuzzing ------------------------------------------------------------------
+(defparameter *string-characters*
+ (format nil "abc\", ~%"))
+
+(defun random-char (&optional (string *string-characters*))
+ (aref string (random (length string))))
+
+(defun random-field (characters)
+ (with-output-to-string (s)
+ (dotimes (i characters)
+ (write-char (random-char) s))))
+
+(defun random-row (fields characters)
+ (loop :repeat fields :collect (random-field (random characters))))
+
+(defun random-data (rows fields characters)
+ (loop :repeat rows :collect (random-row (1+ (random fields))
+ characters)))
-(defun random-bit ()
- (random 2))
+(define-test fuzz-round-trip
+ (dotimes (i 500)
+ (let* ((data (random-data 100 10 15))
+ (output (conserve:write-rows data nil))
+ (round-tripped (conserve:read-rows output)))
+ (is (equal data round-tripped)))))
-(defun random-gray ()
- (random 256))
+(define-test fuzz-against-cl-csv
+ (dotimes (i 100)
+ (let* ((data (random-data 10 10 10))
+ (conserve-out (conserve:write-rows data nil))
+ (cl-csv-out (cl-csv-write-string data))
+ (conserve->cl-csv (cl-csv-read-string conserve-out))
+ (cl-csv->conserve (conserve:read-rows cl-csv-out)))
+ (is (= (length data) (length conserve->cl-csv)))
+ (is (= (length data) (length cl-csv->conserve)))
+ (is (equal data conserve->cl-csv))
+ (is (equal data cl-csv->conserve)))))
-(defun random-color ()
- (make-array 3 :initial-contents (list (random 256)
- (random 256)
- (random 256))))
+(define-test fuzz-against-fare-csv
+ (dotimes (i 100)
+ (let* ((data (random-data 10 10 10))
+ (conserve-out (conserve:write-rows data nil))
+ (fare-csv-out (fare-csv-write-string data))
+ (conserve->fare-csv (fix-fare-csv-empty-lines (fare-csv-read-string conserve-out)))
+ (fare-csv->conserve (conserve:read-rows fare-csv-out)))
+ (is (= (length data) (length conserve->fare-csv)))
+ (is (= (length data) (length fare-csv->conserve)))
+ (is (equal data conserve->fare-csv))
+ (is (equal data fare-csv->conserve)))))
+
-(defun random-format ()
- (ecase (random 3)
- (0 :pbm)
- (1 :pgm)
- (2 :ppm)))
+;;;; Benchmarking -------------------------------------------------------------
+(defun round-trip/conserve (data)
+ (conserve:read-rows (conserve:write-rows data nil)))
+
+(defun round-trip/cl-csv (data)
+ (cl-csv-read-string (cl-csv-write-string data)))
+
+(defun round-trip/fare-csv (data)
+ (fare-csv-read-string (fare-csv-write-string data)))
+
+
+(defun benchmark-round-trip/conserve (data)
+ (format t "~%Timing in-memory round trip for Conserve:~%")
+ (let ((result (time (round-trip/conserve data))))
+ (assert (equal data result))))
-(defun make-random-array ()
- (let* ((width (1+ (random 50)))
- (height (1+ (random 50)))
- (format (random-format))
- (data (make-array (list width height))))
- (dotimes (x width)
- (dotimes (y height)
- (setf (aref data x y)
- (ecase format
- (:pbm (random-bit))
- (:pgm (random-gray))
- (:ppm (random-color))))))
- (values data format)))
+(defun benchmark-round-trip/fare-csv (data)
+ (format t "~%Timing in-memory round trip for fare-csv:~%")
+ (let ((result (time (round-trip/fare-csv data))))
+ (assert (equal data (fix-fare-csv-empty-lines result)))))
+
+(defun benchmark-round-trip/cl-csv (data)
+ (format t "~%Timing in-memory round trip for cl-csv:~%")
+ (let ((result (time (round-trip/cl-csv data))))
+ (assert (equal data result))))
+
+
+(defun benchmark-round-trip ()
+ (let ((data (random-data 1000 100 500)))
+ (benchmark-round-trip/conserve data)
+ (benchmark-round-trip/fare-csv data)
+ (benchmark-round-trip/cl-csv data)))
-(define-test fuzz-ascii
- (dotimes (i *fuzz-test-count*)
- (multiple-value-bind (original original-format) (make-random-array)
- (write-to-file "test/data/fuzz.ascii" original
- :if-exists :supersede
- :format original-format
- :encoding :ascii)
- (multiple-value-bind (new new-format)
- (read-from-file "test/data/fuzz.ascii")
- (is (eql original-format new-format))
- (is (equalp original new))))))
+(defun write-file/conserve (data repetitions)
+ (with-open-file (s "test/data/large-conserve.csv"
+ :direction :output
+ :if-exists :supersede)
+ (loop :repeat repetitions :do (conserve:write-rows data s))))
+
+(defun write-file/cl-csv (data repetitions)
+ (with-open-file (s "test/data/large-cl-csv.csv"
+ :direction :output
+ :if-exists :supersede)
+ (loop :repeat repetitions :do
+ (cl-csv:write-csv data
+ :stream s
+ :newline (string #\newline)))))
+
+(defun write-file/fare-csv (data repetitions)
+ (with-open-file (s "test/data/large-fare-csv.csv"
+ :direction :output
+ :if-exists :supersede)
+ (fare-csv:with-rfc4180-csv-syntax ()
+ (loop :repeat repetitions :do (fare-csv:write-csv-lines data s)))))
+
+(defvar *data* nil)
+(defparameter *verify-large-file-reads* t)
+(defparameter *repetitions* 30)
+(defparameter *rows* 1000)
+(defparameter *fields* 200)
+(defparameter *characters* 80)
+(defparameter *size* (* *repetitions* *rows* *fields* *characters* 3))
+
+(defun benchmark-large-file/write ()
+ (format t "~%Generating data~%")
+ (let ((data (time (random-data *rows* *fields* *characters*))))
+ (setf *data* data)
+
+ (format t "~%Timing large file write for Conserve:~%")
+ #+sbcl (sb-ext:gc :full t)
+ (time (write-file/conserve data *repetitions*))
+
+ (format t "~%Timing large file write for fare-csv:~%")
+ #+sbcl (sb-ext:gc :full t)
+ (time (write-file/fare-csv data *repetitions*))
+
+ (format t "~%Timing large file write for cl-csv:~%")
+ #+sbcl (sb-ext:gc :full t)
+ (time (write-file/cl-csv data *repetitions*))
+
+ (values)))
+
-(define-test fuzz-binary
- (dotimes (i *fuzz-test-count*)
- (multiple-value-bind (original original-format) (make-random-array)
- (write-to-file "test/data/fuzz.binary" original
- :if-exists :supersede
- :format original-format
- :encoding :binary)
- (multiple-value-bind (new new-format)
- (read-from-file "test/data/fuzz.binary")
- (is (eql original-format new-format))
- (is (equalp original new))))))
+(defun read-file/conserve ()
+ (with-open-file (s "test/data/large-conserve.csv")
+ (if *verify-large-file-reads*
+ (loop
+ :for original :in *data*
+ :for row = (conserve:read-row s nil :eof)
+ :until (eql :eof row)
+ :do (assert (equal original row)))
+ (loop
+ :for row = (conserve:read-row s nil :eof)
+ :until (eql :eof row)))))
+
+(defun read-file/fare-csv ()
+ (with-open-file (s "test/data/large-fare-csv.csv")
+ (fare-csv:with-rfc4180-csv-syntax ()
+ (if *verify-large-file-reads*
+ (loop
+ :for original :in *data*
+ :until (eql :eof (peek-char nil s nil :eof))
+ :do (assert (equal original (or (fare-csv:read-csv-line s) '("")))))
+ (loop
+ :until (eql :eof (peek-char nil s nil :eof))
+ :do (fare-csv:read-csv-line s))))))
-(define-test fuzz-convert
- (dotimes (i *fuzz-test-count*)
- (multiple-value-bind (original original-format) (make-random-array)
- (write-to-file "test/data/fuzz.convert.in" original
- :if-exists :supersede
- :format original-format
- :encoding :ascii)
- (uiop:run-program (list "convert" "-format" (ecase original-format
- (:ppm "ppm")
- (:pgm "pgm")
- (:pbm "pbm"))
- "test/data/fuzz.convert.in"
- "test/data/fuzz.convert.out"))
- (multiple-value-bind (new new-format)
- (read-from-file "test/data/fuzz.convert.out")
- (is (eql original-format new-format))
- (is (equalp original new))))))
+(defun read-file/cl-csv ()
+ (with-open-file (s "test/data/large-cl-csv.csv")
+ (handler-case
+ (if *verify-large-file-reads*
+ (loop
+ :for original :in *data*
+ :for row = (cl-csv:read-csv-row s
+ :newline (string #\newline)
+ :trim-outer-whitespace nil)
+ :do (assert (equal original row)))
+ (loop
+ (cl-csv:read-csv-row s
+ :newline (string #\newline)
+ :trim-outer-whitespace nil)))
+ (end-of-file () nil))))
+
+(defun benchmark-large-file/read ()
+ ;; circular list to make iterating easier in the readers
+ (setf (cdr (last *data*)) *data*)
+
+ (format t "~%Timing large file read for Conserve:~%")
+ #+sbcl (sb-ext:gc :full t)
+ (time (read-file/conserve))
+
+ (format t "~%Timing large file read for fare-csv:~%")
+ #+sbcl (sb-ext:gc :full t)
+ (time (read-file/fare-csv))
+
+ (format t "~%Timing large file read for cl-csv:~%")
+ #+sbcl (sb-ext:gc :full t)
+ (time (read-file/cl-csv))
+
+ (values))
+
+(defun benchmark-large-file ()
+ (unless (y-or-n-p
+ "This benchmark could require over ~:Dmb of hard disk space.~@
+ Do you want to proceed?"
+ (truncate *size* (* 1024 1024)))
+ (return-from benchmark-large-file))
+ (benchmark-large-file/write)
+ (benchmark-large-file/read))
+