Remove unnecessary consing
author |
Steve Losh <steve@stevelosh.com> |
date |
Tue, 25 Dec 2018 22:34:30 -0500 |
parents |
a4abd0534b7d
|
children |
c50aa06f13bd
|
branches/tags |
(none) |
files |
.hgignore src/bench.lisp src/main.lisp |
Changes
--- a/.hgignore Tue Dec 25 14:32:50 2018 -0500
+++ b/.hgignore Tue Dec 25 22:34:30 2018 -0500
@@ -1,7 +1,5 @@
syntax: glob
lisp.prof
scratch.lisp
-*.log
docs/build
-images
-test/data/fuzz.*
+test/large*
--- a/src/bench.lisp Tue Dec 25 14:32:50 2018 -0500
+++ b/src/bench.lisp Tue Dec 25 22:34:30 2018 -0500
@@ -1,20 +1,25 @@
(in-package :trivial-csv)
+(defparameter *field-length* 50)
+(defparameter *row-length* 50)
+(defparameter *data-rows* 10000)
+(defparameter *data-repetitions* 10)
+
(defun random-char (string)
(aref string (random (length string))))
(defun random-field ()
(with-output-to-string (s)
- (dotimes (i (random 100))
+ (dotimes (i (random *field-length*))
(write-char (random-char (concatenate 'string (string #\newline)
" abcdefghijklmnop,\""))
s))))
(defun random-row ()
- (loop :repeat (1+ (random 20)) :collect (random-field)))
+ (loop :repeat (1+ (random *row-length*)) :collect (random-field)))
-(defun random-data (rows)
- (loop :repeat rows :collect (random-row)))
+(defun random-data ()
+ (loop :repeat *data-rows* :collect (random-row)))
(defun bench-this (data)
(let* ((str (make-string-input-stream
@@ -23,7 +28,7 @@
(result (time (read-rows str))))
(equal data result)))
-(defun bench-other (data)
+(defun bench-cl-csv (data)
(let* ((str (make-string-input-stream
(with-output-to-string (s)
(time (cl-csv:write-csv data :stream s)))))
@@ -31,41 +36,58 @@
(equal data result)))
(defun bench ()
- (let ((data (random-data 5000)))
+ (let ((data (random-data)))
(write-line "MINE")
(bench-this data)
- (write-line "OTHER")
- (bench-other data))
+ (write-line "cl-csv")
+ (bench-cl-csv data))
)
-(defparameter *data* nil)
+(defvar *data* nil)
(defun generate-large-data ()
- (setf *data* (random-data 5000)))
+ (setf *data* (random-data)))
(defun write-file-this ()
(with-open-file (s "test/large-this.csv"
:direction :output
:if-exists :supersede)
(time
- (loop :repeat 10 :do (write-rows *data* s)))))
+ (loop :repeat *data-repetitions* :do
+ (write-rows *data* s)))))
-(defun write-file-other ()
- (with-open-file (s "test/large-other.csv"
+(defun write-file-cl-csv ()
+ (with-open-file (s "test/large-cl-csv.csv"
:direction :output
:if-exists :supersede)
- (time (loop :repeat 10 :do (cl-csv:write-csv *data*
- :stream s
- :newline (string #\newline))))))
+ (time (loop :repeat *data-repetitions* :do
+ (cl-csv:write-csv *data*
+ :stream s
+ :newline (string #\newline))))))
+(defun write-file-fare ()
+ (with-open-file (s "test/large-fare.csv"
+ :direction :output
+ :if-exists :supersede)
+ (fare-csv:with-rfc4180-csv-syntax ()
+ (time (loop :repeat *data-repetitions* :do
+ (fare-csv:write-csv-lines *data* s))))))
(defun bench-write-file ()
(write-line "Generating data.")
(time (generate-large-data))
+
(write-line "Benchmarking this (writing).")
+ #+sbcl (sb-ext:gc :full t)
(write-file-this)
- (write-line "Benchmarking other (writing).")
- (write-file-other))
+
+ (write-line "Benchmarking cl-csv (writing).")
+ #+sbcl (sb-ext:gc :full t)
+ (write-file-cl-csv)
+
+ (write-line "Benchmarking fare-csv (writing).")
+ #+sbcl (sb-ext:gc :full t)
+ (write-file-fare))
(defun read-file-this ()
@@ -77,28 +99,67 @@
(setf data *data*))
(pop data))
:until (eql :eof row)
+ :summing 1
:do (assert (equal expected-row row))))))
-(defun read-file-other ()
- (with-open-file (s "test/large-other.csv")
- (time (handler-case
- (loop
- :with data = *data*
- :for row = (cl-csv:read-csv-row s
- :newline (string #\newline)
- :trim-outer-whitespace nil)
- :for expected-row = (progn (when (null data)
- (setf data *data*))
- (pop data))
- :do (assert (equal expected-row row)))
- (end-of-file () nil)))))
+(defun read-file-cl-csv ()
+ (with-open-file (s "test/large-cl-csv.csv")
+ (let ((result 0))
+ (time (handler-case
+ (loop
+ :with data = *data*
+ :for row = (cl-csv:read-csv-row s
+ :newline (string #\newline)
+ :trim-outer-whitespace nil)
+ :for expected-row = (progn (when (null data)
+ (setf data *data*))
+ (pop data))
+ :do (incf result) (assert (equal expected-row row)))
+ (end-of-file () result))))))
+
+;; There are a couple of problems with fare-csv that make it annoying to work
+;; with:
+;;
+;; fare-csv:read-csv-line can't tell you the difference between '("") and eof so
+;; we have to check for it manually to know when we're done. This is only
+;; a problem when we're reading line by line.
+;;
+;; fare-csv also can't roundtrip '(""). CSV itself is incapable of
+;; differentiating '() and '("") unless you force quoting, but fare-csv chooses
+;; '() over '(""). Why would you sell out the marginally-useful case (a 1-col
+;; CSV) in favor of the utterly useless case (a 0-col CSV)?
+(defun read-file-fare ()
+ (with-open-file (s "test/large-fare.csv")
+ (fare-csv:with-rfc4180-csv-syntax ()
+ (time (loop
+ :with data = *data*
+ :for eof = (peek-char nil s nil :eof)
+ :for row = (fare-csv:read-csv-line s)
+ :for expected-row = (progn (when (null data)
+ (setf data *data*))
+ (pop data))
+ :until (eql :eof eof)
+ :summing 1
+ :do (if (equal expected-row '(""))
+ (assert (equal nil row))
+ (assert (equal expected-row row))))))))
(defun bench-read-file ()
(write-line "Benchmarking this (reading).")
- (read-file-this)
- (write-line "Benchmarking other (reading).")
- (read-file-other))
+ #+sbcl (sb-ext:gc :full t)
+ (format t "Read ~D rows.~2%" (read-file-this))
+
+ (write-line "Benchmarking cl-csv (reading).")
+ #+sbcl (sb-ext:gc :full t)
+ (format t "Read ~D rows.~2%" (read-file-cl-csv))
+
+ (write-line "Benchmarking fare-csv (reading).")
+ #+sbcl (sb-ext:gc :full t)
+ (format t "Read ~D rows.~2%" (read-file-fare))
+
+ (values))
(defun bench-file ()
(bench-write-file)
(bench-read-file))
+
--- a/src/main.lisp Tue Dec 25 14:32:50 2018 -0500
+++ b/src/main.lisp Tue Dec 25 22:34:30 2018 -0500
@@ -8,6 +8,10 @@
;;;; Reading ------------------------------------------------------------------
+;;; We reuse a single string output stream for all the fields in a row, rather
+;;; than having a fresh one per field. It's a little more tedious but it's
+;;; *significantly* less consing (e.g. 500mb versus 1.5gb).
+
(defun read-char-if (char stream &optional (eof-error-p t) eof-value)
"If the next character in `stream` is `char`, read and return it. Otherwise, return `nil`."
(cond
@@ -15,59 +19,61 @@
((eql char eof-value) char)
(t nil)))
-(defun read-unquoted-field (stream delimiter)
- "Read an unquoted field (but not the ending field delimiter) from `stream`."
- (with-output-to-string (result)
- (loop :for next = (peek-char nil stream nil delimiter)
- :until (cond
- ((char= next delimiter) t)
- ((char= next #\newline) t)
- ((char= next #\") (error "Unquoted field contains quote."))
- (t nil))
- :do (write-char (read-char stream) result))))
+(defun read-unquoted-field (stream delimiter result)
+ "Read an unquoted field (but not the ending field delimiter) from `stream` into `result`."
+ (loop :for next = (peek-char nil stream nil delimiter)
+ :until (cond
+ ((char= next delimiter) t)
+ ((char= next #\newline) t)
+ ((char= next #\") (error "Unquoted field contains quote."))
+ (t nil))
+ :do (write-char (read-char stream) result)))
-(defun read-quoted-field (stream delimiter)
- "Read a quoted field (but not the ending field delimiter) from `stream`."
+(defun read-quoted-field (stream delimiter result)
+ "Read a quoted field (but not the ending field delimiter) from `stream` into `result`."
(declare (ignore delimiter))
(read-char stream) ; chomp initial quote
- (with-output-to-string (result)
- (loop :for char = (read-char stream)
- :until (and (char= #\" char)
- (not (read-char-if #\" stream nil)))
- :do (write-char char result))))
+ (loop :for char = (read-char stream)
+ :until (and (char= #\" char)
+ (not (read-char-if #\" stream nil)))
+ :do (write-char char result)))
-(defun read-field (stream delimiter)
- "Read and return a single field from `stream`."
+(defun read-field (stream delimiter result)
+ "Read and return a single field from `stream` into `result`."
(let* ((field (case (peek-char nil stream nil :eof)
- (#\" (read-quoted-field stream delimiter))
+ (#\" (read-quoted-field stream delimiter result) t)
(#\newline (read-char stream) nil)
(:eof nil)
- (t (read-unquoted-field stream delimiter))))
+ (t (read-unquoted-field stream delimiter result) t)))
(done (cond
((null field) t) ; empty field at the end
((read-char-if delimiter stream nil) nil) ; normal field
((read-char-if #\newline stream nil #\newline) t) ; last field
(t (error "Bad data after field ~S: ~S"
field (peek-char nil stream))))))
- (values (or field "") done)))
+ (values (if field
+ (get-output-stream-string result)
+ "")
+ done)))
(defun read-row% (stream delimiter eof-error-p eof-value)
(if (eql :eof (peek-char nil stream eof-error-p :eof))
eof-value
- (loop
- :with field :with done
- :do (setf (values field done) (read-field stream delimiter))
- :collect field
- :until done)))
+ (loop :with field
+ :with done
+ :with result = (make-string-output-stream)
+ :do (setf (values field done) (read-field stream delimiter result))
+ :collect field
+ :until done)))
;;;; Writing ------------------------------------------------------------------
(defun field-needs-quoting-p (field delimiter)
- (find-if (lambda (char)
- (or (char= char #\newline)
- (char= char #\")
- (char= char delimiter)))
- field))
+ (some (lambda (char)
+ (or (char= char #\newline)
+ (char= char #\")
+ (char= char delimiter)))
+ field))
(defun write-quoted-field (field stream)
(write-char #\" stream)
@@ -96,12 +102,29 @@
;;;; API ----------------------------------------------------------------------
(defun read-row (&optional (stream *standard-input*) (eof-error-p t) eof-value)
- "Read and return a row of fields from `stream`."
+ "Read and return a row of fields from the CSV data in `stream`.
+
+ The result will be completely fresh.
+
+ If the end of file for the stream is encountered immediately, an error is
+ signaled unless `eof-error-p` is false, in which case `eof-value` is returned.
+
+ "
+ (check-type stream stream)
+ (assert (input-stream-p stream) (stream)
+ "Stream ~S is not an input stream." stream)
(check-delimiter)
(read-row% stream *delimiter* eof-error-p eof-value))
(defun read-rows (&optional (stream *standard-input*))
- "Read and return all rows from `stream`."
+ "Read and return all CSV rows from the CSV data in `stream`.
+
+ The result will be completely fresh.
+
+ "
+ (check-type stream stream)
+ (assert (input-stream-p stream) (stream)
+ "Stream ~S is not an input stream." stream)
(check-delimiter)
(loop :with delimiter = *delimiter*
:for row = (read-row% stream delimiter nil :eof)
@@ -109,11 +132,13 @@
:collect row))
(defun write-row (row &optional (stream *standard-output*))
+ "Write `row` to `stream` as CSV data."
(check-delimiter)
(write-row% row stream *delimiter*)
(values))
(defun write-rows (rows &optional (stream *standard-output*))
+ "Write `rows` to `stream` as CSV data."
(check-delimiter)
(loop :with delimiter = *delimiter*
:for row :in rows