# HG changeset patch # User Steve Losh # Date 1580493836 18000 # Node ID 745c3f963e842cf12bc88a1f013ea263d85c1870 # Parent 2a7f613576bc5065e52a8684a50f87b7fa8cf019 Add options for handling encodings in `lines` diff -r 2a7f613576bc -r 745c3f963e84 lisp/lines.lisp --- a/lisp/lines.lisp Fri Jan 31 11:09:09 2020 -0500 +++ b/lisp/lines.lisp Fri Jan 31 13:03:56 2020 -0500 @@ -35,8 +35,8 @@ (= (n matcher) i)) (defmethod map-matching-lines ((matcher single-line-matcher) function min max) - (declare (ignore min max)) - (funcall function (n matcher))) + (when (<= min (n matcher) max) + (funcall function (n matcher)))) (defmethod maximum ((matcher single-line-matcher)) (n matcher)) @@ -149,17 +149,39 @@ (setf (gethash i lines) line)))) :finally (return (values lines min max)))) -(defun run (line-designators) - (let ((matchers (parse-line-designators line-designators)) - (include-numbers *include-numbers*)) + +(defun output-line (line i) + (when i + (format *standard-output* "~D " i)) + (write-line line)) + +(defun run-slow (matchers) + (let ((include-numbers *include-numbers*)) (multiple-value-bind (lines min max) (collect-lines matchers) (dolist (matcher matchers) - (map-matching-lines matcher - (lambda (i) - (when include-numbers - (format *standard-output* "~D " i)) - (write-line (gethash i lines))) - min max)))) + (map-matching-lines + matcher + (lambda (i) + (output-line (gethash i lines) + (when include-numbers i))) + min max))))) + +(defun run-fast (matcher) + (loop + :with include-numbers = *include-numbers* + :for i :from *index-base* + :for line = (read-line *standard-input* nil) + :while line + :when (zerop (mod i 100000)) + :do (progn #+sbcl (sb-ext:gc)) + :when (matches-line-p matcher i) + :do (output-line line (when include-numbers i)))) + +(defun run (line-designators) + (let ((matchers (parse-line-designators line-designators))) + (if (= 1 (length matchers)) + (run-fast (first matchers)) + (run-slow matchers))) (values)) @@ -221,6 +243,7 @@ :long "version" :reduce (constantly t))) + (defparameter *option-one-based* (adopt:make-option 'one-based :result-key 'index-base @@ -256,6 +279,57 @@ :help-no "Do not add line number to the output (default).")) +(defparameter *option-input-encoding* + (adopt:make-option 'input-encoding + :help "Treat input as being encoded with ENC (default utf-8)." + :parameter "ENC" + :short #\i + :long "input-encoding" + :initial-value "utf-8" + :reduce #'adopt:last)) + +(defparameter *option-output-encoding* + (adopt:make-option 'output-encoding + :help "Output text encoded with ENC (default utf-8)." + :parameter "ENC" + :short #\o + :long "output-encoding" + :initial-value "utf-8" + :reduce #'adopt:last)) + +(defparameter *option-input-replacement* + (adopt:make-option 'input-replacement + :help "If an input character is not valid in the selected encoding, replace it with REP." + :parameter "REP" + :short #\r + :long "input-replacement" + :reduce #'adopt:last)) + +(defparameter *option-no-input-replacement* + (adopt:make-option 'no-input-replacement + :result-key 'input-replacement + :help "If an input character is not valid in the selected encoding, return an error (default)." + :short #\R + :long "no-input-replacement" + :reduce (constantly nil))) + +(defparameter *option-output-replacement* + (adopt:make-option 'output-replacement + :help "If an output character would not be not valid in the selected encoding, replace it with REP." + :parameter "REP" + :short #\q + :long "output-replacement" + :reduce #'adopt:last)) + +(defparameter *option-no-output-replacement* + (adopt:make-option 'no-output-replacement + :result-key 'output-replacement + :help "If an output character would not be valid in the selected encoding, return an error (default)." + :short #\Q + :long "no-output-replacement" + :reduce (constantly nil))) + + (adopt:define-string *help-text* "lines takes a string denoting which lines to print, and prints those lines ~ of standard input.") @@ -285,6 +359,15 @@ *option-version* *option-debug* *option-no-debug* + (adopt:make-group 'character-encodings + :title "Character Encodings" + :options (list + *option-input-encoding* + *option-input-replacement* + *option-no-input-replacement* + *option-output-encoding* + *option-output-replacement* + *option-no-output-replacement*)) (adopt:make-group 'line-numbering :title "Line Numbering" :options (list @@ -319,6 +402,12 @@ (with-user-abort:user-abort () (adopt:exit 130)))) +(defun determine-external-format (encoding replacement) + (let ((encoding (intern (string-upcase encoding) :keyword))) + (if (null replacement) + encoding + (list encoding :replacement replacement)))) + (defun toplevel () (exit-on-ctrl-c (multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*) @@ -331,8 +420,20 @@ (when more (error "Unrecognized command line arguments: ~S" more)) (let ((*index-base* (gethash 'index-base options)) - (*include-numbers* (gethash 'include-numbers options))) - (run line-designators))))))))) + (*include-numbers* (gethash 'include-numbers options)) + (input-format (determine-external-format + (gethash 'input-encoding options) + (gethash 'input-replacement options))) + (output-format (determine-external-format + (gethash 'output-encoding options) + (gethash 'output-replacement options)))) + (with-open-file (*standard-input* "/dev/stdin" + :external-format input-format) + (with-open-file (*standard-output* "/dev/stdout" + :external-format output-format + :direction :output + :if-exists :append) + (run line-designators))))))))))) #; Scratch --------------------------------------------------------------------