Add options for handling encodings in `lines`
author |
Steve Losh <steve@stevelosh.com> |
date |
Fri, 31 Jan 2020 13:03:56 -0500 |
parents |
2a7f613576bc
|
children |
6a7f7a6d7532
|
branches/tags |
(none) |
files |
lisp/lines.lisp |
Changes
--- a/lisp/lines.lisp Fri Jan 31 11:09:09 2020 -0500
+++ b/lisp/lines.lisp Fri Jan 31 13:03:56 2020 -0500
@@ -35,8 +35,8 @@
(= (n matcher) i))
(defmethod map-matching-lines ((matcher single-line-matcher) function min max)
- (declare (ignore min max))
- (funcall function (n matcher)))
+ (when (<= min (n matcher) max)
+ (funcall function (n matcher))))
(defmethod maximum ((matcher single-line-matcher))
(n matcher))
@@ -149,17 +149,39 @@
(setf (gethash i lines) line))))
:finally (return (values lines min max))))
-(defun run (line-designators)
- (let ((matchers (parse-line-designators line-designators))
- (include-numbers *include-numbers*))
+
+;;; Write LINE followed by a newline to standard output.  When I is non-NIL
+;;; it is printed first as a decimal number followed by a single space.
+(defun output-line (line i)
+  (unless (null i)
+    (format *standard-output* "~D " i))
+  (write-line line *standard-output*))
+
+;;; General path for any list of MATCHERS: first gather the needed lines with
+;;; `collect-lines` (a table keyed by line number plus the min and max line
+;;; numbers), then walk the matchers in order and print every line each one
+;;; selects.  Line numbers are printed only when `*include-numbers*` is true.
+(defun run-slow (matchers)
+  (let ((numbered-p *include-numbers*))
+    (multiple-value-bind (lines min max) (collect-lines matchers)
+      (flet ((emit (i)
+               (output-line (gethash i lines)
+                            (when numbered-p i))))
+        (dolist (matcher matchers)
+          (map-matching-lines matcher #'emit min max))))))
+
+;;; Streaming path for a single MATCHER: read standard input one line at a
+;;; time, numbering lines from `*index-base*`, and print each line the matcher
+;;; accepts without storing the input.  Reading continues until end of input.
+(defun run-fast (matcher)
+  (let ((numbered-p *include-numbers*))
+    (do ((i *index-base* (1+ i))
+         (line (read-line *standard-input* nil)
+               (read-line *standard-input* nil)))
+        ((null line))
+      ;; Every 100000 lines, explicitly trigger a garbage collection on SBCL
+      ;; (presumably to keep memory bounded on very large inputs).
+      (when (zerop (mod i 100000))
+        #+sbcl (sb-ext:gc))
+      (when (matches-line-p matcher i)
+        (output-line line (when numbered-p i))))))
+
+;;; Entry point: parse LINE-DESIGNATORS into matchers and dispatch.  Exactly
+;;; one matcher takes the streaming `run-fast` path; anything else goes
+;;; through `run-slow`.  Returns no values.
+(defun run (line-designators)
+  (let ((matchers (parse-line-designators line-designators)))
+    (cond ((= 1 (length matchers))
+           (run-fast (first matchers)))
+          (t
+           (run-slow matchers))))
+  (values))
@@ -221,6 +243,7 @@
:long "version"
:reduce (constantly t)))
+
(defparameter *option-one-based*
(adopt:make-option 'one-based
:result-key 'index-base
@@ -256,6 +279,57 @@
:help-no "Do not add line number to the output (default)."))
+;;; Option -i / --input-encoding ENC: names the character encoding of the
+;;; input.  The initial value is "utf-8"; with `adopt:last` as the reducer the
+;;; last occurrence on the command line presumably wins (confirm against the
+;;; Adopt documentation).
+(defparameter *option-input-encoding*
+ (adopt:make-option 'input-encoding
+ :help "Treat input as being encoded with ENC (default utf-8)."
+ :parameter "ENC"
+ :short #\i
+ :long "input-encoding"
+ :initial-value "utf-8"
+ :reduce #'adopt:last))
+
+;;; Option -o / --output-encoding ENC: names the character encoding of the
+;;; output.  The initial value is "utf-8"; with `adopt:last` as the reducer
+;;; the last occurrence on the command line presumably wins (confirm against
+;;; the Adopt documentation).
+(defparameter *option-output-encoding*
+ (adopt:make-option 'output-encoding
+ :help "Output text encoded with ENC (default utf-8)."
+ :parameter "ENC"
+ :short #\o
+ :long "output-encoding"
+ :initial-value "utf-8"
+ :reduce #'adopt:last))
+
+;;; Option -r / --input-replacement REP: replacement text for input
+;;; characters that are invalid in the selected encoding.  No :initial-value
+;;; is given here, so no replacement is configured unless the option is used.
+(defparameter *option-input-replacement*
+ (adopt:make-option 'input-replacement
+ :help "If an input character is not valid in the selected encoding, replace it with REP."
+ :parameter "REP"
+ :short #\r
+ :long "input-replacement"
+ :reduce #'adopt:last))
+
+;;; Option -R / --no-input-replacement: the negating counterpart of
+;;; --input-replacement.  It writes to the same result key
+;;; (`input-replacement`) and its reducer always yields NIL, clearing any
+;;; replacement set earlier.
+(defparameter *option-no-input-replacement*
+ (adopt:make-option 'no-input-replacement
+ :result-key 'input-replacement
+ :help "If an input character is not valid in the selected encoding, return an error (default)."
+ :short #\R
+ :long "no-input-replacement"
+ :reduce (constantly nil)))
+
+;;; Option -q / --output-replacement REP: replacement text for output
+;;; characters that cannot be represented in the selected encoding.  No
+;;; :initial-value is given here, so no replacement is configured unless the
+;;; option is used.
+(defparameter *option-output-replacement*
+ (adopt:make-option 'output-replacement
+ :help "If an output character would not be valid in the selected encoding, replace it with REP."
+ :parameter "REP"
+ :short #\q
+ :long "output-replacement"
+ :reduce #'adopt:last))
+
+;;; Option -Q / --no-output-replacement: the negating counterpart of
+;;; --output-replacement.  It writes to the same result key
+;;; (`output-replacement`) and its reducer always yields NIL, clearing any
+;;; replacement set earlier.
+(defparameter *option-no-output-replacement*
+ (adopt:make-option 'no-output-replacement
+ :result-key 'output-replacement
+ :help "If an output character would not be valid in the selected encoding, return an error (default)."
+ :short #\Q
+ :long "no-output-replacement"
+ :reduce (constantly nil)))
+
+
(adopt:define-string *help-text*
"lines takes a string denoting which lines to print, and prints those lines ~
of standard input.")
@@ -285,6 +359,15 @@
*option-version*
*option-debug*
*option-no-debug*
+ (adopt:make-group 'character-encodings
+ :title "Character Encodings"
+ :options (list
+ *option-input-encoding*
+ *option-input-replacement*
+ *option-no-input-replacement*
+ *option-output-encoding*
+ *option-output-replacement*
+ *option-no-output-replacement*))
(adopt:make-group 'line-numbering
:title "Line Numbering"
:options (list
@@ -319,6 +402,12 @@
(with-user-abort:user-abort () (adopt:exit 130))))
+;;; Build an external-format designator from the ENCODING name (a string,
+;;; upcased and interned as a keyword) and an optional REPLACEMENT.  With no
+;;; replacement the bare keyword is returned; otherwise the result is the
+;;; list (KEYWORD :replacement REPLACEMENT).
+(defun determine-external-format (encoding replacement)
+  (let ((name (intern (string-upcase encoding) :keyword)))
+    (if replacement
+        (list name :replacement replacement)
+        name)))
+
(defun toplevel ()
(exit-on-ctrl-c
(multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*)
@@ -331,8 +420,20 @@
(when more
(error "Unrecognized command line arguments: ~S" more))
(let ((*index-base* (gethash 'index-base options))
- (*include-numbers* (gethash 'include-numbers options)))
- (run line-designators)))))))))
+ (*include-numbers* (gethash 'include-numbers options))
+ (input-format (determine-external-format
+ (gethash 'input-encoding options)
+ (gethash 'input-replacement options)))
+ (output-format (determine-external-format
+ (gethash 'output-encoding options)
+ (gethash 'output-replacement options))))
+ (with-open-file (*standard-input* "/dev/stdin"
+ :external-format input-format)
+ (with-open-file (*standard-output* "/dev/stdout"
+ :external-format output-format
+ :direction :output
+ :if-exists :append)
+ (run line-designators)))))))))))
#; Scratch --------------------------------------------------------------------