745c3f963e84

Add options for handling encodings in `lines`
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Fri, 31 Jan 2020 13:03:56 -0500
parents 2a7f613576bc
children 6a7f7a6d7532
branches/tags (none)
files lisp/lines.lisp

Changes

--- a/lisp/lines.lisp	Fri Jan 31 11:09:09 2020 -0500
+++ b/lisp/lines.lisp	Fri Jan 31 13:03:56 2020 -0500
@@ -35,8 +35,8 @@
   (= (n matcher) i))
 
 (defmethod map-matching-lines ((matcher single-line-matcher) function min max)
-  (declare (ignore min max))
-  (funcall function (n matcher)))
+  (when (<= min (n matcher) max)
+    (funcall function (n matcher))))
 
 (defmethod maximum ((matcher single-line-matcher))
   (n matcher))
@@ -149,17 +149,39 @@
                    (setf (gethash i lines) line))))
     :finally (return (values lines min max))))
 
-(defun run (line-designators)
-  (let ((matchers (parse-line-designators line-designators))
-        (include-numbers *include-numbers*))
+
+(defun output-line (line i)
+  (when i
+    (format *standard-output* "~D " i))
+  (write-line line))
+
+(defun run-slow (matchers)
+  (let ((include-numbers *include-numbers*))
     (multiple-value-bind (lines min max) (collect-lines matchers)
       (dolist (matcher matchers)
-        (map-matching-lines matcher
-                            (lambda (i)
-                              (when include-numbers
-                                (format *standard-output* "~D " i))
-                              (write-line (gethash i lines)))
-                            min max))))
+        (map-matching-lines
+          matcher
+          (lambda (i)
+            (output-line (gethash i lines)
+                         (when include-numbers i)))
+          min max)))))
+
+(defun run-fast (matcher)
+  (loop
+    :with include-numbers = *include-numbers*
+    :for i :from *index-base*
+    :for line = (read-line *standard-input* nil)
+    :while line
+    :when (zerop (mod i 100000))
+    :do (progn #+sbcl (sb-ext:gc))
+    :when (matches-line-p matcher i)
+    :do (output-line line (when include-numbers i))))
+
+(defun run (line-designators)
+  (let ((matchers (parse-line-designators line-designators)))
+    (if (= 1 (length matchers))
+      (run-fast (first matchers))
+      (run-slow matchers)))
   (values))
 
 
@@ -221,6 +243,7 @@
     :long "version"
     :reduce (constantly t)))
 
+
 (defparameter *option-one-based*
   (adopt:make-option 'one-based
     :result-key 'index-base
@@ -256,6 +279,57 @@
     :help-no "Do not add line number to the output (default)."))
 
 
+(defparameter *option-input-encoding*
+  (adopt:make-option 'input-encoding
+    :help "Treat input as being encoded with ENC (default utf-8)."
+    :parameter "ENC"
+    :short #\i
+    :long "input-encoding"
+    :initial-value "utf-8"
+    :reduce #'adopt:last))
+
+(defparameter *option-output-encoding*
+  (adopt:make-option 'output-encoding
+    :help "Output text encoded with ENC (default utf-8)."
+    :parameter "ENC"
+    :short #\o
+    :long "output-encoding"
+    :initial-value "utf-8"
+    :reduce #'adopt:last))
+
+(defparameter *option-input-replacement*
+  (adopt:make-option 'input-replacement
+    :help "If an input character is not valid in the selected encoding, replace it with REP."
+    :parameter "REP"
+    :short #\r
+    :long "input-replacement"
+    :reduce #'adopt:last))
+
+(defparameter *option-no-input-replacement*
+  (adopt:make-option 'no-input-replacement
+    :result-key 'input-replacement
+    :help "If an input character is not valid in the selected encoding, return an error (default)."
+    :short #\R
+    :long "no-input-replacement"
+    :reduce (constantly nil)))
+
+(defparameter *option-output-replacement*
+  (adopt:make-option 'output-replacement
+    :help "If an output character would not be not valid in the selected encoding, replace it with REP."
+    :parameter "REP"
+    :short #\q
+    :long "output-replacement"
+    :reduce #'adopt:last))
+
+(defparameter *option-no-output-replacement*
+  (adopt:make-option 'no-output-replacement
+    :result-key 'output-replacement
+    :help "If an output character would not be valid in the selected encoding, return an error (default)."
+    :short #\Q
+    :long "no-output-replacement"
+    :reduce (constantly nil)))
+
+
 (adopt:define-string *help-text*
   "lines takes a string denoting which lines to print, and prints those lines ~
    of standard input.")
@@ -285,6 +359,15 @@
                 *option-version*
                 *option-debug*
                 *option-no-debug*
+                (adopt:make-group 'character-encodings
+                                  :title "Character Encodings"
+                                  :options (list
+                                             *option-input-encoding*
+                                             *option-input-replacement*
+                                             *option-no-input-replacement*
+                                             *option-output-encoding*
+                                             *option-output-replacement*
+                                             *option-no-output-replacement*))
                 (adopt:make-group 'line-numbering
                                   :title "Line Numbering"
                                   :options (list
@@ -319,6 +402,12 @@
      (with-user-abort:user-abort () (adopt:exit 130))))
 
 
+(defun determine-external-format (encoding replacement)
+  (let ((encoding (intern (string-upcase encoding) :keyword)))
+    (if (null replacement)
+      encoding
+      (list encoding :replacement replacement))))
+
 (defun toplevel ()
   (exit-on-ctrl-c
     (multiple-value-bind (arguments options) (adopt:parse-options-or-exit *ui*)
@@ -331,8 +420,20 @@
                (when more
                  (error "Unrecognized command line arguments: ~S" more))
                (let ((*index-base* (gethash 'index-base options))
-                     (*include-numbers* (gethash 'include-numbers options)))
-                 (run line-designators)))))))))
+                     (*include-numbers* (gethash 'include-numbers options))
+                     (input-format (determine-external-format
+                                     (gethash 'input-encoding options)
+                                     (gethash 'input-replacement options)))
+                     (output-format (determine-external-format
+                                      (gethash 'output-encoding options)
+                                      (gethash 'output-replacement options))))
+                 (with-open-file (*standard-input* "/dev/stdin"
+                                                   :external-format input-format)
+                   (with-open-file (*standard-output* "/dev/stdout"
+                                                      :external-format output-format
+                                                      :direction :output
+                                                      :if-exists :append)
+                     (run line-designators)))))))))))
 
 
 #; Scratch --------------------------------------------------------------------