66af3dc24580

Add `group-by` and silly bitset thing
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Tue, 08 Nov 2016 15:47:12 +0000
parents 1c5dff36922b
children 3d34f6a99a24
branches/tags (none)
files DOCUMENTATION.markdown losh.lisp make-docs.lisp package.lisp

Changes

--- a/DOCUMENTATION.markdown	Thu Nov 03 18:26:11 2016 +0000
+++ b/DOCUMENTATION.markdown	Tue Nov 08 15:47:12 2016 +0000
@@ -306,39 +306,6 @@
 
 Return the string used to represent `thing` when printing structurally.
 
-## Package `LOSH.DISTRIBUTIONS`
-
-Utilities for calculating statistical... things.
-
-### `FREQUENCIES` (function)
-
-    (FREQUENCIES SEQ &KEY (TEST 'EQL))
-
-Return a hash table containing the frequencies of the items in `seq`.
-
-  Uses `test` for the `:test` of the hash table.
-
-  Example:
-
-    (frequencies '(foo foo bar))
-    => {foo 2
-        bar 1}
-
-  
-
-### `PREFIX-SUMS` (function)
-
-    (PREFIX-SUMS LIST)
-
-Return a list of the prefix sums of the numbers in `list`.
-
-  Example:
-
-    (prefix-sums '(10 10 10 0 1))
-    => (10 20 30 30 31)
-
-  
-
 ## Package `LOSH.ELDRITCH-HORRORS`
 
 Abandon all hope, ye who enter here.
@@ -850,6 +817,76 @@
 
 Return a random boolean with `chance` probability of `t`.
 
+## Package `LOSH.SEQUENCES`
+
+Utilities for operating on sequences.
+
+### `FREQUENCIES` (function)
+
+    (FREQUENCIES SEQUENCE &KEY (TEST 'EQL))
+
+Return a hash table containing the frequencies of the items in `sequence`.
+
+  Uses `test` for the `:test` of the hash table.
+
+  Example:
+
+    (frequencies '(foo foo bar))
+    => {foo 2
+        bar 1}
+
+  
+
+### `GROUP-BY` (function)
+
+    (GROUP-BY FUNCTION SEQUENCE &KEY (TEST #'EQL) (KEY #'IDENTITY))
+
+Return a hash table of the elements of `sequence` grouped by `function`.
+
+  This function groups the elements of `sequence` into buckets.  The bucket for
+  an element is determined by calling `function` on it.
+
+  The result is a hash table (with test `test`) whose keys are the bucket
+  identifiers and whose values are lists of the elements in each bucket.  The
+  order of these lists is unspecified.
+
+  If `key` is given it will be called on each element before passing it to
+  `function` to produce the bucket identifier.  This does not effect what is
+  stored in the lists.
+
+  Examples:
+
+    (defparameter *items* '((1 foo) (1 bar) (2 cats) (3 cats)))
+
+    (group-by #'first *items*)
+    ; => { 1 ((1 foo) (1 bar))
+    ;      2 ((2 cats))
+    ;      3 ((3 cats)) }
+
+    (group-by #'second *items*)
+    ; => { foo  ((1 foo))
+    ;      bar  ((1 bar))
+    ;      cats ((2 cats) (3 cats)) }
+
+    (group-by #'evenp *items* :key #'first)
+    ; => { t   ((2 cats))
+    ;      nil ((1 foo) (1 bar) (3 cats)) }
+
+  
+
+### `PREFIX-SUMS` (function)
+
+    (PREFIX-SUMS SEQUENCE)
+
+Return a list of the prefix sums of the numbers in `sequence`.
+
+  Example:
+
+    (prefix-sums '(10 10 10 0 1))
+    => (10 20 30 30 31)
+
+  
+
 ## Package `LOSH.WEIGHTLISTS`
 
 A simple data structure for choosing random items with weighted probabilities.
--- a/losh.lisp	Thu Nov 03 18:26:11 2016 +0000
+++ b/losh.lisp	Tue Nov 08 15:47:12 2016 +0000
@@ -1181,9 +1181,9 @@
                            (keywordize-clause clause))))))
 
 
-;;;; Distributions
-(defun prefix-sums (list)
-  "Return a list of the prefix sums of the numbers in `list`.
+;;;; Sequences
+(defun prefix-sums (sequence)
+  "Return a list of the prefix sums of the numbers in `sequence`.
 
   Example:
 
@@ -1192,12 +1192,12 @@
 
   "
   (iterate
-    (for i :in list)
+    (for i :in-whatever sequence)
     (sum i :into s)
     (collect s)))
 
-(defun frequencies (seq &key (test 'eql))
-  "Return a hash table containing the frequencies of the items in `seq`.
+(defun frequencies (sequence &key (test 'eql))
+  "Return a hash table containing the frequencies of the items in `sequence`.
 
   Uses `test` for the `:test` of the hash table.
 
@@ -1210,10 +1210,49 @@
   "
   (iterate
     (with result = (make-hash-table :test test))
-    (for i :in-whatever seq)
+    (for i :in-whatever sequence)
     (incf (gethash i result 0))
     (finally (return result))))
 
+(defun group-by (function sequence &key (test #'eql) (key #'identity))
+  "Return a hash table of the elements of `sequence` grouped by `function`.
+
+  This function groups the elements of `sequence` into buckets.  The bucket for
+  an element is determined by calling `function` on it.
+
+  The result is a hash table (with test `test`) whose keys are the bucket
+  identifiers and whose values are lists of the elements in each bucket.  The
+  order of these lists is unspecified.
+
+  If `key` is given it will be called on each element before passing it to
+  `function` to produce the bucket identifier.  This does not effect what is
+  stored in the lists.
+
+  Examples:
+
+    (defparameter *items* '((1 foo) (1 bar) (2 cats) (3 cats)))
+
+    (group-by #'first *items*)
+    ; => { 1 ((1 foo) (1 bar))
+    ;      2 ((2 cats))
+    ;      3 ((3 cats)) }
+
+    (group-by #'second *items*)
+    ; => { foo  ((1 foo))
+    ;      bar  ((1 bar))
+    ;      cats ((2 cats) (3 cats)) }
+
+    (group-by #'evenp *items* :key #'first)
+    ; => { t   ((2 cats))
+    ;      nil ((1 foo) (1 bar) (3 cats)) }
+
+  "
+  (iterate
+    (with result = (make-hash-table :test test))
+    (for i :in-whatever sequence)
+    (push i (gethash (funcall function (funcall key i)) result))
+    (finally (return result))))
+
 
 ;;;; Debugging & Logging
 (defun pr (&rest args)
@@ -1356,6 +1395,58 @@
     (finding item :such-that (< n weight))))
 
 
+;;;; Bit Sets
+;;; Implementation of the sets-as-integers idea in the Common Lisp Recipes book.
+(deftype bset () '(integer 0))
+
+
+(defun bset-empty ()
+  0)
+
+(defun bset-contains-p (bset i)
+  (logbitp i bset))
+
+(defun bset-union (s1 s2)
+  (logior s1 s2))
+
+(defun bset-intersection (s1 s2)
+  (logand s1 s2))
+
+(defun bset-difference (s1 s2)
+  (logandc2 s1 s2))
+
+(defun bset-insert (bset i)
+  (dpb 1 (byte 1 i) bset))
+
+(defun bset-remove (bset i)
+  (dpb 0 (byte 1 i) bset))
+
+(defun bset-count (bset)
+  (logcount bset))
+
+(defun bset= (s1 s2)
+  (= s1 s2))
+
+(defun bset-empty-p (bset)
+  (zerop bset))
+
+
+(defun make-bset (&rest is)
+  (reduce #'bset-insert is :initial-value 0))
+
+
+(defun bset-to-list (bset)
+  (iterate
+    (for i :from 0)
+    (for s :first bset :then (ash s -1))
+    (until (zerop s))
+    (when (logbitp 0 s)
+      (collect i))))
+
+(defun list-to-bset (list)
+  (apply #'make-bset list))
+
+
 ;;;; Licensing
 ;;; Original code from @dk_jackdaniel:
 ;;; http://paste.lisp.org/display/327154
--- a/make-docs.lisp	Thu Nov 03 18:26:11 2016 +0000
+++ b/make-docs.lisp	Tue Nov 08 15:47:12 2016 +0000
@@ -6,7 +6,6 @@
         "LOSH.ARRAYS"
         "LOSH.CONTROL-FLOW"
         "LOSH.DEBUGGING"
-        "LOSH.DISTRIBUTIONS"
         "LOSH.ELDRITCH-HORRORS"
         "LOSH.FUNCTIONS"
         "LOSH.ITERATE"
@@ -16,6 +15,7 @@
         "LOSH.MUTATION"
         "LOSH.QUEUES"
         "LOSH.RANDOM"
+        "LOSH.SEQUENCES"
         "LOSH.WEIGHTLISTS"
 
         ))
--- a/package.lisp	Thu Nov 03 18:26:11 2016 +0000
+++ b/package.lisp	Tue Nov 08 15:47:12 2016 +0000
@@ -141,11 +141,12 @@
     #:skip-origin
     #:macroexpand-iterate))
 
-(defpackage #:losh.distributions
-  (:documentation "Utilities for calculating statistical... things.")
+(defpackage #:losh.sequences
+  (:documentation "Utilities for operating on sequences.")
   (:export
     #:prefix-sums
-    #:frequencies))
+    #:frequencies
+    #:group-by))
 
 (defpackage #:losh.debugging
   (:documentation "Utilities for figuring out what the hell is going on.")
@@ -185,7 +186,7 @@
   (#:losh.arrays
    #:losh.control-flow
    #:losh.debugging
-   #:losh.distributions
+   #:losh.sequences
    #:losh.eldritch-horrors
    #:losh.functions
    #:losh.iterate