# HG changeset patch # User Steve Losh # Date 1633110051 14400 # Node ID 2d0b4dae45a0c7dc4a9211e770be594c1ce6c0e8 # Parent adb24692cb3df9b1ab5fa6c0f59cdecaebe1a17a Draft diff -r adb24692cb3d -r 2d0b4dae45a0 content/blog/2021/04/clos-dependent-maintenance.markdown --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/content/blog/2021/04/clos-dependent-maintenance.markdown Fri Oct 01 13:40:51 2021 -0400 @@ -0,0 +1,496 @@ +(:title "Using the CLOS Dependent Maintenance Protocol" + :snip "Handling all the edge cases." + :date "2021-04-26T16:10:00Z" + :draft t) + +TODO: fix times!!!!!!!!!!!!!!!!!1 + +Recently I've been working on a Common Lisp library that makes use of the +[Metaobject Protocol](http://metamodular.com/CLOS-MOP/). I ran into a few edge +cases and it took a while for me to figure out how to solve them, so I wanted to +write down what I learned in case anyone else might find it useful. This post +is an expanded version of [a Reddit thread][reddit] I posted on the topic +a while ago. + +[reddit]: https://www.reddit.com/r/Common_Lisp/comments/kljyg1/need_advice_on_how_to_handle_metaclass_option/ + +
+ +## Setting the Stage + +Before we can see the problem, we need a simple example. We'll use the +`monitored-class` metaclass from [The Art of the Metaobject Protocol][amop] +(pages 96-97). Using this class as a metaclass will log all slot reads and +writes, which could be useful for auditing access to certain objects. +[amop]: https://en.wikipedia.org/wiki/The_Art_of_the_Metaobject_Protocol + +Before we get started, we'll need [Closer to MOP][closer-mop] as an +implementation compatibility layer: + +[closer-mop]: https://github.com/pcostanza/closer-mop + +```lisp +(ql:quickload :closer-mop) +``` + +First we define the metaclass: + +```lisp +(defclass monitored-class (standard-class) + ()) +``` + +Next we'll explicitly say that it's okay for a monitored class to have +superclasses that are standard classes: + +```lisp +(defmethod c2mop:validate-superclass + ((class monitored-class) (superclass standard-class)) + t) +``` + +And now we can define the actual monitoring functionality. We'll use `:before` +methods on `slot-value-using-class` and its `setf` version to log the reads and +writes of all slots: + +```lisp +(defmethod c2mop:slot-value-using-class :before + ((class monitored-class) instance slot) + (format t "Reading slot ~A of ~A at ~A.~%" + (c2mop:slot-definition-name slot) instance (get-universal-time))) + +(defmethod (setf c2mop:slot-value-using-class) :before + (new-value (class monitored-class) instance slot) + (format t "Setting slot ~A of ~A to ~S at ~A.~%" + (c2mop:slot-definition-name slot) instance new-value (get-universal-time))) +``` + +With that complete, we can define a new monitored class: + +```lisp +(defclass user () + ((id :initarg :id :accessor id) + (name :initarg :name :accessor name)) + (:metaclass monitored-class)) +``` + +And now we can see it in action: + +```lisp +(defparameter *u* (make-instance 'user :id 1 :name "sjl")) +; => Setting slot ID of # to 1 at 3828527923. +; => Setting slot NAME of # to "sjl" at 3828527923. +; => *U* + +(id *u*) +; => Reading slot ID of # at 3828527937. +; => 1 + +(setf (name *u*) "Steve") +; => Setting slot NAME of # to "Steve" at 3828527946. +; => "Steve" +``` + +## Adding More Flexibility + +Now that we have a toy example working, let's make it a little more flexible. +Instead of always generating a string and writing it to standard out, we'll +allow users to provide a `:monitoring-function` as a class option that will +receive the data and can do whatever it wants, for example: + +* Logging to syslog instead of standard out. +* Inserting a row into a Postgres database as an audit log. +* Tracking read/write counts in a hash table to find slots that are written more + often than they're read and vice versa. + +A monitoring function will receive 2 arguments (the instance and slot name), +plus an optional third argument when a slot is written (the new value). We can +make a default monitoring function that works the same way as before: + +```lisp +(defun log-slot-access (instance slot-name &optional (new-value nil new-value?)) + (if new-value? + (format t "Setting slot ~A of ~A to ~S at ~A.~%" + slot-name instance new-value (get-universal-time)) + (format t "Reading slot ~A of ~A at ~A.~%" + slot-name instance (get-universal-time)))) +``` + +Note the use of the extended `&optional` form with the +[`supplied-p-parameter`][supplied] used to check whether a value was given, +which ensures this works correctly even when setting a slot to `nil`. + +[supplied]: http://www.lispworks.com/documentation/HyperSpec/Body/03_dab.htm + +Also note how we called it `new-value?` and not `new-value-p` as you'll +sometimes see people do. The `-p` in `new-value-p` stands for "predicate", and +a [predicate][predicate] is a *function* that returns a (generalized) boolean, +*not* a boolean itself. Using a name that ends in `-p` for a boolean value +(rather than for a predicate) is a pet peeve of mine. Unfortunately it happens +in a couple of places (even in Common Lisp itself), so it's something to watch +out for. + +[predicate]: http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_p.htm#predicate + +Now we can update our `monitored-class` to add a slot to store the monitoring +function for each class, and update the `slot-value-using-class` methods to use +that instead of writing the string themselves: + +```lisp +(defclass monitored-class (standard-class) + ((monitoring-function :initarg :monitoring-function + :accessor monitoring-function))) + +(defmethod c2mop:slot-value-using-class :before + ((class monitored-class) instance slot) + (funcall (monitoring-function class) + instance + (c2mop:slot-definition-name slot))) + +(defmethod (setf c2mop:slot-value-using-class) :before + (new-value (class monitored-class) instance slot) + (funcall (monitoring-function class) + instance + (c2mop:slot-definition-name slot) + new-value)) +``` + + +When a user creates a new `monitored-class`, we need to set the +`monitoring-function` slot appropriately. We might initially consider doing +this by having an `initform` for the `monitoring-function` slot in the +metaclass, like this: + +```lisp +(defclass monitored-class (standard-class) + ((monitoring-function :initarg :monitoring-function + :accessor monitoring-function + :initform #'log-slot-access))) ; default function +``` + +But this won't work for a number of reasons we'll see shortly. Instead we'll +need to handle the initialization ourselves. We'll do it in `shared-initialize` +so it will happen both when a class is first created and when it's reinitialized +(e.g. after it's redefined): + +```lisp +(defun parse-monitoring-function-class-option (arguments) + (case (length arguments) + (1 (eval (first arguments))) + (0 #'log-slot-access) + (t (error "Malformed class option ~S." + (list* :monitoring-function arguments))))) + +(defmethod shared-initialize :around + ((class monitored-class) slot-names + &rest initargs + &key monitoring-function &allow-other-keys) + (apply #'call-next-method class slot-names + :monitoring-function (parse-monitoring-function-class-option + monitoring-function) + initargs)) +``` + +If the user provided a `(:monitoring-function …)` class option we evaluate and +use it, otherwise we default to our simple logging function. + +There are a couple of things to note here. + +First, when `defclass` gets a class option like `(:monitoring-function foo)`, +what it *actually* passes to the `(re)initialize-instance` methods is the list +`(foo)`. This allows for class options with more than one argument. In our +case we only ever want a single argument, so we ensure the `length` of the +argument is `0` or `1` and handle the cases individually. + +Second, `defclass` does not evaluate the class option's arguments. If we say +`(:monitoring-function (lambda (i s &optional v) (print (list i s v))))` what we +get as the initarg will be `((lambda (i s &optional v) (print (list i s v))))`. +That's a list of a list of three elements, *not* a list of an actual function +object. If we want the arguments to be evaluated, we have to do it ourselves. +Unfortunately as far as I can tell there's no way to evaluate these arguments +from `defclass` in their lexical environment — we have to fall back to `eval` +and the null lexical environment. That means that something like this will not +work: + +```lisp +(flet ((monitor (instance slot-name &optional new-value) + …)) + (defclass foo () + (…slots…) + (:monitoring-function #'monitor))) +``` + +I haven't managed to find a way to make this work with `defclass`. If anyone +knows of a solution, please let me know. + +Third, you might notice that we're `apply`ing with the full `initargs` list, +which includes the original (unparsed) `monitoring-function`. But that keyword +argument will be shadowed by the `:monitoring-function` we add at the beginning, +so there's no need to bother removing it from `initargs` before we apply (though +it wouldn't hurt to do so). This is another Common Lisp idiom you'll see here +and there when someone wants to override a single keyword argument but preserve +all the rest. + +Now we can now talk about all the reasons why `:initform #'log-slot-access` +doesn't just magically solve all our problems. + +First, the `:initform` *would* work properly when you first define a class, but +we still need all the code in `shared-initialize` to do the `eval`ing of the +forms the user provides when they *don't* use it. + +Further, suppose a user runs: + +```lisp +(defclass foo () + ((some-slot :initarg :some-slot :accessor some-slot)) + (:metaclass monitored-class) + (:monitoring-function monitor-foo)) +``` + +Then later they *remove* the `:monitoring-function` from the `defclass` and +reevaluate it: + +```lisp +(defclass foo () + ((some-slot :initarg :some-slot :accessor some-slot)) + (:metaclass monitored-class)) +``` + +What the user (probably) expects here is for the class to have the default +monitoring function. This is what will happen if they start a fresh Lisp image +and load the current code into it. But if we had just used `:initform`, the +class would already have a value for the `monitoring-function` slot (the old +function) and since there's no *new* value being specified, the `:initform` +would never be used and nothing would get updated, so the class would continue +to use the old monitoring function. The user would have to clean things up +manually by killing the class with `(setf (find-class 'foo) nil)` and +reevaluating the `defclass`, or fixing the slot value up manually, or some other +ugly alternative. + +With all that out of the way, we can now use a custom `monitoring-function` to +do whatever we want: + +```lisp +(defparameter *user-slot-reads* (make-hash-table)) +(defparameter *user-slot-writes* (make-hash-table)) + +(defun track-user-slot-access + (instance slot-name &optional (new-value nil new-value?)) + (declare (ignore instance new-value)) + (incf (gethash slot-name + (if new-value? *user-slot-writes* *user-slot-reads*) + 0))) + +(defclass user () + ((id :initarg :id :accessor id) + (name :initarg :name :accessor name)) + (:metaclass monitored-class) + (:monitoring-function #'track-user-slot-access)) + +;; Two writes +(defparameter *u* (make-instance 'user :id 1 :name "sjl")) + +;; A read +(id *u*) + +;; Two more writes +(setf (name *u*) "steve") +(setf (name *u*) "sjl") + +;; Results +(alexandria:hash-table-alist *user-slot-reads*) +; => ((ID . 1)) + +(alexandria:hash-table-alist *user-slot-writes*) +; => ((NAME . 3) (ID . 1)) +``` + +## Toy Example Disclaimer™ + +The `monitored-class` example we've used so far is pretty small, and there are +a number of other ways we could accomplish the same thing, some of which might +not involve metaclasses at all. This might make my example seem overly +complicated. + +I wanted to keep the example small so I can focus on the actual problem I ran +into without getting bogged down in too many irrelevant details about a specific +implementation. If you're bothered by how we're using metaclasses here when +there are other ways to implement this toy example, feel free to implement +a more extensive `monitored-class` variant as an exercise: + +```lisp +(defclass user () + ((id …) + ;; Never monitor this slot: + (session-id … :monitored nil) + ;; We only care when this slot *changes*: + (role … :monitored/reads nil) + ;; Names are PII, redact their values before logging: + (name … :monitored/redact-value t) + ;; Redact the user portion of the email address, logging only the domain: + (email … :monitored/redact-value #'scrub-email)) + (:metaclass monitored-class) + (:monitoring-function #'log-to-syslog) + ;; Allow us to turn monitoring on/off globally: + (:monitor-when #'monitoring-enabled-p)) +``` + +## The Problem + +```lisp +(defclass paid-user (user) + ((plan :initarg :plan + :accessor plan + :type (member :bronze :silver :gold))) + (:metaclass monitored-class)) + +(make-instance 'paid-user :id 2 :name "moneybags" :plan :gold) +; => +; ?????????? +``` + + +```lisp +(defclass user () + (…slots…) + (:metaclass monitored-class) + (:monitoring-function yell)) + +; inherits YELL as the monitoring-function from the superclass +(defclass paid-user (user) + (…slots…) + (:metaclass monitored-class)) + +; actually let's log user slot accesses to Postgres for more safety +(defclass user () + (…slots…) + (:metaclass monitored-class) + (:monitoring-function log-to-postgres)) +``` + +## The Dependent Maintenance Protocol + + + + +```lisp +(ql:quickload :closer-mop) + +(defclass dep () + ((val :accessor val :initarg :val))) + +(defclass mc (standard-class) + ((x :accessor x) + ;; We need to not only store the value, but also store what the user + ;; originally gave, so we can recompute later if one of our superclasses + ;; changes. + ;; + ;; This slot will be unbound if the user didn't give an (:x foo) option, or + ;; bound to (foo) if they did. + (given-x :initarg :x :accessor given-x))) + +(defun mcp (object) + (typep object 'mc)) + +(defmethod c2mop:validate-superclass ((a mc) (b standard-class)) t) + +(defun dep= (d class) + (and (typep d 'dep) ; need to filter out any other deps other code might have added + (eql (val d) class))) + +(defun ensure-dep (superclass class) + (c2mop:map-dependents superclass + (lambda (d) + (when (dep= d class) + (return-from ensure-dep)))) + (c2mop:add-dependent superclass (make-instance 'dep :val class))) + +(defun ensure-no-dep (superclass class) + (c2mop:map-dependents superclass + (lambda (d) + (when (dep= d class) + (c2mop:remove-dependent superclass d) + (return-from ensure-no-dep))))) + +(defun recompute (&key class superclasses slot given-slot value value? default) + (format *debug-io* "~%Recomputing ~A of ~A." slot class) + (setf superclasses (remove-if-not #'mcp superclasses)) + ;; We need to store whether the user gave an explicit value for later. + (if value? + (setf (slot-value class given-slot) value) + (slot-makunbound class given-slot)) + ;; Set the actual value to the given value, or the superclass value, or the default. + ;; + ;; I THINK we only need to look at direct superclasses, not the entire class + ;; precedence list, because while it's possible for an MC to inherit from + ;; a standard-class, the reverse is not possible. So the only way for an MC + ;; to get into the precedence list is to be there directly or come through + ;; another MC that IS direct (and which would come first in the full list + ;; anyway). + (setf (slot-value class slot) + (cond + (value? (first value)) + (superclasses (slot-value (first superclasses) slot)) + (t default)))) + + +(defmethod initialize-instance :around ((class mc) &key + (x nil x?) + direct-superclasses &allow-other-keys) + (recompute :class class + :superclasses direct-superclasses + :slot 'x + :given-slot 'given-x + :value x + :value? x? + :default :some-default) + (call-next-method) + ;; You might think we could get away with only having dependencies on + ;; superclasses that happen to be our specific metaclass, instead of on ALL + ;; direct superclasses. Sadly this fails for forward-referenced classes, so + ;; we need to add dependencies on all of them and filter out the non-MC + ;; classes later. + (dolist (superclass (c2mop:class-direct-superclasses class)) + (ensure-dep superclass class))) + +(defmethod reinitialize-instance :around ((class mc) &key + (x nil x?) + (direct-superclasses nil direct-superclasses?) + &allow-other-keys) + ;; We have to recompute X /before/ we call-next-method because the + ;; update-dependent calls happen as part of that next method. If we wait + ;; until after call-next-method to patch up X, then the dependent will still + ;; see the old version when it's updated and won't get the new value until + ;; a second round of initialization. + (recompute :class class + :superclasses (if direct-superclasses? + direct-superclasses + (c2mop:class-direct-superclasses class)) + :slot 'x + :given-slot 'given-x + :value x + :value? x? + :default :some-default) + (let ((before (c2mop:class-direct-superclasses class))) + (call-next-method) + (let* ((after (c2mop:class-direct-superclasses class)) + (removed (set-difference before after)) + (added (set-difference after before))) + (dolist (superclass removed) + (ensure-no-dep superclass class)) + (dolist (superclass added) + (ensure-dep superclass class))))) + +(defmethod c2mop:update-dependent (obj (dep dep) &rest initargs) + (declare (ignore initargs)) + (when (typep obj 'mc) ; We can ignore changes in non-MC superclasses here. + (let ((class (val dep))) + (format *debug-io* "~%Updating ~A because ~A changed." class obj) + ;; Need to call reinitialize-instance here (instead of just recomputing the + ;; slots) because otherwise transitive dependencies won't get updated + ;; properly. + (apply #'reinitialize-instance class + (if (slot-boundp class 'given-x) + (list :x (slot-value class 'given-x)) + '()))))) +```