# HG changeset patch # User Steve Losh # Date 1462838365 0 # Node ID 6f08985f19afb81f866aaac2715045f047a55a00 # Parent b02a1a9684c2ce68a16428aa032f1ade3d535276 THE CELLING Convert all registers (local, argument, stack) to contain cells instead of addresses into the heap. This was extremely painful but necessary to move forward. diff -r b02a1a9684c2 -r 6f08985f19af src/wam/constants.lisp --- a/src/wam/constants.lisp Mon May 09 22:00:49 2016 +0000 +++ b/src/wam/constants.lisp Mon May 09 23:59:25 2016 +0000 @@ -20,6 +20,7 @@ :documentation "Maximum size of the WAM code store.") (define-constant +code-sentinal+ (1- +code-limit+) + ; TODO: Should this sentinal value be 0 like everything else? :documentation "Sentinal value used in the PC and CP.") @@ -37,7 +38,7 @@ (define-constant +register-count+ 2048 - :documentation "The number of registers the WAM has available.") + :documentation "The number of local registers the WAM has available.") (define-constant +maximum-arity+ 1024 @@ -59,7 +60,8 @@ :documentation "The address in the store of the first cell of the stack.") (define-constant +stack-end+ (+ +stack-start+ +stack-limit+) - :documentation "The address in the store one past the last cell in the stack.") + :documentation + "The address in the store one past the last cell in the stack.") (define-constant +heap-start+ +stack-end+ :documentation "The address in the store of the first cell of the heap.") @@ -68,10 +70,15 @@ ;; The trail's fill pointer is stored inside choice frames on the stack, so it ;; needs to be able to fit inside a stack word. We don't tag it, though, so ;; we can technically use all of the cell bits if we want. + ;; + ;; TODO: should probably limit this to something more reasonable :documentation "The maximum number of variables that may exist in the trail.") -(define-constant +store-limit+ (expt 2 16) +(define-constant +store-limit+ (expt 2 +cell-value-width+) + ;; Reference cells need to be able to store a heap address in their value + ;; bits, so that limits the amount of addressable space we've got to work + ;; with. :documentation "Maximum size of the WAM store.") (define-constant +heap-limit+ (- +store-limit+ +register-count+ +stack-limit+) diff -r b02a1a9684c2 -r 6f08985f19af src/wam/dump.lisp --- a/src/wam/dump.lisp Mon May 09 22:00:49 2016 +0000 +++ b/src/wam/dump.lisp Mon May 09 23:59:25 2016 +0000 @@ -101,7 +101,8 @@ (format nil "N: ~D" cell)))) ((< arg nargs) (prog1 - (format nil " Y~D: ~8,'0X" arg cell) + (format nil " Y~D: ~8,'0X ~A" + arg cell (cell-aesthetic cell)) (when (= nargs (incf arg)) (setf currently-in nil)))))) (:choice ; sweet lord make it stop @@ -123,7 +124,8 @@ ((= offset 6) "H saved heap pointer") ((< arg nargs) (prog1 - (format nil " Y~D: ~8,'0X" arg cell) + (format nil " A~D: ~8,'0X ~A" + arg cell (cell-aesthetic cell)) (when (= nargs (incf arg)) (setf currently-in nil)))))) (t "")) @@ -276,17 +278,16 @@ (defun dump-wam-registers (wam) (format t "REGISTERS:~%") - (format t "~5@A -> ~8@A~%" "S" (wam-subterm wam)) - (loop :for i :from 0 :to +register-count+ - :for reg :across (wam-store wam) - :for contents = (when (not (zerop reg)) - (wam-heap-cell wam reg)) - :when contents - :do (format t "~5@A -> ~8,'0X ~10A ~A~%" - (format nil "X~D" i) - reg - (cell-aesthetic contents) - (format nil "; ~A" (first (extract-things wam (list reg))))))) + (format t "~5@A -> ~8X~%" "S" (wam-subterm wam)) + (loop :for register :from 0 :to +register-count+ + :for contents :across (wam-store wam) + :when (not (cell-null-p contents)) + :do + (format t "~5@A -> ~8,'0X ~10A ~A~%" + (format nil "X~D" register) + contents + (cell-aesthetic contents) + (format nil "; ~A" (first (extract-things wam (list register))))))) (defun dump-wam-functors (wam) (format t " FUNCTORS: ~S~%" (wam-functors wam))) @@ -296,7 +297,7 @@ (loop :for addr :across (wam-trail wam) :do (format t "~4,'0X ~A //" addr - (cell-aesthetic (wam-heap-cell wam addr)))) + (cell-aesthetic (wam-store-cell wam addr)))) (format t "~%")) (defun dump-labels (wam) diff -r b02a1a9684c2 -r 6f08985f19af src/wam/types.lisp --- a/src/wam/types.lisp Mon May 09 22:00:49 2016 +0000 +++ b/src/wam/types.lisp Mon May 09 23:59:25 2016 +0000 @@ -52,6 +52,8 @@ `(integer 3 ,+stack-frame-size-limit+)) (deftype stack-choice-size () + ;; TODO: is this actually right? check on frame size limit vs choice point + ;; size limit... `(integer 7 ,+stack-frame-size-limit+)) (deftype stack-frame-argcount () @@ -72,7 +74,7 @@ environment-pointer ; CE continuation-pointer ; CP stack-frame-argcount ; N - heap-index)) ; Yn + cell)) ; Yn (deftype stack-choice-word () '(or @@ -81,9 +83,32 @@ continuation-pointer ; CP, BP stack-frame-argcount ; N trail-index ; TR - heap-index)) ; An, H + heap-index ; H + cell)) ; An (deftype stack-word () '(or stack-frame-word stack-choice-word)) +;;;; Sanity Checks +;;; The values on the WAM stack are a bit of a messy situation. The WAM store +;;; is defined as an array of cells, but certain things on the stack aren't +;;; actually cells (e.g. the stored continuation pointer). +;;; +;;; This shouldn't be a problem (aside from being ugly) as long as our `cell` +;;; type is big enough to hold the values of these non-cell things. So let's +;;; just make sure that's the case... +(defun sanity-check-stack-type (type) + (assert (subtypep type 'cell) () + "Type ~A is too large to fit into a cell!" + type) + (values)) + +(sanity-check-stack-type 'stack-frame-argcount) +(sanity-check-stack-type 'environment-pointer) +(sanity-check-stack-type 'continuation-pointer) +(sanity-check-stack-type 'backtrack-pointer) +(sanity-check-stack-type 'trail-index) +(sanity-check-stack-type 'stack-word) + + diff -r b02a1a9684c2 -r 6f08985f19af src/wam/vm.lisp --- a/src/wam/vm.lisp Mon May 09 22:00:49 2016 +0000 +++ b/src/wam/vm.lisp Mon May 09 23:59:25 2016 +0000 @@ -28,19 +28,19 @@ (wam-heap-push! wam (make-cell-functor functor))) -(defun* bound-reference-p ((wam wam) (address heap-index)) +(defun* bound-reference-p ((wam wam) (address store-index)) (:returns boolean) "Return whether the cell at `address` is a bound reference." (ensure-boolean - (let ((cell (wam-heap-cell wam address))) + (let ((cell (wam-store-cell wam address))) (and (cell-reference-p cell) (not (= (cell-value cell) address)))))) -(defun* unbound-reference-p ((wam wam) (address heap-index)) +(defun* unbound-reference-p ((wam wam) (address store-index)) (:returns boolean) "Return whether the cell at `address` is an unbound reference." (ensure-boolean - (let ((cell (wam-heap-cell wam address))) + (let ((cell (wam-store-cell wam address))) (and (cell-reference-p cell) (= (cell-value cell) address))))) @@ -76,14 +76,14 @@ (wam-backtracked wam) t)) (values)) -(defun* trail! ((wam wam) (address heap-index)) +(defun* trail! ((wam wam) (address store-index)) (:returns :void) "Push the given address onto the trail (but only if necessary)." (when (< address (wam-heap-backtrack-pointer wam)) (wam-trail-push! wam address)) (values)) -(defun* unbind! ((wam wam) (address heap-index)) +(defun* unbind! ((wam wam) (address store-index)) (:returns :void) "Unbind the reference cell at `address`. @@ -91,7 +91,7 @@ a reference cell. " - (setf (wam-heap-cell wam address) + (setf (wam-store-cell wam address) (make-cell-reference address)) (values)) @@ -105,9 +105,9 @@ (unbind! wam (wam-trail-value wam i))) (values)) -(defun* deref ((wam wam) (address heap-index)) - (:returns heap-index) - "Dereference the address in the WAM to its eventual destination. +(defun* deref ((wam wam) (address store-index)) + (:returns store-index) + "Dereference the address in the WAM store to its eventual destination. If the address is a variable that's bound to something, that something will be looked up (recursively) and the address of whatever it's ultimately bound to @@ -115,10 +115,10 @@ " (if (bound-reference-p wam address) - (deref wam (cell-value (wam-heap-cell wam address))) + (deref wam (cell-value (wam-store-cell wam address))) address)) -(defun* bind! ((wam wam) (address-1 heap-index) (address-2 heap-index)) +(defun* bind! ((wam wam) (address-1 store-index) (address-2 store-index)) (:returns :void) "Bind the unbound reference cell to the other. @@ -133,19 +133,19 @@ (cond ;; a1 <- a2 ((unbound-reference-p wam address-1) - (setf (wam-heap-cell wam address-1) + (setf (wam-store-cell wam address-1) (make-cell-reference address-2)) (trail! wam address-1)) ;; a2 <- 1a ((unbound-reference-p wam address-2) - (setf (wam-heap-cell wam address-2) + (setf (wam-store-cell wam address-2) (make-cell-reference address-1)) (trail! wam address-2)) ;; wut (t (error "At least one cell must be an unbound reference when binding."))) (values)) -(defun* unify! ((wam wam) (a1 heap-index) (a2 heap-index)) +(defun* unify! ((wam wam) (a1 store-index) (a2 store-index)) (wam-unification-stack-push! wam a1) (wam-unification-stack-push! wam a2) (setf (wam-fail wam) nil) @@ -155,8 +155,8 @@ (let ((d1 (deref wam (wam-unification-stack-pop! wam))) (d2 (deref wam (wam-unification-stack-pop! wam)))) (when (not (= d1 d2)) - (let ((cell-1 (wam-heap-cell wam d1)) - (cell-2 (wam-heap-cell wam d2))) + (let ((cell-1 (wam-store-cell wam d1)) + (cell-2 (wam-store-cell wam d2))) (if (or (cell-reference-p cell-1) (cell-reference-p cell-2)) ;; If at least one is a reference, bind them. @@ -167,8 +167,8 @@ ;; Otherwise we're looking at two structures (hopefully, lol). (let* ((structure-1-addr (cell-value cell-1)) ; find where they (structure-2-addr (cell-value cell-2)) ; start on the heap - (functor-1 (wam-heap-cell wam structure-1-addr)) ; grab the - (functor-2 (wam-heap-cell wam structure-2-addr))) ; functors + (functor-1 (wam-store-cell wam structure-1-addr)) ; grab the + (functor-2 (wam-store-cell wam structure-2-addr))) ; functors (if (functors-match-p functor-1 functor-2) ;; If the functors match, push their pairs of arguments onto ;; the stack to be unified. @@ -247,33 +247,28 @@ ((wam wam) (functor functor-index) (register register-index)) - (->> (push-new-structure! wam) - (nth-value 1) - (setf (wam-local-register wam register))) + (setf (wam-local-register wam register) + (push-new-structure! wam)) (push-new-functor! wam functor)) (define-instructions (%set-variable-local %set-variable-stack) ((wam wam) (register register-index)) - (->> (push-unbound-reference! wam) - (nth-value 1) - (setf (%wam-register% wam register)))) + (setf (%wam-register% wam register) + (push-unbound-reference! wam))) (define-instructions (%set-value-local %set-value-stack) ((wam wam) (register register-index)) - (wam-heap-push! wam (->> register - (%wam-register% wam) - (wam-heap-cell wam)))) + (wam-heap-push! wam (%wam-register% wam register))) (define-instructions (%put-variable-local %put-variable-stack) ((wam wam) (register register-index) (argument register-index)) - (->> (push-unbound-reference! wam) - (nth-value 1) - (setf (%wam-register% wam register)) - (setf (wam-local-register wam argument)))) + (let ((new-reference (push-unbound-reference! wam))) + (setf (%wam-register% wam register) new-reference + (wam-local-register wam argument) new-reference))) (define-instructions (%put-value-local %put-value-stack) ((wam wam) @@ -288,8 +283,8 @@ (functor functor-index) (register register-index)) (with-accessors ((mode wam-mode) (s wam-subterm)) wam - (let* ((addr (deref wam (wam-local-register wam register))) - (cell (wam-heap-cell wam addr))) + (let* ((addr (deref wam register)) + (cell (wam-store-cell wam addr))) (cond ;; If the register points at a reference cell, we push two new cells onto ;; the heap: @@ -329,11 +324,11 @@ ;; stream of instructions will be another get-structure and we'll just ;; blow away the S register there. ((cell-structure-p cell) - (let* ((functor-addr (cell-value cell)) - (functor-cell (wam-heap-cell wam functor-addr))) + (let* ((functor-address (cell-value cell)) + (functor-cell (wam-heap-cell wam functor-address))) (if (matching-functor-p functor-cell functor) (setf mode :read - s (1+ functor-addr)) + s (1+ functor-address)) (backtrack! wam "Functors don't match in get-struct")))) (t (backtrack! wam (format nil "get-struct on a non-ref/struct cell ~A" (cell-aesthetic cell)))))))) @@ -341,25 +336,18 @@ (define-instructions (%unify-variable-local %unify-variable-stack) ((wam wam) (register register-index)) - (ecase (wam-mode wam) - (:read (setf (%wam-register% wam register) - (wam-subterm wam))) - (:write (->> (push-unbound-reference! wam) - (nth-value 1) - (setf (%wam-register% wam register))))) + (setf (%wam-register% wam register) + (ecase (wam-mode wam) + (:read (wam-heap-cell wam (wam-subterm wam))) + (:write (push-unbound-reference! wam)))) (incf (wam-subterm wam))) (define-instructions (%unify-value-local %unify-value-stack) ((wam wam) (register register-index)) (ecase (wam-mode wam) - (:read (unify! wam - (%wam-register% wam register) - (wam-subterm wam))) - (:write (wam-heap-push! wam - (->> register - (%wam-register% wam) - (wam-heap-cell wam))))) + (:read (unify! wam register (wam-subterm wam))) + (:write (wam-heap-push! wam (%wam-register% wam register)))) (incf (wam-subterm wam))) (define-instructions (%get-variable-local %get-variable-stack) @@ -373,9 +361,7 @@ ((wam wam) (register register-index) (argument register-index)) - (unify! wam - (%wam-register% wam register) - (wam-local-register wam argument))) + (unify! wam register argument)) ;;;; Control Instructions @@ -491,7 +477,7 @@ (defun extract-things (wam addresses) - "Extract the things at the given heap addresses. + "Extract the things at the given store addresses. The things will be returned in the same order as the addresses were given. @@ -511,7 +497,7 @@ (cdr (or (assoc address unbound-vars) (mark-unbound-var address)))) (recur (address) - (let ((cell (wam-heap-cell wam (deref wam address)))) + (let ((cell (wam-store-cell wam (deref wam address)))) (cond ((cell-null-p cell) "NULL?!") ((cell-reference-p cell) (extract-var (cell-value cell))) @@ -530,8 +516,9 @@ (defun extract-query-results (wam vars) (let* ((addresses (loop :for var :in vars - :for i :from 0 - :collect (wam-stack-frame-arg wam i))) + ;; TODO: make this suck less + :for i :from (+ (wam-environment-pointer wam) 3) + :collect i)) (results (extract-things wam addresses))) (weave vars results))) diff -r b02a1a9684c2 -r 6f08985f19af src/wam/wam.lisp --- a/src/wam/wam.lisp Mon May 09 22:00:49 2016 +0000 +++ b/src/wam/wam.lisp Mon May 09 23:59:25 2016 +0000 @@ -71,16 +71,16 @@ (make-array 16 :fill-pointer 0 :adjustable t - :element-type 'heap-index) - :type (vector heap-index) + :element-type 'store-index) + :type (vector store-index) :read-only t) (trail (make-array 64 :fill-pointer 0 :adjustable t :initial-element 0 - :element-type 'heap-index) - :type (vector heap-index) + :element-type 'store-index) + :type (vector store-index) :read-only t) ;; Unique registers @@ -99,6 +99,21 @@ (mode nil :type (or null (member :read :write)))) +;;;; Store +(defun* wam-store-cell ((wam wam) (address store-index)) + (:returns cell) + "Return the cell at the given address. + + Please don't use this unless you absolutely have to. Prefer something more + specific like `wam-heap-cell` else so you've got some extra sanity checking... + + " + (aref (wam-store wam) address)) + +(defun (setf wam-store-cell) (new-value wam address) + (setf (aref (wam-store wam) address) new-value)) + + ;;;; Heap ;;; The WAM heap is all the memory left in the store after the local registers ;;; and stack have been accounted for. Because the store is adjustable and the @@ -157,8 +172,8 @@ (setf (fill-pointer (wam-trail wam)) new-value)) -(defun* wam-trail-push! ((wam wam) (address heap-index)) - (:returns (values heap-index trail-index)) +(defun* wam-trail-push! ((wam wam) (address store-index)) + (:returns (values store-index trail-index)) "Push `address` onto the trail. Returns the address and the trail address it was pushed to. @@ -170,14 +185,14 @@ (values address (vector-push-extend address trail))))) (defun* wam-trail-pop! ((wam wam)) - (:returns heap-index) + (:returns store-index) "Pop the top address off the trail and return it." (vector-pop (wam-trail wam))) (defun* wam-trail-value ((wam wam) (address trail-index)) ;; TODO: can we really not just pop, or is something else gonna do something ;; fucky with the trail? - (:returns heap-index) + (:returns store-index) "Return the element (a heap index) in the WAM trail at `address`." (aref (wam-trail wam) address)) @@ -275,7 +290,7 @@ &optional ((e environment-pointer) (wam-environment-pointer wam))) - (:returns heap-index) + (:returns cell) (wam-stack-word wam (+ 3 n e))) (defun (setf wam-stack-frame-arg) @@ -283,15 +298,6 @@ (setf (wam-stack-word wam (+ e 3 n)) new-value)) -(defun* wam-stack-frame-arg-cell - ((wam wam) - (n register-index) - &optional - ((e environment-pointer) - (wam-environment-pointer wam))) - (:returns cell) - (wam-heap-cell wam (wam-stack-frame-arg wam n e))) - (defun* wam-stack-frame-size ((wam wam) @@ -381,7 +387,7 @@ &optional ((b backtrack-pointer) (wam-backtrack-pointer wam))) - (:returns heap-index) + (:returns cell) (wam-stack-word wam (+ b 7 n))) (defun (setf wam-stack-choice-arg) @@ -389,15 +395,6 @@ (setf (wam-stack-word wam (+ b 7 n)) new-value)) -(defun* wam-stack-choice-arg-cell - ((wam wam) - (n arity) - &optional - ((b backtrack-pointer) - (wam-backtrack-pointer wam))) - (:returns cell) - (wam-heap-cell wam (wam-stack-choice-arg wam n b))) - (defun* wam-stack-choice-size ((wam wam) @@ -541,24 +538,70 @@ ;;;; Registers -;;; The WAM has two types of registers. A register (regardless of type) always -;;; contains an index into the heap (basically a pointer to a heap cell). +;;; The WAM has two types of registers: ;;; -;;; Local/temporary/arguments registers live at the beginning of the WAM memory -;;; store. +;;; * Local/temporary/arguments registers live at the beginning of the WAM +;;; memory store. ;;; -;;; Stack/permanent registers live on the stack, and need some extra math to -;;; find their location. +;;; * Stack/permanent registers live on the stack, and need some extra math to +;;; find their location. ;;; ;;; Registers are typically denoted by their "register index", which is just ;;; their number. Hoever, the bytecode needs to be able to distinguish between ;;; local and stack registers. To do this we just make separate opcodes for ;;; each kind. This is ugly, but it lets us figure things out at compile time ;;; instead of runtime, and register references happen A LOT at runtime. +;;; +;;; As for the CONTENTS of register: a register (regardless of type) always +;;; contains a cell. The book is maddeningly unclear on this in a bunch of +;;; ways. I will list them here so maybe you can feel a bit of my suffering +;;; through these bytes of text. +;;; +;;; The first thing the book says about registers is "registers have the same +;;; format as heap cells". Okay, fine. The *very next diagram* shows "register +;;; assignments" that appear to put things that are very much *not* heap cells +;;; into registers! +;;; +;;; After a bit of puttering you realize that the diagram is referring only to +;;; the compilation, not what's *actually* stored in these registers at runtime. +;;; You move on and see some pseudocode that contains `X_i <- HEAP[H]` which +;;; confirms that his original claim was accurate, and registers are actually +;;; (copies of) heap cells. Cool. +;;; +;;; Then you move on and see the definition of `deref(a : address)` and note +;;; that it takes an *address* as an argument. On the next page you see +;;; `deref(X_i)` and wait what the fuck, a register is an *address* now? You +;;; scan down the page and see `HEAP[H] <- X_i` which means no wait it's a cell +;;; again. +;;; +;;; After considering depositing your laptop into the nearest toilet and +;;; becoming a sheep farmer, you conclude a few things: +;;; +;;; 1. The book's code won't typecheck. +;;; 2. The author is playing fast and loose with `X_i` -- sometimes it seems to +;;; be used as an address, sometimes as a cell. +;;; 3. The author never bothers to nail down exactly what is inside the fucking +;;; things, which is a problem because of #2. +;;; +;;; If you're like me (painfully unlucky), you took a wild guess and decided to +;;; implement registers as containing *addresses*, i.e., indexes into the +;;; heap, figuring that if you were wrong it would soon become apparent. +;;; +;;; WELL it turns out that you can get all the way to CHAPTER FIVE with +;;; registers implemented as addresses, at which point you hit a wall and need +;;; to spend a few hours refactoring a giant chunk of your code and writing +;;; angry comments in your source code. +;;; +;;; Hopefully I can save someone else this misery by leaving you with this: +;;; ____ _____________________________________ _____ ___ ____ ______ ______________ __ _____ +;;; / __ \/ ____/ ____/ _/ ___/_ __/ ____/ __ \/ ___/ / | / __ \/ ____/ / ____/ ____/ / / / / ___/ +;;; / /_/ / __/ / / __ / / \__ \ / / / __/ / /_/ /\__ \ / /| | / /_/ / __/ / / / __/ / / / / \__ \ +;;; / _, _/ /___/ /_/ // / ___/ // / / /___/ _, _/___/ / / ___ |/ _, _/ /___ / /___/ /___/ /___/ /______/ / +;;; /_/ |_/_____/\____/___//____//_/ /_____/_/ |_|/____/ /_/ |_/_/ |_/_____/ \____/_____/_____/_____/____/ (defun* wam-local-register ((wam wam) (register register-index)) - (:returns (or (eql 0) heap-index)) - "Return the value of the WAM local register with the given index." + (:returns cell) + "Return the value stored in the WAM local register with the given index." (aref (wam-store wam) register)) (defun (setf wam-local-register) (new-value wam register) @@ -566,8 +609,8 @@ (defun* wam-stack-register ((wam wam) (register register-index)) - (:returns (or (eql 0) heap-index)) - "Return the value of the WAM stack register with the given index." + (:returns cell) + "Return the value stored in the WAM stack register with the given index." (wam-stack-frame-arg wam register)) (defun (setf wam-stack-register) (new-value wam register) @@ -618,13 +661,13 @@ ;;;; Unification Stack -(defun* wam-unification-stack-push! ((wam wam) (address heap-index)) +(defun* wam-unification-stack-push! ((wam wam) (address store-index)) (:returns :void) (vector-push-extend address (wam-unification-stack wam)) (values)) (defun* wam-unification-stack-pop! ((wam wam)) - (:returns heap-index) + (:returns store-index) (vector-pop (wam-unification-stack wam))) (defun* wam-unification-stack-empty-p ((wam wam))