# HG changeset patch # User Steve Losh # Date 1471730787 0 # Node ID 81939d20415a75f76263a87c2f742f62c1c57b77 # Parent 19200659513a3d1517fd05da8b6c5c55e148c680 Remove newly-useless nesting in the src directory diff -r 19200659513a -r 81939d20415a bones.asd --- a/bones.asd Sat Aug 20 21:56:20 2016 +0000 +++ b/bones.asd Sat Aug 20 22:06:27 2016 +0000 @@ -24,26 +24,24 @@ :components ((:file "utils") (:file "circle") - (:module "wam" + (:file "constants") + (:file "types") + (:file "bytecode") + (:file "wam") + (:module "compiler" :serial t - :components ((:file "constants") - (:file "types") - (:file "bytecode") - (:file "wam") - (:module "compiler" - :serial t - :components ((:file "0-data") - (:file "1-parsing") - (:file "2-register-allocation") - (:file "3-flattening") - (:file "4-tokenization") - (:file "5-precompilation") - (:file "6-optimization") - (:file "7-rendering") - (:file "8-ui"))) - (:file "vm") - (:file "dump") - (:file "ui"))) + :components ((:file "0-data") + (:file "1-parsing") + (:file "2-register-allocation") + (:file "3-flattening") + (:file "4-tokenization") + (:file "5-precompilation") + (:file "6-optimization") + (:file "7-rendering") + (:file "8-ui"))) + (:file "vm") + (:file "dump") + (:file "ui") (:file "bones"))))) (asdf:defsystem #:bones-test diff -r 19200659513a -r 81939d20415a src/bytecode.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/bytecode.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,172 @@ +(in-package #:bones.wam) + + +;;;; Opcodes +(defun opcode-name (opcode) + (eswitch (opcode) + (+opcode-noop+ "NOOP") + + (+opcode-get-structure+ "GET-STRUCTURE") + (+opcode-get-variable-local+ "GET-VARIABLE") + (+opcode-get-variable-stack+ "GET-VARIABLE") + (+opcode-get-value-local+ "GET-VALUE") + (+opcode-get-value-stack+ "GET-VALUE") + + (+opcode-put-structure+ "PUT-STRUCTURE") + (+opcode-put-variable-local+ "PUT-VARIABLE") + (+opcode-put-variable-stack+ "PUT-VARIABLE") + (+opcode-put-value-local+ "PUT-VALUE") + (+opcode-put-value-stack+ "PUT-VALUE") + (+opcode-put-void+ "PUT-VOID") + + (+opcode-subterm-variable-local+ "SUBTERM-VARIABLE") + (+opcode-subterm-variable-stack+ "SUBTERM-VARIABLE") + (+opcode-subterm-value-local+ "SUBTERM-VALUE") + (+opcode-subterm-value-stack+ "SUBTERM-VALUE") + (+opcode-subterm-void+ "SUBTERM-VOID") + + (+opcode-jump+ "JUMP") + (+opcode-call+ "CALL") + (+opcode-dynamic-jump+ "DYNAMIC-JUMP") + (+opcode-dynamic-call+ "DYNAMIC-CALL") + (+opcode-proceed+ "PROCEED") + (+opcode-allocate+ "ALLOCATE") + (+opcode-deallocate+ "DEALLOCATE") + (+opcode-done+ "DONE") + (+opcode-try+ "TRY") + (+opcode-retry+ "RETRY") + (+opcode-trust+ "TRUST") + (+opcode-cut+ "CUT") + + (+opcode-get-constant+ "GET-CONSTANT") + (+opcode-put-constant+ "PUT-CONSTANT") + (+opcode-subterm-constant+ "SUBTERM-CONSTANT") + + (+opcode-get-list+ "GET-LIST") + (+opcode-put-list+ "PUT-LIST") + + (+opcode-get-lisp-object+ "GET-LISP-OBJECT") + (+opcode-put-lisp-object+ "PUT-LISP-OBJECT"))) + +(defun opcode-short-name (opcode) + (eswitch (opcode) + (+opcode-noop+ "NOOP") + + (+opcode-get-structure+ "GETS") + (+opcode-get-variable-local+ "GVAR") + (+opcode-get-variable-stack+ "GVAR") + (+opcode-get-value-local+ "GVLU") + (+opcode-get-value-stack+ "GVLU") + + (+opcode-put-structure+ "PUTS") + (+opcode-put-variable-local+ "PVAR") + (+opcode-put-variable-stack+ "PVAR") + (+opcode-put-value-local+ "PVLU") + (+opcode-put-value-stack+ "PVLU") + (+opcode-put-void+ "PVOI") + + (+opcode-subterm-variable-local+ "SVAR") + (+opcode-subterm-variable-stack+ "SVAR") + (+opcode-subterm-value-local+ "SVLU") + (+opcode-subterm-value-stack+ "SVLU") + (+opcode-subterm-void+ "SVOI") + + (+opcode-jump+ "JUMP") + (+opcode-call+ "CALL") + (+opcode-dynamic-jump+ "DYJP") + (+opcode-dynamic-call+ "DYCL") + (+opcode-proceed+ "PROC") + (+opcode-allocate+ "ALOC") + (+opcode-deallocate+ "DEAL") + (+opcode-done+ "DONE") + (+opcode-try+ "TRYM") + (+opcode-retry+ "RTRY") + (+opcode-trust+ "TRST") + (+opcode-cut+ "CUTT") + + (+opcode-get-constant+ "GCON") + (+opcode-put-constant+ "PCON") + (+opcode-subterm-constant+ "UCON") + + (+opcode-get-list+ "GLST") + (+opcode-put-list+ "PLST") + + (+opcode-get-lisp-object+ "GLOB") + (+opcode-put-lisp-object+ "PLOB"))) + + +;;;; Instructions +(define-lookup instruction-size (opcode instruction-size 0) + "Return the size of an instruction for the given opcode. + + The size includes one word for the opcode itself and one for each argument. + + " + (#.+opcode-noop+ 1) + + (#.+opcode-get-structure+ 4) + (#.+opcode-get-variable-local+ 3) + (#.+opcode-get-variable-stack+ 3) + (#.+opcode-get-value-local+ 3) + (#.+opcode-get-value-stack+ 3) + + (#.+opcode-put-structure+ 4) + (#.+opcode-put-variable-local+ 3) + (#.+opcode-put-variable-stack+ 3) + (#.+opcode-put-value-local+ 3) + (#.+opcode-put-value-stack+ 3) + (#.+opcode-put-void+ 2) + + (#.+opcode-subterm-variable-local+ 2) + (#.+opcode-subterm-variable-stack+ 2) + (#.+opcode-subterm-value-local+ 2) + (#.+opcode-subterm-value-stack+ 2) + (#.+opcode-subterm-void+ 2) + + (#.+opcode-jump+ 3) + (#.+opcode-call+ 3) + (#.+opcode-dynamic-jump+ 1) + (#.+opcode-dynamic-call+ 1) + (#.+opcode-proceed+ 1) + (#.+opcode-allocate+ 2) + (#.+opcode-deallocate+ 1) + (#.+opcode-done+ 1) + (#.+opcode-try+ 2) + (#.+opcode-retry+ 2) + (#.+opcode-trust+ 1) + (#.+opcode-cut+ 1) + + (#.+opcode-get-constant+ 3) + (#.+opcode-put-constant+ 3) + (#.+opcode-subterm-constant+ 2) + + (#.+opcode-get-list+ 2) + (#.+opcode-put-list+ 2) + + (#.+opcode-get-lisp-object+ 3) + (#.+opcode-put-lisp-object+ 3)) + + +;;;; Cells +(define-lookup cell-type-name (type string "") + "Return the full name of a cell type." + (#.+cell-type-null+ "NULL") + (#.+cell-type-structure+ "STRUCTURE") + (#.+cell-type-reference+ "REFERENCE") + (#.+cell-type-functor+ "FUNCTOR") + (#.+cell-type-constant+ "CONSTANT") + (#.+cell-type-list+ "LIST") + (#.+cell-type-lisp-object+ "LISP-OBJECT") + (#.+cell-type-stack+ "STACK")) + +(define-lookup cell-type-short-name (type string "") + "Return the short name of a cell type." + (#.+cell-type-null+ "NUL") + (#.+cell-type-structure+ "STR") + (#.+cell-type-reference+ "REF") + (#.+cell-type-functor+ "FUN") + (#.+cell-type-constant+ "CON") + (#.+cell-type-list+ "LIS") + (#.+cell-type-lisp-object+ "OBJ") + (#.+cell-type-stack+ "STK")) + diff -r 19200659513a -r 81939d20415a src/compiler/0-data.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/compiler/0-data.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,203 @@ +(in-package #:bones.wam) + +;;;; .-,--. . +;;;; ' | \ ,-. |- ,-. +;;;; , | / ,-| | ,-| +;;;; `-^--' `-^ `' `-^ + +;;;; Constants +(defconstant +choice-point-placeholder+ 'choice-point-placeholder) + + +;;;; Utils +(declaim (inline variablep)) + +(defun variablep (term) + (and (symbolp term) + (char= (char (symbol-name term) 0) #\?))) + +(defun lisp-object-to-string (o) + (with-output-to-string (str) + (print-unreadable-object (o str :type t :identity t)))) + +(defun required () + (error "Argument required.")) + + +;;;; Registers +(declaim (inline register-type register-number make-register register= + register-argument-p + register-temporary-p + register-permanent-p + register-anonymous-p)) + + +(deftype register-type () + '(member :argument :local :permanent :anonymous)) + +(deftype register-number () + `(integer 0 ,(1- +register-count+))) + + +(defstruct (register (:constructor make-register (type number))) + (type (required) :type register-type) + (number (required) :type register-number)) + + +(defun make-temporary-register (number arity) + (make-register (if (< number arity) :argument :local) + number)) + +(defun make-permanent-register (number) + (make-register :permanent number)) + +(defun make-anonymous-register () + (make-register :anonymous 0)) + + +(defun register-to-string (register) + (if (eq (register-type register) :anonymous) + "__" + (format nil "~A~D" + (ecase (register-type register) + (:argument #\A) + (:local #\X) + (:permanent #\Y)) + (+ (register-number register) + (if *off-by-one* 1 0))))) + +(defmethod print-object ((object register) stream) + (print-unreadable-object (object stream :identity nil :type nil) + (format stream (register-to-string object)))) + + +(defun register-argument-p (register) + (eq (register-type register) :argument)) + +(defun register-temporary-p (register) + (and (member (register-type register) '(:argument :local)) t)) + +(defun register-permanent-p (register) + (eq (register-type register) :permanent)) + +(defun register-anonymous-p (register) + (eq (register-type register) :anonymous)) + + +(defun register= (r1 r2) + (and (eq (register-type r1) + (register-type r2)) + (= (register-number r1) + (register-number r2)))) + + + +;;;; Clause Properties +;;; When tokenizing/precompiling a clause there are a few pieces of metadata +;;; we're going to need. We group them into a struct to make it easier to pass +;;; everything around. + +(defstruct (clause-properties (:conc-name clause-)) + (nead-vars nil :type list) + (nead-arity 0 :type arity) + (permanent-vars nil :type list) + (anonymous-vars nil :type list)) + + +(defun find-variables (terms) + "Return the set of variables in `terms`." + (let ((variables nil)) + (recursively ((term terms)) + (cond + ((variablep term) (pushnew term variables)) + ((consp term) (recur (car term)) + (recur (cdr term))) + (t nil))) + variables)) + +(defun find-shared-variables (terms) + "Return the set of all variables shared by two or more terms." + (labels + ((count-uses (variable) + (count-if (curry #'tree-member-p variable) terms)) + (shared-p (variable) + (> (count-uses variable) 1))) + (remove-if-not #'shared-p (find-variables terms)))) + +(defun find-permanent-variables (clause) + "Return a list of all the permanent variables in `clause`. + + Permanent variables are those that appear in more than one goal of the clause, + where the head of the clause is considered to be a part of the first goal. + + " + (if (<= (length clause) 2) + (list) ; Facts and chain rules have no permanent variables at all + (destructuring-bind (head body-first . body-rest) clause + ;; The head is treated as part of the first goal for the purposes of + ;; finding permanent variables. + (find-shared-variables (cons (cons head body-first) body-rest))))) + +(defun find-nead-variables (clause) + "Return a list of all variables in the nead of `clause`. + + The head and neck (first term in the body) are the 'nead'. + + " + (if (<= (length clause) 1) + (list) + (destructuring-bind (head body-first . body-rest) clause + (declare (ignore body-rest)) + (find-variables (list head body-first))))) + +(defun find-anonymous-variables (clause) + "Return a list of all anonymous variables in `clause`. + + Anonymous variables are variables that are only ever used once. + + " + (let ((seen nil) + (once nil)) + (recursively ((term clause)) + (cond + ((variablep term) + (if (member term seen) + (when (member term once) + (setf once (delete term once))) + (progn (push term seen) + (push term once)))) + ((consp term) (recur (car term)) + (recur (cdr term))) + (t nil))) + once)) + + +(defun determine-clause-properties (head body) + (let* ((clause + (cons head body)) + (permanent-vars + (if (null head) + ;; For query clauses we cheat a bit and make ALL variables + ;; permanent (except ?, of course), so we can extract their + ;; bindings as results later. + (remove +wildcard-symbol+ (find-variables body)) + (find-permanent-variables clause))) + (anonymous-vars + (if (null head) + ;; Again, for queries we cheat and never let anything be + ;; anonymous (except for the wildcard). + (list +wildcard-symbol+) + (cons +wildcard-symbol+ + (find-anonymous-variables clause)))) + (nead-vars + (set-difference (find-nead-variables clause) + permanent-vars)) + (nead-arity + (max (1- (length head)) + (1- (length (first (remove '! body))))))) ; gross + (make-clause-properties :nead-vars nead-vars + :nead-arity nead-arity + :permanent-vars permanent-vars + :anonymous-vars anonymous-vars))) + + diff -r 19200659513a -r 81939d20415a src/compiler/1-parsing.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/compiler/1-parsing.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,202 @@ +(in-package #:bones.wam) + +;;;; .-,--. +;;;; '|__/ ,-. ,-. ,-. . ,-. ,-. +;;;; ,| ,-| | `-. | | | | | +;;;; `' `-^ ' `-' ' ' ' `-| +;;;; ,| +;;;; `' + +; todo functor -> fname + +(defstruct node) + + +(defstruct (top-level-node (:include node)) + (functor nil :type symbol) + (arity 0 :type arity) + (arguments nil :type list)) + +(defstruct (vanilla-node (:include node) + (:conc-name node-)) + ;; The register allocated to store this node. + (register nil :type (or null register))) + + +(defstruct (structure-node (:include vanilla-node) + (:conc-name node-)) + (functor nil :type symbol) + (arity 0 :type arity) + (arguments nil :type list)) + +(defstruct (variable-node (:include vanilla-node) + (:conc-name node-)) + (variable nil :type symbol)) + +(defstruct (argument-variable-node (:include variable-node) + (:conc-name node-)) + ;; The register that actually holds the variable (NOT the argument register). + (secondary-register nil :type (or null register))) + +(defstruct (list-node (:include vanilla-node) + (:conc-name node-)) + (head (error "Head argument required") :type node) + (tail (error "Head argument required") :type node)) + +(defstruct (lisp-object-node (:include vanilla-node) + (:conc-name node-)) + (object nil :type t)) + + +(defgeneric node-children (node) + (:documentation + "Return the children of the given node. + + Presumably these will need to be traversed when allocating registers.")) + +(defmethod node-children ((node vanilla-node)) + (list)) + +(defmethod node-children ((node top-level-node)) + (top-level-node-arguments node)) + +(defmethod node-children ((node structure-node)) + (node-arguments node)) + +(defmethod node-children ((node list-node)) + (list (node-head node) (node-tail node))) + + +(defun nil-node-p (node) + "Return whether the given node is the magic nil/0 constant." + (and (typep node 'structure-node) + (eql (node-functor node) nil) + (zerop (node-arity node)))) + + +(defparameter *dump-node-indent* 0) + +(defun print-node-register (node stream &optional space-before) + (when (slot-boundp node 'register) + (format stream (if space-before " ~A =" "~A = ") (node-register node)))) + +(defun print-node-secondary-register (node stream &optional space-before) + (when (slot-boundp node 'secondary-register) + (format stream + (if space-before " ~A =" "~A = ") + (node-secondary-register node)))) + + +(defgeneric dump-node (node)) + +(defmethod dump-node ((node node)) + (format t "~VAAN NODE" *dump-node-indent* "")) + +(defmethod dump-node ((node variable-node)) + (format t "~VA#" (node-variable node))) + +(defmethod dump-node ((node argument-variable-node)) + (format t "~VA#" (node-variable node))) + +(defmethod dump-node ((node structure-node)) + (format t "~VA#")) + +(defmethod dump-node ((node list-node)) + (format t "~VA#")) + +(defmethod dump-node ((node lisp-object-node)) + (format t "~VA#" (lisp-object-to-string (node-object node)))) + +(defmethod dump-node ((node top-level-node)) + (with-slots (functor arity arguments) node + (format t "#<~A/~D" functor arity) + (let ((*dump-node-indent* 4)) + (dolist (n arguments) + (terpri) + (dump-node n))) + (format t ">"))) + +(defmethod print-object ((node node) stream) + (let ((*standard-output* stream)) + (dump-node node))) + + +(defun parse-list (contents) + (if contents + (make-list-node :head (parse (car contents)) + :tail (parse-list (cdr contents))) + (make-structure-node :functor nil + :arity 0 + :arguments ()))) + +(defun parse-list* (contents) + (destructuring-bind (next . remaining) contents + (if (null remaining) + (parse next) + (make-list-node :head (parse next) + :tail (parse-list* remaining))))) + +(defun parse (term &optional top-level-argument) + (cond + ((variablep term) + (if top-level-argument + (make-argument-variable-node :variable term) + (make-variable-node :variable term))) + ((symbolp term) + (parse (list term))) ; c/0 -> (c/0) + ((consp term) + (destructuring-bind (functor . arguments) term + (when (not (symbolp functor)) + (error + "Cannot parse term ~S because ~S is not a valid functor." + term functor)) + (case functor + (list (parse-list arguments)) + (list* (parse-list* arguments)) + (t (make-structure-node :functor functor + :arity (length arguments) + :arguments (mapcar #'parse arguments)))))) + ((numberp term) + (make-lisp-object-node :object term)) + (t (error "Cannot parse term ~S into a Prolog term." term)))) + +(defun parse-top-level (term) + (typecase term + (symbol (parse-top-level (list term))) ; c/0 -> (c/0) + (cons (destructuring-bind (functor . arguments) term + (when (not (symbolp functor)) + (error + "Cannot parse top-level term ~S because ~S is not a valid functor." + term functor)) + (make-top-level-node :functor functor + :arity (length arguments) + :arguments (mapcar (lambda (a) (parse a t)) + arguments)))) + (t (error "Cannot parse top-level term ~S into a Prolog term." term)))) + + diff -r 19200659513a -r 81939d20415a src/compiler/2-register-allocation.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/compiler/2-register-allocation.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,287 @@ +(in-package #:bones.wam) + +;;;; .-,--. . ,. . . . +;;;; `|__/ ,-. ,-. . ,-. |- ,-. ,-. / | | | ,-. ,-. ,-. |- . ,-. ,-. +;;;; )| \ |-' | | | `-. | |-' | /~~|-. | | | | | ,-| | | | | | | +;;;; `' ` `-' `-| ' `-' `' `-' ' ,' `-' `' `' `-' `-' `-^ `' ' `-' ' ' +;;;; ,| +;;;; `' + +;;; You might want to grab a coffee for this one. +;;; +;;; Consider this simple Prolog example: `p(A, q(A, r(B)))`. We're going to get +;;; this as a Lisp list: `(p :a (q :a (r b)))`. +;;; +;;; The goal is to turn this list into a set of register assignments. The book +;;; handwaves around how to do this, and it turns out to be pretty complicated. +;;; This example will (maybe, read on) be turned into: +;;; +;;; A0 <- X2 +;;; A1 <- (q X2 X3) +;;; X2 <- :a +;;; X3 <- (r X4) +;;; X4 <- :b +;;; +;;; There are a few things to note here. First: like the book says, the +;;; outermost predicate is stripped off and returned separately (later it'll be +;;; used to label the code for a program, or to figure out the procedure to call +;;; for a query). +;;; +;;; The first N registers are designated as argument registers. Structure +;;; assignments can live directly in the argument registers, but variables +;;; cannot. In the example above we can see that A1 contains a structure +;;; assignment. However, the variable `:a` doesn't live in A0 -- it lives in +;;; X2, which A0 points at. The books neglects to explain this little fact. +;;; +;;; The next edge case is permanent variables, which the book does talk about. +;;; Permanent variables are allocated to stack registers, so if `:b` was +;;; permanent in our example we'd get: +;;; +;;; A0 <- X2 +;;; A1 <- (q X2 X3) +;;; X2 <- :a +;;; X3 <- (r Y0) +;;; Y0 <- :b +;;; +;;; Note that the mapping of permanent variables to stack register numbers has +;;; to be consistent as we compile all the terms in a clause, so we cheat a bit +;;; here and just always add them all, in order, to the register assignment +;;; produced when parsing. They'll get flattened away later anyway -- it's the +;;; USES that we actually care about. In our example, the `Y0 <- :b` will get +;;; flattened away, but the USE of Y0 in X3 will remain). +;;; +;;; We're almost done, I promise, but there's one more edge case to deal with. +;;; +;;; When we've got a clause with a head and at least one body term, we need the +;;; head term and the first body term to share argument/local registers. For +;;; example, if we have the clause `p(Cats) :- q(A, B, C, Cats)` then when +;;; compiling the head `(p :cats)` we want to get: +;;; +;;; A0 <- X4 +;;; A1 <- ??? +;;; A2 <- ??? +;;; A3 <- ??? +;;; X4 <- :cats +;;; +;;; And when compiling `(q :a :b :c :cats)` we need: +;;; +;;; A0 <- X5 +;;; A1 <- X6 +;;; A2 <- X7 +;;; A3 <- X4 +;;; X4 <- :cats +;;; X5 <- :a +;;; X6 <- :b +;;; X7 <- :c +;;; +;;; What the hell are those empty argument registers in p? And why did we order +;;; the X registers of q like that? +;;; +;;; The book does not bother to mention this important fact at all, so to find +;;; out that you have to handle this you need to do the following: +;;; +;;; 1. Implement it without this behavior. +;;; 2. Notice your results are wrong. +;;; 3. Figure out the right bytecode on a whiteboard. +;;; 4. Try to puzzle out why that bytecode isn't generated when you do exactly +;;; what the book says. +;;; 5. Scour IRC and the web for scraps of information on what the hell you need +;;; to do here. +;;; 6. Find the answer in a comment squirreled away in a source file somewhere +;;; in a language you don't know. +;;; 7. Drink. +;;; +;;; Perhaps you're reading this comment as part of step 6 right now. If so: +;;; welcome aboard. Email me and we can swap horror stories about this process +;;; over drinks some time. +;;; +;;; Okay, so the clause head and first body term need to share argument/local +;;; registers. Why? To understand this, we need to go back to what Prolog +;;; clauses are supposed to do. +;;; +;;; Imagine we have: +;;; +;;; p(f(X)) :- q(X), ...other goals. +;;; +;;; When we want to check if `p(SOMETHING)` is true, we need to first unify +;;; SOMETHING with `f(X)`. Then we search all of the goals in the body, AFTER +;;; substituting in any X's in those goals with the X from the result of the +;;; unification. +;;; +;;; This substitution is why we need the head and the first term in the body to +;;; share the same argument/local registers. By sharing the registers, when the +;;; body term builds a representation of itself on the stack before calling its +;;; predicate any references to X will be point at the (unified) results instead +;;; of fresh ones (because they'll be compiled as `put_value` instead of +;;; `put_variable`). +;;; +;;; But wait: don't we need to substitute into ALL the body terms, not just the +;;; first one? Yes we do, but the trick is that any variables in the REST of +;;; the body that would need to be substituted must, by definition, be permanent +;;; variables! So the substitution process for the rest of the body is handled +;;; automatically with the stack machinery. +;;; +;;; In theory, you could eliminate this edge case by NOT treating the head and +;;; first goal as a single term when searching for permanent variables. Then +;;; all substitution would happen elegantly through the stack. But this +;;; allocates more variables on the stack than you really need (especially for +;;; rules with just a single term in the body (which is many of them)), so we +;;; have this extra corner case to optimize it away. +;;; +;;; In the following code these variables will be called "nead variables" +;;; because: +;;; +;;; 1. They're present in the head of the clause. +;;; 2. They're present in the first term of the body (the "neck", as referred to +;;; in "neck cut" and such). +;;; 3. https://www.urbandictionary.com/define.php?term=nead&defid=1488946 +;;; +;;; We now return you to your regularly scheduled Lisp code. + + +(defstruct allocation-state + (local-registers (make-queue) :type queue) + (stack-registers nil :type list) + (permanent-variables nil :type list) + (anonymous-variables nil :type list) + (reserved-variables nil :type list) + (reserved-arity nil :type (or null arity)) + (actual-arity 0 :type arity)) + + +(defun find-variable (state variable) + "Return the register that already contains this variable, or `nil` otherwise." + (or (when-let (r (position variable + (queue-contents + (allocation-state-local-registers state)))) + (make-temporary-register r (allocation-state-actual-arity state))) + (when-let (s (position variable + (allocation-state-stack-registers state))) + (make-permanent-register s)) + nil)) + +(defun store-variable (state variable) + "Assign `variable` to the next available local register. + + It is assumed that `variable` is not already assigned to another register + (check that with `find-variable` first). + + It is also assumed that this will be a non-argument register, because as + mentioned above variables cannot live directly inside argument registers. + + " + (make-register + :local + (1- (enqueue variable (allocation-state-local-registers state))))) + +(defun ensure-variable (state variable) + (or (find-variable state variable) + (store-variable state variable))) + + +(defmacro set-when-nil ((accessor instance) value-form) + (once-only (instance) + `(when (not (,accessor ,instance)) + (setf (,accessor ,instance) ,value-form)))) + + +(defun variable-anonymous-p (state variable) + "Return whether `variable` is considered anonymous in `state`." + (and (member variable (allocation-state-anonymous-variables state)) t)) + + +(defun allocate-variable-register (state variable) + (if (variable-anonymous-p state variable) + (make-anonymous-register) + (ensure-variable state variable))) + +(defun allocate-nonvariable-register (state) + "Allocate and return a register for something that's not a variable." + ;; We need to allocate registers for things like structures and lists, but we + ;; never need to look them up later (like we do with variables), so we'll just + ;; shove a nil into the local registers array as a placeholder. + (make-temporary-register + (enqueue nil (allocation-state-local-registers state)) + (allocation-state-actual-arity state))) + + +(defgeneric allocate-register (node allocation-state)) + + +(defmethod allocate-register ((node top-level-node) state) + (declare (ignore node state)) + nil) + +(defmethod allocate-register ((node variable-node) state) + (set-when-nil (node-register node) + (allocate-variable-register state (node-variable node)))) + +(defmethod allocate-register ((node argument-variable-node) state) + (set-when-nil (node-secondary-register node) + (allocate-variable-register state (node-variable node)))) + +(defmethod allocate-register ((node structure-node) state) + (set-when-nil (node-register node) + (allocate-nonvariable-register state))) + +(defmethod allocate-register ((node list-node) state) + (set-when-nil (node-register node) + (allocate-nonvariable-register state))) + +(defmethod allocate-register ((node lisp-object-node) state) + (set-when-nil (node-register node) + (allocate-nonvariable-register state))) + + +(defun allocate-argument-registers (node) + (loop :for argument :in (top-level-node-arguments node) + :for i :from 0 + :do (setf (node-register argument) + (make-register :argument i)))) + +(defun allocate-nonargument-registers (node clause-props &key nead) + ;; JESUS TAKE THE WHEEL + (let* + ((actual-arity (top-level-node-arity node)) + (reserved-arity (when nead + (clause-nead-arity clause-props))) + (reserved-variables (when nead + (clause-nead-vars clause-props))) + (permanent-variables (clause-permanent-vars clause-props)) + (local-registers (make-queue)) + ;; We essentially "preallocate" all the permanent variables up front + ;; because we need them to always be in the same stack registers across + ;; all the terms of our clause. + ;; + ;; The ones that won't get used in this term will end up getting + ;; flattened away anyway. + (stack-registers permanent-variables) + (allocation-state + (make-allocation-state + :local-registers local-registers + :stack-registers stack-registers + :permanent-variables permanent-variables + :anonymous-variables (clause-anonymous-vars clause-props) + :reserved-variables reserved-variables + :reserved-arity reserved-arity + :actual-arity actual-arity))) + ;; Preallocate enough registers for all of the arguments. We'll fill + ;; them in later. Note that things are more complicated in the head and + ;; first body term of a clause (see above). + (loop :repeat (or reserved-arity actual-arity) + :do (enqueue nil local-registers)) + ;; Actually reserve the reserved (but non-permanent, see above) variables. + ;; They need to live in consistent spots for the head and first body term. + (loop :for variable :in reserved-variables + :do (enqueue variable local-registers)) + (recursively ((remaining (list node))) + (when remaining + (destructuring-bind (node . remaining) remaining + (allocate-register node allocation-state) + (recur (append remaining (node-children node)))))))) + +(defun allocate-registers (node clause-props &key nead) + (allocate-argument-registers node) + (allocate-nonargument-registers node clause-props :nead nead)) + + diff -r 19200659513a -r 81939d20415a src/compiler/3-flattening.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/compiler/3-flattening.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,136 @@ +(in-package #:bones.wam) + +;;;; .-,--' . . . +;;;; \|__ | ,-. |- |- ,-. ,-. . ,-. ,-. +;;;; | | ,-| | | |-' | | | | | | | +;;;; `' `' `-^ `' `' `-' ' ' ' ' ' `-| +;;;; ,| +;;;; `' + +;;; "Flattening" is the process of turning a parse tree (with register +;;; assignments) into a flat list of nodes, which will then be turned into +;;; a series of instructions. +;;; +;;; The order of this list depends on whether we're compiling a query term or +;;; a program term. +;;; +;;; Turns: +;;; +;;; X0 <- p(X1, X2) +;;; X1 <- A +;;; X2 <- q(X1, X3) +;;; X3 <- B +;;; +;;; into something like: +;;; +;;; X2 <- q(X1, X3) +;;; X0 <- p(X1, X2) + + +(defstruct (register-assignment + (:conc-name assignment-)) + (register (required) :type register)) + + +(defstruct (structure-assignment (:include register-assignment) + (:conc-name assignment-)) + (functor nil :type symbol) + (arity 0 :type arity) + (arguments () :type list)) + +(defstruct (argument-variable-assignment (:include register-assignment) + (:conc-name assignment-)) + (target (required) :type register)) + +(defstruct (list-assignment (:include register-assignment) + (:conc-name assignment-)) + (head (required) :type register) + (tail (required) :type register)) + +(defstruct (lisp-object-assignment (:include register-assignment) + (:conc-name assignment-)) + (object nil :type t)) + + +(defmethod print-object ((assignment structure-assignment) stream) + (print-unreadable-object (assignment stream :type nil :identity nil) + (format stream "~A = ~A/~D(~{~A~^, ~})" + (register-to-string (assignment-register assignment)) + (assignment-functor assignment) + (assignment-arity assignment) + (mapcar #'register-to-string (assignment-arguments assignment))))) + +(defmethod print-object ((assignment argument-variable-assignment) stream) + (print-unreadable-object (assignment stream :type nil :identity nil) + (format stream "~A = ~A" + (register-to-string (assignment-register assignment)) + (register-to-string (assignment-target assignment))))) + +(defmethod print-object ((assignment list-assignment) stream) + (print-unreadable-object (assignment stream :type nil :identity nil) + (format stream "~A = [~A | ~A]" + (register-to-string (assignment-register assignment)) + (register-to-string (assignment-head assignment)) + (register-to-string (assignment-tail assignment))))) + +(defmethod print-object ((assignment lisp-object-assignment) stream) + (print-unreadable-object (assignment stream :type nil :identity nil) + (format stream "~A = ~A" + (register-to-string (assignment-register assignment)) + (lisp-object-to-string (assignment-object assignment))))) + + +(defgeneric node-flatten (node)) + +(defmethod node-flatten (node) + nil) + +(defmethod node-flatten ((node structure-node)) + (values (make-structure-assignment + :register (node-register node) + :functor (node-functor node) + :arity (node-arity node) + :arguments (mapcar #'node-register (node-arguments node))))) + +(defmethod node-flatten ((node argument-variable-node)) + (values (make-argument-variable-assignment + :register (node-register node) + :target (node-secondary-register node)))) + +(defmethod node-flatten ((node list-node)) + (values (make-list-assignment + :register (node-register node) + :head (node-register (node-head node)) + :tail (node-register (node-tail node))))) + +(defmethod node-flatten ((node lisp-object-node)) + (values (make-lisp-object-assignment + :register (node-register node) + :object (node-object node)))) + + +(defun flatten-breadth-first (tree) + (let ((results nil)) + (recursively ((node tree)) + (when-let (assignment (node-flatten node)) + (push assignment results)) + (mapc #'recur (node-children node))) + (nreverse results))) + +(defun flatten-depth-first-post-order (tree) + (let ((results nil)) + (recursively ((node tree)) + (mapc #'recur (node-children node)) + (when-let (assignment (node-flatten node)) + (push assignment results))) + (nreverse results))) + + +(defun flatten-query (tree) + (flatten-depth-first-post-order tree)) + +(defun flatten-program (tree) + (flatten-breadth-first tree)) + + + diff -r 19200659513a -r 81939d20415a src/compiler/4-tokenization.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/compiler/4-tokenization.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,148 @@ +(in-package #:bones.wam) + +;;;; ,--,--' . . +;;;; `- | ,-. | , ,-. ,-. . ,_, ,-. |- . ,-. ,-. +;;;; , | | | |< |-' | | | / ,-| | | | | | | +;;;; `-' `-' ' ` `-' ' ' ' '"' `-^ `' ' `-' ' ' + +;;; Tokenizing takes a flattened set of assignments and turns it into a stream +;;; of structure assignments and bare registers. +;;; +;;; It turns: +;;; +;;; X2 <- q(X1, X3) +;;; X0 <- p(X1, X2) +;;; A3 <- X4 +;;; +;;; into something like: +;;; +;;; (X2 = q/2), X1, X3, (X0 = p/2), X1, X2, (A3 = X4) + + +(defclass token () ()) + + +(defclass register-token (token) + ((register :accessor token-register :type register :initarg :register))) + +(defclass structure-token (register-token) + ((functor :accessor token-functor :type symbol :initarg :functor) + (arity :accessor token-arity :type arity :initarg :arity))) + +(defclass argument-variable-token (register-token) + ((target :accessor token-target :type register :initarg :target))) + +(defclass list-token (register-token) ()) + +(defclass lisp-object-token (register-token) + ((object :accessor token-object :type t :initarg :object))) + +(defclass procedure-call-token () + ((functor :accessor token-functor :type symbol :initarg :functor) + (arity :accessor token-arity :type arity :initarg :arity))) + +(defclass call-token (procedure-call-token) ()) + +(defclass jump-token (procedure-call-token) ()) + +(defclass cut-token (token) ()) + + +(defun make-register-token (register) + (values (make-instance 'register-token :register register))) + + +(defmethod print-object ((token register-token) stream) + (print-object (token-register token) stream)) + +(defmethod print-object ((token structure-token) stream) + (print-unreadable-object (token stream :identity nil :type nil) + (format stream "~A = ~A/~D" + (register-to-string (token-register token)) + (token-functor token) + (token-arity token)))) + +(defmethod print-object ((token argument-variable-token) stream) + (print-unreadable-object (token stream :identity nil :type nil) + (format stream "~A = ~A" + (register-to-string (token-register token)) + (register-to-string (token-target token))))) + +(defmethod print-object ((token list-token) stream) + (print-unreadable-object (token stream :identity nil :type nil) + (format stream "~A = LIST" (register-to-string (token-register token))))) + +(defmethod print-object ((token lisp-object-token) stream) + (print-unreadable-object (token stream :identity nil :type nil) + (format stream "~A = ~A" + (register-to-string (token-register token)) + (lisp-object-to-string (token-object token))))) + +(defmethod print-object ((token call-token) stream) + (print-unreadable-object (token stream :identity nil :type nil) + (format stream "CALL ~A/~D" + (token-functor token) + (token-arity token)))) + +(defmethod print-object ((token jump-token) stream) + (print-unreadable-object (token stream :identity nil :type nil) + (format stream "JUMP ~A/~D" + (token-functor token) + (token-arity token)))) + +(defmethod print-object ((token cut-token) stream) + (print-unreadable-object (token stream :identity nil :type nil) + (format stream "CUT!"))) + + +(defgeneric tokenize-assignment (assignment) + (:documentation "Tokenize `assignment` into a flat list of tokens.")) + +(defmethod tokenize-assignment ((assignment structure-assignment)) + (list* (make-instance 'structure-token + :register (assignment-register assignment) + :functor (assignment-functor assignment) + :arity (assignment-arity assignment)) + (mapcar #'make-register-token (assignment-arguments assignment)))) + +(defmethod tokenize-assignment ((assignment argument-variable-assignment)) + (list (make-instance 'argument-variable-token + :register (assignment-register assignment) + :target (assignment-target assignment)))) + +(defmethod tokenize-assignment ((assignment list-assignment)) + (list (make-instance 'list-token :register (assignment-register assignment)) + (make-register-token (assignment-head assignment)) + (make-register-token (assignment-tail assignment)))) + +(defmethod tokenize-assignment ((assignment lisp-object-assignment)) + (list (make-instance 'lisp-object-token + :register (assignment-register assignment) + :object (assignment-object assignment)))) + +(defun tokenize-assignments (assignments) + "Tokenize a flattened set of register assignments into a stream." + (mapcan #'tokenize-assignment assignments)) + + +(defun tokenize-program-term (term clause-props) + "Tokenize `term` as a program term, returning its tokens." + (let ((tree (parse-top-level term))) + (allocate-registers tree clause-props :nead t) + (-> tree flatten-program tokenize-assignments))) + +(defun tokenize-query-term (term clause-props &key in-nead is-tail) + "Tokenize `term` as a query term, returning its tokens." + (let ((tree (parse-top-level term))) + (allocate-registers tree clause-props :nead in-nead) + (-<> tree + flatten-query + tokenize-assignments + ;; We need to shove a CALL/JUMP token onto the end. + (append <> (list (make-instance + (if is-tail 'jump-token 'call-token) + :functor (top-level-node-functor tree) + :arity (top-level-node-arity tree))))))) + + + diff -r 19200659513a -r 81939d20415a src/compiler/5-precompilation.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/compiler/5-precompilation.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,420 @@ +(in-package #:bones.wam) + +;;;; .-,--. . . +;;;; '|__/ ,-. ,-. ,-. ,-. ,-,-. ,-. . | ,-. |- . ,-. ,-. +;;;; ,| | |-' | | | | | | | | | | ,-| | | | | | | +;;;; `' ' `-' `-' `-' ' ' ' |-' ' `' `-^ `' ' `-' ' ' +;;;; | +;;;; ' + +;;; Once we have a tokenized stream we can generate the machine instructions +;;; from it. +;;; +;;; We don't generate the ACTUAL bytecode immediately, because we want to run +;;; a few optimization passes on it first, and it's easier to work with if we +;;; have a friendlier format. +;;; +;;; So we turn a stream of tokens: +;;; +;;; (X2 = q/2), X1, X3, (X0 = p/2), X1, X2 +;;; +;;; into a list of instructions, each of which is a list: +;;; +;;; (:put-structure X2 q 2) +;;; (:subterm-variable X1) +;;; (:subterm-variable X3) +;;; (:put-structure X0 p 2) +;;; (:subterm-value X1) +;;; (:subterm-value X2) +;;; +;;; The opcodes are keywords and the register arguments remain register objects. +;;; They get converted down to the raw bytes in the final "rendering" step. +;;; +;;; # Cut +;;; +;;; A quick note on cut (!): the book and original WAM do some nutty things to +;;; save one stack word per frame. They store the cut register for non-neck +;;; cuts in a "pseudovariable" on the stack, so they only have to allocate that +;;; extra stack word for things that actually USE non-neck cuts. +;;; +;;; We're going to just eat the extra stack word and store the cut register in +;;; every frame instead. This massively simplifies the implementation and lets +;;; me keep my sanity, and it MIGHT even end up being faster because there's +;;; one fewer opcode, less fucking around in the compiler, etc. But regardless: +;;; I don't want to go insane, and my laptop has sixteen gigabytes of RAM, so +;;; let's just store the damn word. +;;; +;;; # "Seen" Registers +;;; +;;; The book neglects to mention some REALLY important information about how you +;;; have to handle registers when compiling a stream of tokens. But if you've +;;; made it this far, you should be pretty used to the book omitting vital +;;; information. So hop in the clown car and take a ride with me. +;;; +;;; From the very beginning,the book mentions that certain instructions come in +;;; pairs, the first of which is used the first time the register is "seen" or +;;; "encountered", and the second used of which is used subsequent times. +;;; +;;; For example, a simple query like `p(A, A, A)` would result in: +;;; +;;; put-variable A0 X3 +;;; put-value A1 X3 +;;; put-value A2 X3 +;;; call p/3 +;;; +;;; This is all fine and dandy and works for single goals, but if you have +;;; a clause with MULTIPLE body goals you need to "reset" the list of +;;; already-seen registers after each goal. For example, consider: +;;; +;;; p() :- +;;; f(X, X), +;;; g(Y, Y). +;;; +;;; If you just apply what the book says without resetting the already-seen +;;; register list, you get: +;;; +;;; put-variable A0 X2 +;;; put-value A1 X2 +;;; call f/2 +;;; put-value A0 X2 <--- wrong! +;;; put-value A1 X2 +;;; call g/2 +;;; +;;; But the variable in `g/2` is DIFFERENT than the one used in `f/2`, so that +;;; second `put-value` instruction is wrong! What we need instead is this: +;;; +;;; put-variable A0 X2 +;;; put-value A1 X2 +;;; call f/2 +;;; put-variable A0 X2 <--- right! +;;; put-value A1 X2 +;;; call g/2 +;;; +;;; So the list of seen registers needs to get cleared after each body goal. +;;; +;;; But be careful: it's only TEMPORARY registers that need to get cleared! If +;;; the variables in our example WEREN'T different (`p() :- f(X, X), g(X, X)`) +;;; the instructions would be assigning to stack registers, and we WANT to do +;;; one `put-variable` and have the rest be `put-value`s. +;;; +;;; And there's one more edge case you're probably wondering about: what happens +;;; after the HEAD of a clause? Do we need to reset? The answer is: no, +;;; because the head and first body goal share registers, which is what performs +;;; the "substitution" for the first body goal (see the comment earlier for more +;;; on that rabbit hole). + + +(defun find-opcode-register (first-seen register) + (let ((register-variant (when register + (ecase (register-type register) + ((:local :argument) :local) + ((:permanent) :stack) + ((:anonymous) :void))))) + (if first-seen + (ecase register-variant + (:local :subterm-variable-local) + (:stack :subterm-variable-stack) + (:void :subterm-void)) + (ecase register-variant + (:local :subterm-value-local) + (:stack :subterm-value-stack) + (:void :subterm-void))))) + +(defun find-opcode-list (mode) + (ecase mode + (:program :get-list) + (:query :put-list))) + +(defun find-opcode-lisp-object (mode) + (ecase mode + (:program :get-lisp-object) + (:query :put-lisp-object))) + +(defun find-opcode-structure (mode) + (ecase mode + (:program :get-structure) + (:query :put-structure))) + +(defun find-opcode-argument (first-seen mode register) + (let ((register-variant (ecase (register-type register) + ((:local :argument) :local) + ((:permanent) :stack)))) + (if first-seen + (ecase mode + (:program (ecase register-variant + (:local :get-variable-local) + (:stack :get-variable-stack))) + (:query (ecase register-variant + (:local :put-variable-local) + (:stack :put-variable-stack)))) + (ecase mode + (:program (ecase register-variant + (:local :get-value-local) + (:stack :get-value-stack))) + (:query (ecase register-variant + (:local :put-value-local) + (:stack :put-value-stack))))))) + + +(defun precompile-tokens (head-tokens body-tokens) + "Generate a series of machine instructions from a stream of head and body + tokens. + + The `head-tokens` should be program-style tokens, and are compiled in program + mode. The `body-tokens` should be query-style tokens, and are compiled in + query mode. + + Actual queries are a special case where the `head-tokens` stream is `nil` + + The compiled instructions will be returned as a circle. + + " + (let ((seen (list)) + (mode nil) + (instructions (make-empty-circle))) + (labels + ((push-instruction (&rest instruction) + (circle-insert-end instructions instruction)) + (reset-seen () + ;; Reset the list of seen registers (grep for "clown car" above) + (setf seen (remove-if #'register-temporary-p seen))) + (handle-argument (argument-register source-register) + (if (register-anonymous-p source-register) + (ecase mode + ;; Query terms need to put an unbound var into their argument + ;; register for each anonymous variable. + (:query (push-instruction :put-void argument-register)) + ;; Crazy, but for program terms we can just drop + ;; argument-position anonymous variables on the floor. + (:program nil)) + ;; OP X_n A_i + (let ((first-seen (push-if-new source-register seen :test #'register=))) + (push-instruction + (find-opcode-argument first-seen mode source-register) + source-register + argument-register)))) + (handle-structure (destination-register functor arity) + ;; OP functor reg + (push destination-register seen) + (push-instruction (find-opcode-structure mode) + functor + arity + destination-register)) + (handle-list (register) + (push register seen) + (push-instruction (find-opcode-list mode) + register)) + (handle-lisp-object (register object) + ;; OP object register + (push register seen) + (push-instruction (find-opcode-lisp-object mode) object register)) + (handle-cut () + (push-instruction :cut)) + (handle-procedure-call (functor arity is-jump) + (if (and (eq functor 'call) + (= arity 1)) + ;; DYNAMIC-[CALL/JUMP] + (push-instruction (if is-jump :dynamic-jump :dynamic-call)) + ;; [CALL/JUMP] functor + (push-instruction (if is-jump :jump :call) functor arity)) + ;; This is a little janky, but at this point the body goals have been + ;; turned into one single stream of tokens, so we don't have a nice + ;; clean way to tell when one ends. But in practice, a body goal is + ;; going to end with a CALL instruction, so we can use this as + ;; a kludge to know when to reset. + ;; + ;; TODO: We should probably dekludge this by emitting an extra "end + ;; body goal" token, especially once we add some special forms that + ;; might need to do some resetting but not end in a CALL. + (reset-seen)) + (handle-register (register) + (if (register-anonymous-p register) + ;; VOID 1 + (push-instruction (find-opcode-register nil register) 1) + ;; OP reg + (let ((first-seen (push-if-new register seen :test #'register=))) + (push-instruction + (find-opcode-register first-seen register) + register)))) + (handle-token (token) + (etypecase token + (argument-variable-token + (handle-argument (token-register token) + (token-target token))) + (structure-token + (handle-structure (token-register token) + (token-functor token) + (token-arity token))) + (list-token + (handle-list (token-register token))) + (lisp-object-token + (handle-lisp-object (token-register token) + (token-object token))) + (cut-token + (handle-cut)) + (jump-token + (handle-procedure-call (token-functor token) + (token-arity token) + t)) + (call-token + (handle-procedure-call (token-functor token) + (token-arity token) + nil)) + (register-token + (handle-register (token-register token))))) + (handle-stream (tokens) + (map nil #'handle-token tokens))) + (when head-tokens + (setf mode :program) + (handle-stream head-tokens)) + (setf mode :query) + (handle-stream body-tokens) + instructions))) + + +(defun precompile-clause (head body) + "Precompile the clause. + + `head` should be the head of the clause for program clauses, or `nil` for + query clauses. + + `body` is the body of the clause, or `nil` for facts. + + Returns a circle of instructions and the properties of the clause. + + " + (let* ((clause-props + (determine-clause-properties head body)) + (head-tokens + (when head + (tokenize-program-term head clause-props))) + (clause-type + (cond ((null head) :query) + ((null body) :fact) + ((null (rest body)) :chain) + (t :rule))) + (body-tokens + (when body + (loop + :with first = t + :for (goal . remaining) :on body + :append + (if (eq goal '!) ; gross + ;; cut just gets emitted straight, but DOESN'T flip `first`... + ;; TODO: fix the cut layering violation here... + (list (make-instance 'cut-token)) + (prog1 + (tokenize-query-term + goal clause-props + :in-nead first + ;; For actual WAM queries we're running, we don't want to + ;; LCO the final CALL because we need that stack frame + ;; (for storing the results). + :is-tail (and (not (eq clause-type :query)) + (null remaining))) + (setf first nil))))))) + (let ((instructions (precompile-tokens head-tokens body-tokens)) + (variable-count (length (clause-permanent-vars clause-props)))) + ;; We need to compile facts and rules differently. Facts end with + ;; a PROCEED and rules are wrapped in ALOC/DEAL. + (ecase clause-type + (:chain + ;; Chain rules don't need anything at all. They just unify, set up + ;; the next predicate's arguments, and JUMP. By definition, in a chain + ;; rule all variables must be temporary, so we don't need a stack frame + ;; at all! + nil) + (:rule ; a full-ass rule + ;; Non-chain rules need an ALLOC at the head and a DEALLOC right before + ;; the tail call: + ;; + ;; ALLOC n + ;; ... + ;; DEAL + ;; JUMP + (circle-insert-beginning instructions `(:allocate ,variable-count)) + (circle-insert-before (circle-backward instructions) `(:deallocate))) + + (:fact + (circle-insert-end instructions `(:proceed))) + + (:query + ;; The book doesn't have this ALOC here, but we do it to aid in result + ;; extraction. Basically, to make extracting th results of a query + ;; easier we allocate all of its variables on the stack, so we need + ;; push a stack frame for them before we get started. We don't DEAL + ;; because we want the frame to be left on the stack at the end so we + ;; can poke at it. + (circle-insert-beginning instructions `(:allocate ,variable-count)) + (circle-insert-end instructions `(:done)))) + (values instructions clause-props)))) + + +(defun precompile-query (query) + "Compile `query`, returning the instructions and permanent variables. + + `query` should be a list of goal terms. + + " + (multiple-value-bind (instructions clause-props) + (precompile-clause nil query) + (values instructions + (clause-permanent-vars clause-props)))) + + +(defun find-predicate (clause) + "Return the functor and arity of the predicate of `clause`." + ;; ( (f ?x ?y) | head ||| clause + ;; (foo ?x) || body ||| + ;; (bar ?y) ) || ||| + (let ((head (car clause))) + (etypecase head + (null (error "Clause ~S has a NIL head." clause)) + (symbol (values head 0)) ; constants are 0-arity + (cons (values (car head) ; (f ...) + (1- (length head)))) + (t (error "Clause ~S has a malformed head." clause))))) + + +(defun precompile-rules (rules) + "Compile a single predicate's `rules` into a list of instructions. + + All the rules must for the same predicate. This is not checked, for + performance reasons. Don't fuck it up. + + Each rule in `rules` should be a clause consisting of a head term and zero or + more body terms. A rule with no body is called a fact. + + Returns the circle of compiled instructions, as well as the functor and arity + of the rules being compiled. + + " + (assert rules () "Cannot compile an empty program.") + (multiple-value-bind (functor arity) (find-predicate (first rules)) + (values + (if (= 1 (length rules)) + ;; Single-clause rules don't need to bother setting up a choice point. + (destructuring-bind ((head . body)) rules + (precompile-clause head body)) + ;; Otherwise we need to loop through each of the clauses, pushing their + ;; choice point instruction first, then their actual code. + ;; + ;; The `nil` clause addresses will get filled in later, during rendering. + (loop :with instructions = (make-empty-circle) + :for ((head . body) . remaining) :on rules + :for first-p = t :then nil + :for last-p = (null remaining) + :for clause-instructions = (precompile-clause head body) + :do (progn + (circle-insert-end + instructions + (cond (first-p `(:try ,+choice-point-placeholder+)) + (last-p `(:trust)) + (t `(:retry ,+choice-point-placeholder+)))) + (circle-append-circle instructions clause-instructions)) + :finally (return instructions))) + functor + arity))) + + + diff -r 19200659513a -r 81939d20415a src/compiler/6-optimization.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/compiler/6-optimization.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,111 @@ +(in-package #:bones.wam) + +;;;; ,,--. . . +;;;; |`, | ,-. |- . ,-,-. . ,_, ,-. |- . ,-. ,-. +;;;; | | | | | | | | | | / ,-| | | | | | | +;;;; `---' |-' `' ' ' ' ' ' '"' `-^ `' ' `-' ' ' +;;;; | +;;;; ' + +;;; Optimization of the WAM instructions happens between the precompilation +;;; phase and the rendering phase. We perform a number of passes over the +;;; circle of instructions, doing one optimization each time. + + +(defun optimize-get-constant (node constant register) + ;; 1. get_structure c/0, Ai -> get_constant c, Ai + (circle-replace node `(:get-constant ,constant ,register))) + +(defun optimize-put-constant (node constant register) + ;; 2. put_structure c/0, Ai -> put_constant c, Ai + (circle-replace node `(:put-constant ,constant ,register))) + +(defun optimize-subterm-constant-query (node constant register) + ;; 3. put_structure c/0, Xi *** WE ARE HERE + ;; ... + ;; subterm_value Xi -> subterm_constant c + (loop + :with previous = (circle-prev node) + ;; Search for the corresponding set-value instruction + :for n = (circle-forward-remove node) :then (circle-forward n) + :while n + :for (opcode . arguments) = (circle-value n) + :when (and (eql opcode :subterm-value-local) + (register= register (first arguments))) + :do + (circle-replace n `(:subterm-constant ,constant)) + (return previous))) + +(defun optimize-subterm-constant-program (node constant register) + ;; 4. subterm_variable Xi -> subterm_constant c + ;; ... + ;; get_structure c/0, Xi *** WE ARE HERE + (loop + ;; Search backward for the corresponding subterm-variable instruction + :for n = (circle-backward node) :then (circle-backward n) + :while n + :for (opcode . arguments) = (circle-value n) + :when (and (eql opcode :subterm-variable-local) + (register= register (first arguments))) + :do + (circle-replace n `(:subterm-constant ,constant)) + (return (circle-backward-remove node)))) + + +(defun optimize-constants (instructions) + ;; From the book and the erratum, there are four optimizations we can do for + ;; constants (0-arity structures). + + (flet ((optimize-put (node functor register) + (if (register-argument-p register) + (optimize-put-constant node functor register) + (optimize-subterm-constant-query node functor register))) + (optimize-get (node functor register) + (if (register-argument-p register) + (optimize-get-constant node functor register) + (optimize-subterm-constant-program node functor register)))) + (loop + :for node = (circle-forward instructions) :then (circle-forward node) + :while node :do + (destructuring-bind (opcode . arguments) (circle-value node) + (when (member opcode '(:put-structure :get-structure)) + (destructuring-bind (functor arity register) arguments + (when (zerop arity) + (setf node + (case opcode + (:put-structure (optimize-put node functor register)) + (:get-structure (optimize-get node functor register)))))))))) + instructions) + + +(defun optimize-void-runs (instructions) + ;; We can optimize runs of N (:unify-void 1) instructions into a single one + ;; that does all N at once. + (loop + :for node = (circle-forward instructions) :then (circle-forward node) + :while node + :for opcode = (car (circle-value node)) + :when (eq opcode :subterm-void) + :do + (loop + :with beginning = (circle-backward node) + :for run-node = node :then (circle-forward run-node) + :for run-opcode = (car (circle-value run-node)) + :while (eq opcode run-opcode) + :do (circle-remove run-node) + :sum 1 :into run-length fixnum ; lol + :finally + (progn + (setf node (circle-forward beginning)) + (circle-insert-after beginning + `(,opcode ,run-length))))) + instructions) + + +(defun optimize-instructions (instructions) + (->> instructions + (optimize-constants) + (optimize-void-runs))) + + + diff -r 19200659513a -r 81939d20415a src/compiler/7-rendering.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/compiler/7-rendering.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,156 @@ +(in-package #:bones.wam) + +;;;; .-,--. . +;;;; `|__/ ,-. ,-. ,-| ,-. ,-. . ,-. ,-. +;;;; )| \ |-' | | | | |-' | | | | | | +;;;; `' ` `-' ' ' `-^ `-' ' ' ' ' `-| +;;;; ,| +;;;; `' + +;;; Rendering is the act of taking the friendly list-of-instructions format and +;;; actually converting it to raw-ass bytes and storing it in an array. + + +(defun check-instruction (opcode arguments) + (assert (= (length arguments) + (1- (instruction-size opcode))) + () + "Cannot push opcode ~A with ~D arguments ~S, it requires exactly ~D." + (opcode-name opcode) + (length arguments) + arguments + (1- (instruction-size opcode)))) + + +(defun code-push-instruction (store opcode arguments address) + "Push the given instruction into `store` at `address`. + + `arguments` should be a list of `code-word`s. + + Returns how many words were pushed. + + " + (check-instruction opcode arguments) + (setf (aref store address) opcode + (subseq store (1+ address)) arguments) + (instruction-size opcode)) + + +(defun render-opcode (opcode-designator) + (ecase opcode-designator + (:get-structure +opcode-get-structure+) + (:get-variable-local +opcode-get-variable-local+) + (:get-variable-stack +opcode-get-variable-stack+) + (:get-value-local +opcode-get-value-local+) + (:get-value-stack +opcode-get-value-stack+) + (:put-structure +opcode-put-structure+) + (:put-variable-local +opcode-put-variable-local+) + (:put-variable-stack +opcode-put-variable-stack+) + (:put-value-local +opcode-put-value-local+) + (:put-value-stack +opcode-put-value-stack+) + (:put-void +opcode-put-void+) + (:subterm-variable-local +opcode-subterm-variable-local+) + (:subterm-variable-stack +opcode-subterm-variable-stack+) + (:subterm-value-local +opcode-subterm-value-local+) + (:subterm-value-stack +opcode-subterm-value-stack+) + (:subterm-void +opcode-subterm-void+) + (:put-constant +opcode-put-constant+) + (:get-constant +opcode-get-constant+) + (:subterm-constant +opcode-subterm-constant+) + (:get-list +opcode-get-list+) + (:put-list +opcode-put-list+) + (:get-lisp-object +opcode-get-lisp-object+) + (:put-lisp-object +opcode-put-lisp-object+) + (:jump +opcode-jump+) + (:call +opcode-call+) + (:dynamic-jump +opcode-dynamic-jump+) + (:dynamic-call +opcode-dynamic-call+) + (:proceed +opcode-proceed+) + (:allocate +opcode-allocate+) + (:deallocate +opcode-deallocate+) + (:done +opcode-done+) + (:try +opcode-try+) + (:retry +opcode-retry+) + (:trust +opcode-trust+) + (:cut +opcode-cut+))) + +(defun render-argument (argument) + (cond + ;; Ugly choice point args that'll be filled later... + ((eq +choice-point-placeholder+ argument) 0) + + ;; Bytecode just needs the register numbers. + ((typep argument 'register) (register-number argument)) + + ;; Everything else just gets shoved right into the array. + (t argument))) + +(defun render-bytecode (store instructions start limit) + "Render `instructions` (a circle) into `store` starting at `start`. + + Bail if ever pushed beyond `limit`. + + Return the total number of code words rendered. + + " + (let ((previous-jump nil)) + (flet + ((fill-previous-jump (address) + (when previous-jump + (setf (aref store (1+ previous-jump)) address)) + (setf previous-jump address))) + (loop + :with address = start + + ;; Render the next instruction + :for node = (circle-forward instructions) + :then (or (circle-forward node) + (return instruction-count)) + + :for (opcode-designator . arguments) = (circle-value node) + :for opcode = (render-opcode opcode-designator) + :for size = (instruction-size opcode) + :summing size :into instruction-count + + ;; Make sure we don't run past the end of our section. + :when (>= (+ size address) limit) + :do (error "Code store exhausted, game over.") + + :do (code-push-instruction store + opcode + (mapcar #'render-argument arguments) + address) + + ;; We need to fill in the addresses for the choice point jumping + ;; instructions. For example, when we have TRY ... TRUST, the TRUST + ;; needs to patch its address into the TRY instruction. + ;; + ;; I know, this is ugly, sorry. + :when (member opcode-designator '(:try :retry :trust)) + :do (fill-previous-jump address) + + ;; look, don't judge me, i told you i know its bad + :do (incf address size))))) + + +(defun render-query-into (storage instructions) + (render-bytecode storage instructions 0 +maximum-query-size+)) + + +(defun mark-label (wam functor arity address) + "Set the code label `functor`/`arity` to point at `address`." + (setf (wam-code-label wam functor arity) + address)) + +(defun render-rules (wam functor arity instructions) + ;; Before we render the instructions, make the label point at where they're + ;; about to go. + (mark-label wam functor arity (wam-code-pointer wam)) + (incf (wam-code-pointer wam) + (render-bytecode (wam-code wam) + instructions + (wam-code-pointer wam) + (array-total-size (wam-code wam))))) + + + diff -r 19200659513a -r 81939d20415a src/compiler/8-ui.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/compiler/8-ui.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,50 @@ +(in-package #:bones.wam) + +;;;; ,-. . ,-_/ . +;;;; | | ,-. ,-. ,-. ' | ,-. |- ,-. ,-. ," ,-. ,-. ,-. +;;;; | | . `-. |-' | .^ | | | | |-' | |- ,-| | |-' +;;;; `--^-' `-' `-' ' `--' ' ' `' `-' ' | `-^ `-' `-' +;;;; ' + +;;; The final phase wraps everything else up into a sane UI. + +(defun %compile-query-into (storage query) + (multiple-value-bind (instructions permanent-variables) + (precompile-query query) + (optimize-instructions instructions) + (values permanent-variables + (render-query-into storage instructions)))) + +(defun compile-query (wam query) + "Compile `query` into the query section of the WAM's code store. + + `query` should be a list of goal terms. + + Returns the permanent variables and the size of the compiled bytecode. + + " + (%compile-query-into (wam-code wam) query)) + +(defun compile-query-into (storage query) + "Compile `query` into the given array `storage`. + + `query` should be a list of goal terms. + + Returns the permanent variables and the size of the compiled bytecode. + + " + (%compile-query-into storage query)) + + +(defun compile-rules (wam rules) + "Compile `rules` into the WAM's code store. + + Each rule in `rules` should be a clause consisting of a head term and zero or + more body terms. A rule with no body is called a fact. + + " + (multiple-value-bind (instructions functor arity) + (precompile-rules rules) + (optimize-instructions instructions) + (render-rules wam functor arity instructions))) + diff -r 19200659513a -r 81939d20415a src/constants.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/constants.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,149 @@ +(in-package #:bones.wam) + +(defmacro define-constants (count-symbol &rest symbols) + `(progn + ,@(loop :for c :from 0 + :for s :in symbols + :collect `(define-constant ,s ,c)) + (define-constant ,count-symbol ,(length symbols)))) + + +(define-constant +code-word-size+ 60 + :documentation "Size (in bits) of each word in the code store.") + +(define-constant +code-limit+ (expt 2 +code-word-size+) + :documentation "Maximum size of the WAM code store.") + +(define-constant +code-sentinel+ (1- +code-limit+) + ; TODO: Should this sentinel value be 0 like everything else? + :documentation "Sentinel value used in the PC and CP.") + + +(define-constants +number-of-cell-types+ + +cell-type-null+ + +cell-type-structure+ + +cell-type-reference+ + +cell-type-functor+ + +cell-type-constant+ + +cell-type-list+ + +cell-type-lisp-object+ + +cell-type-stack+) + + +(define-constant +register-count+ 2048 + :documentation "The number of local registers the WAM has available.") + +(define-constant +maximum-arity+ 1024 + :documentation "The maximum allowed arity of functors.") + + +;; TODO Make all this shit configurable at runtime +(define-constant +stack-limit+ 4096 + :documentation "Maximum size of the WAM stack.") + +(define-constant +stack-frame-size-limit+ (+ 7 +register-count+) + :documentation "The maximum size, in stack frame words, that a stack frame could be.") + + +(define-constant +maximum-query-size+ 1024 + :documentation + "The maximum size (in bytes of bytecode) a query may compile to.") + +(define-constant +maximum-instruction-size+ 4 + :documentation + "The maximum number of code words an instruction (including opcode) might be.") + +(define-constant +code-query-start+ 0 + :documentation "The address in the code store where the query code begins.") + +(define-constant +code-main-start+ +maximum-query-size+ + :documentation "The address in the code store where the main program code begins.") + + +(define-constant +stack-start+ +register-count+ + :documentation "The address in the store of the first cell of the stack.") + +(define-constant +stack-end+ (+ +stack-start+ +stack-limit+) + :documentation + "The address in the store one past the last cell in the stack.") + +(define-constant +heap-start+ +stack-end+ + :documentation "The address in the store of the first cell of the heap.") + + +(define-constant +trail-limit+ array-total-size-limit + ;; TODO: should probably limit this to something more reasonable + :documentation "The maximum number of variables that may exist in the trail.") + +(define-constant +store-limit+ array-total-size-limit + :documentation "Maximum size of the WAM store.") + +(define-constant +heap-limit+ (- +store-limit+ +register-count+ +stack-limit+) + ;; The heap gets whatever's left over after the registers and stack have taken + ;; their chunk of memory. + :documentation "Maximum size of the WAM heap.") + +(define-constant +functor-limit+ array-total-size-limit + ;; Functors are stored in a functor table. + :documentation "The maximum number of functors the WAM can keep track of.") + + +(define-constant +wildcard-symbol+ '?) + + +;;;; Opcodes +(define-constants +number-of-opcodes+ + +opcode-noop+ + + ;; Program + +opcode-get-structure+ + +opcode-get-variable-local+ + +opcode-get-variable-stack+ + +opcode-get-value-local+ + +opcode-get-value-stack+ + + ;; Query + +opcode-put-structure+ + +opcode-put-variable-local+ + +opcode-put-variable-stack+ + +opcode-put-value-local+ + +opcode-put-value-stack+ + +opcode-put-void+ + + ;; Subterm + +opcode-subterm-variable-local+ + +opcode-subterm-variable-stack+ + +opcode-subterm-value-local+ + +opcode-subterm-value-stack+ + +opcode-subterm-void+ + + ;; Control + +opcode-jump+ + +opcode-call+ + +opcode-dynamic-jump+ + +opcode-dynamic-call+ + +opcode-proceed+ + +opcode-allocate+ + +opcode-deallocate+ + +opcode-done+ + +opcode-try+ + +opcode-retry+ + +opcode-trust+ + +opcode-cut+ + + ;; Constants + +opcode-get-constant+ + +opcode-put-constant+ + +opcode-subterm-constant+ + + ;; Lists + +opcode-get-list+ + +opcode-put-list+ + + ;; Lisp Objects + +opcode-get-lisp-object+ + +opcode-put-lisp-object+) + + +;;;; Debug Config +(defparameter *off-by-one* nil) diff -r 19200659513a -r 81939d20415a src/dump.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/dump.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,378 @@ +(in-package #:bones.wam) + +(defun heap-debug (wam address indent-p) + (format + nil "~A~A" + (if indent-p + " " + "") + (cell-typecase (wam address) + ((:reference r) (if (= address r) + "unbound variable " + (format nil "var pointer to ~8,'0X " r))) + ((:structure s) (format nil "struct pointer to ~8,'0X " s)) + ((:functor f) (format nil "functor symbol ~A " f)) + ((:constant c) (format nil "constant symbol ~A " c)) + (t "")))) + + +(defun dump-cell-value (value) + ;; todo flesh this out + (typecase value + (fixnum (format nil "~16,'0X" value)) + (t (format nil "~16<#~;~>")))) + + +(defun dump-heap (wam from to) + ;; This code is awful, sorry. + (format t "HEAP~%") + (format t " +----------+-----+------------------+--------------------------------------+~%") + (format t " | ADDR | TYP | VALUE | DEBUG |~%") + (format t " +----------+-----+------------------+--------------------------------------+~%") + (when (> from (1+ +heap-start+)) + (format t " | â‹® | â‹® | â‹® | |~%")) + (flet ((print-cell (address indent) + (format t " | ~8,'0X | ~A | ~16,'0X | ~36A |~%" + address + (cell-type-short-name (wam-store-type wam address)) + (dump-cell-value (wam-store-value wam address)) + (heap-debug wam address (plusp indent))))) + (loop :with indent = 0 + :for address :from from :below to + :do (progn + (print-cell address indent) + (cell-typecase (wam address) + ((:functor f n) (declare (ignore f)) (setf indent n)) + (t (when (not (zerop indent)) + (decf indent))))))) + (when (< to (wam-heap-pointer wam)) + (format t " | â‹® | â‹® | â‹® | |~%")) + (format t " +----------+-----+------------------+--------------------------------------+~%") + (values)) + + +(defun dump-stack-frame (wam start-address) + (loop :with remaining = nil + :with arg-number = nil + :for address :from start-address + :for offset :from 0 + :for type = (wam-store-type wam address) + :for value = (wam-store-value wam address) + :while (or (null remaining) (plusp remaining)) + :do (format + t " | ~8,'0X | ~A | ~30A|~A~A~A~%" + address + (dump-cell-value value) + (cond + ((= address +stack-start+) "") + ((= offset 0) "CE ===========================") + ((= offset 1) "CP") + ((= offset 2) "CUT") + ((= offset 3) (progn + (setf remaining value + arg-number 0) + (format nil "N: ~D" value))) + (t (prog1 + (format nil " Y~D: ~A ~A" + arg-number + (cell-type-short-name type) + (dump-cell-value value)) + (decf remaining) + (incf arg-number)))) + (if (= address (wam-environment-pointer wam)) " <- E" "") + (if (= address (wam-backtrack-pointer wam)) " <- B" "") + (if (= address (wam-cut-pointer wam)) " <- CUT" "")) + :finally (return address))) + +(defun dump-stack-choice (wam start-address) + (loop :with remaining = nil + :with arg-number = nil + :for address :from start-address + :for offset :from 0 + :for type = (wam-store-type wam address) + :for value = (wam-store-value wam address) + :while (or (null remaining) (plusp remaining)) + :do (format + t " | ~8,'0X | ~A | ~30A|~A~A~A~%" + address + (dump-cell-value value) + (cond + ((= address +stack-start+) "") + ((= offset 0) (progn + (setf remaining value + arg-number 0) + (format nil "N: ~D =============" value))) + ((= offset 1) "CE saved env pointer") + ((= offset 2) "CP saved cont pointer") + ((= offset 3) "CB previous choice") + ((= offset 4) "BP next clause") + ((= offset 5) "TR saved trail pointer") + ((= offset 6) "H saved heap pointer") + (t (prog1 + (format nil " A~D: ~A ~A" + arg-number + (cell-type-short-name type) + (dump-cell-value value)) + (decf remaining) + (incf arg-number)))) + (if (= address (wam-environment-pointer wam)) " <- E" "") + (if (= address (wam-backtrack-pointer wam)) " <- B" "") + (if (= address (wam-cut-pointer wam)) " <- CUT" "")) + :finally (return address))) + +(defun dump-stack (wam) + (format t "STACK~%") + (format t " +----------+------------------+-------------------------------+~%") + (format t " | ADDR | VALUE | |~%") + (format t " +----------+------------------+-------------------------------+~%") + (with-accessors ((e wam-environment-pointer) (b wam-backtrack-pointer)) wam + (when (not (= +stack-start+ e b)) + (loop :with address = (1+ +stack-start+) + :while (< address (wam-stack-top wam)) + :do (cond + ((= address e) (setf address (dump-stack-frame wam address))) + ((= address b) (setf address (dump-stack-choice wam address))) + (t + (format t " | ~8,'0X | | |~%" address) + (incf address)))))) + (format t " +----------+------------------+-------------------------------+~%")) + + +(defun pretty-functor (functor) + (etypecase functor + (symbol (format nil "~A/0" functor)) + (cons (destructuring-bind (symbol . arity) functor + (format nil "~A/~D" symbol arity))))) + +(defun pretty-argument (argument) + (typecase argument + (fixnum (format nil "~4,'0X" argument)) + (t (format nil "#<*>")))) + +(defun pretty-arguments (arguments) + (format nil "~10<~{ ~A~}~;~>" (mapcar #'pretty-argument arguments))) + + +(defgeneric instruction-details (opcode arguments)) + +(defmethod instruction-details ((opcode t) arguments) + (format nil "~A~A" + (opcode-short-name opcode) + (pretty-arguments arguments))) + + +(defmethod instruction-details ((opcode (eql +opcode-get-structure+)) arguments) + (format nil "GETS~A ; X~A = ~A/~D" + (pretty-arguments arguments) + (third arguments) + (first arguments) + (second arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-put-structure+)) arguments) + (format nil "PUTS~A ; X~A <- new ~A/~D" + (pretty-arguments arguments) + (third arguments) + (first arguments) + (second arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-get-variable-local+)) arguments) + (format nil "GVAR~A ; X~A <- A~A" + (pretty-arguments arguments) + (first arguments) + (second arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-get-variable-stack+)) arguments) + (format nil "GVAR~A ; Y~A <- A~A" + (pretty-arguments arguments) + (first arguments) + (second arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-get-value-local+)) arguments) + (format nil "GVLU~A ; X~A = A~A" + (pretty-arguments arguments) + (first arguments) + (second arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-get-value-stack+)) arguments) + (format nil "GVLU~A ; Y~A = A~A" + (pretty-arguments arguments) + (first arguments) + (second arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-put-variable-local+)) arguments) + (format nil "PVAR~A ; X~A <- A~A <- new unbound REF" + (pretty-arguments arguments) + (first arguments) + (second arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-put-variable-stack+)) arguments) + (format nil "PVAR~A ; Y~A <- A~A <- new unbound REF" + (pretty-arguments arguments) + (first arguments) + (second arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-put-value-local+)) arguments) + (format nil "PVLU~A ; A~A <- X~A" + (pretty-arguments arguments) + (second arguments) + (first arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-put-value-stack+)) arguments) + (format nil "PVLU~A ; A~A <- Y~A" + (pretty-arguments arguments) + (second arguments) + (first arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-call+)) arguments) + (format nil "CALL~A ; call ~A/~D" + (pretty-arguments arguments) + (first arguments) + (second arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-jump+)) arguments) + (format nil "JUMP~A ; jump ~A/~D" + (pretty-arguments arguments) + (first arguments) + (second arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-dynamic-call+)) arguments) + (format nil "DYCL~A ; dynamic call" + (pretty-arguments arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-dynamic-jump+)) arguments) + (format nil "DYJP~A ; dynamic jump" + (pretty-arguments arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-get-constant+)) arguments) + (format nil "GCON~A ; X~A = CONSTANT ~A" + (pretty-arguments arguments) + (second arguments) + (pretty-functor (first arguments)))) + +(defmethod instruction-details ((opcode (eql +opcode-put-constant+)) arguments) + (format nil "PCON~A ; X~A <- CONSTANT ~A" + (pretty-arguments arguments) + (second arguments) + (pretty-functor (first arguments)))) + +(defmethod instruction-details ((opcode (eql +opcode-subterm-constant+)) arguments) + (format nil "SCON~A ; SUBTERM CONSTANT ~A" + (pretty-arguments arguments) + (pretty-functor (first arguments)))) + +(defmethod instruction-details ((opcode (eql +opcode-get-list+)) arguments) + (format nil "GLST~A ; X~A = [vvv | vvv]" + (pretty-arguments arguments) + (first arguments))) + +(defmethod instruction-details ((opcode (eql +opcode-put-list+)) arguments) + (format nil "PLST~A ; X~A = [vvv | vvv]" + (pretty-arguments arguments) + (first arguments))) + + +(defun functor-table (wam) + (loop + :with result = (make-hash-table) + :for arity :from 0 + :for table :across (wam-code-labels wam) + :when table + :do (maphash (lambda (functor loc) + (setf (gethash loc result) + (cons functor arity))) + table) + :finally (return result))) + +(defun dump-code-store (wam code-store + &optional + (from 0) + (to (length code-store))) + ;; This is a little trickier than might be expected. We have to walk from + ;; address 0 no matter what `from` we get, because instruction sizes vary and + ;; aren't aligned. So if we just start at `from` we might start in the middle + ;; of an instruction and everything would be fucked. + (let ((addr 0) + (lbls (functor-table wam))) ; oh god + (while (< addr to) + (let ((instruction (retrieve-instruction code-store addr))) + (when (>= addr from) + (when (not (= +opcode-noop+ (aref instruction 0))) + + (let ((lbl (gethash addr lbls))) ; forgive me + (when lbl + (format t ";;;; BEGIN ~A~%" + (pretty-functor lbl)))) + (format t ";~A~4,'0X: " + (if (= (wam-program-counter wam) addr) + ">>" + " ") + addr) + (format t "~A~%" (instruction-details (aref instruction 0) + (rest (coerce instruction 'list)))))) + (incf addr (length instruction)))))) + +(defun dump-code + (wam + &optional + (from (max (- (wam-program-counter wam) 8) ; wow + 0)) ; this + (to (min (+ (wam-program-counter wam) 8) ; is + (length (wam-code wam))))) ; bad + (format t "CODE (size: ~D frame(s) / ~:[OPEN~;CLOSED~])~%" + (length (wam-logic-stack wam)) + (wam-logic-closed-p wam)) + (dump-code-store wam (wam-code wam) from to)) + + +(defun dump-wam-registers (wam) + (format t "REGISTERS:~%") + (format t "~5@A -> ~8X~%" "S" (wam-subterm wam)) + (loop :for register :from 0 :to +register-count+ + :for type = (wam-store-type wam register) + :for value = (wam-store-value wam register) + :when (not (cell-type-p (wam register) :null)) + :do (format t "~5@A -> ~A ~A ~A~%" + (format nil "X~D" register) + (cell-type-short-name type) + (dump-cell-value value) + (format nil "; ~A" (first (extract-things wam (list register))))))) + + +(defun dump-wam-trail (wam) + (format t " TRAIL: ") + (loop :for address :across (wam-trail wam) :do + (format t "~8,'0X //" address)) + (format t "~%")) + + +(defun dump-wam (wam from to) + (format t " FAIL: ~A~%" (wam-fail wam)) + (format t " BACKTRACKED?: ~A~%" (wam-backtracked wam)) + (format t " MODE: ~S~%" (wam-mode wam)) + (format t " HEAP SIZE: ~A~%" (- (wam-heap-pointer wam) +heap-start+)) + (format t " PROGRAM COUNTER: ~4,'0X~%" (wam-program-counter wam)) + (format t "CONTINUATION PTR: ~4,'0X~%" (wam-continuation-pointer wam)) + (format t " ENVIRONMENT PTR: ~4,'0X~%" (wam-environment-pointer wam)) + (format t " BACKTRACK PTR: ~4,'0X~%" (wam-backtrack-pointer wam)) + (format t " CUT PTR: ~4,'0X~%" (wam-cut-pointer wam)) + (format t "HEAP BCKTRCK PTR: ~4,'0X~%" (wam-heap-backtrack-pointer wam)) + (dump-wam-trail wam) + (dump-wam-registers wam) + (format t "~%") + (dump-heap wam from to) + (format t "~%") + (dump-stack wam) + (format t "~%") + (dump-code wam)) + +(defun dump-wam-query-code (wam &optional (max +maximum-query-size+)) + (with-slots (code) wam + (dump-code-store wam code 0 max))) + +(defun dump-wam-code (wam) + (with-slots (code) wam + (dump-code-store wam code +maximum-query-size+ (length code)))) + +(defun dump-wam-full (wam) + (dump-wam wam (1+ +heap-start+) (wam-heap-pointer wam))) + diff -r 19200659513a -r 81939d20415a src/types.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/types.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,127 @@ +(in-package #:bones.wam) + +; (deftype cell-type () ; todo: pick one of these... +; `(integer 0 ,(1- +number-of-cell-types+))) + +(deftype cell-type () + 'fixnum) + +(deftype cell-value () + '(or fixnum t)) + + +(deftype type-store () + '(simple-array cell-type (*))) + +(deftype value-store () + '(simple-array cell-value (*))) + + +(deftype store-index () + `(integer 0 ,(1- +store-limit+))) + +(deftype heap-index () + `(integer ,+heap-start+ ,(1- +store-limit+))) + +(deftype stack-index () + `(integer ,+stack-start+ ,(1- +stack-end+))) + +(deftype trail-index () + `(integer 0 ,(1- +trail-limit+))) + +(deftype register-index () + `(integer 0 ,(1- +register-count+))) + + +(deftype fname () + 'symbol) + +(deftype arity () + `(integer 0 ,+maximum-arity+)) + + +(deftype code-index () + ;; either an address or the sentinel + `(integer 0 ,(1- +code-limit+))) + +(deftype code-word () + t) + + +(deftype generic-code-store () + `(simple-array code-word (*))) + +(deftype query-code-holder () + `(simple-array code-word (,+maximum-query-size+))) + +(deftype query-size () + `(integer 0 ,+maximum-query-size+)) + +(deftype instruction-size () + `(integer 1 ,+maximum-instruction-size+)) + + +(deftype opcode () + `(integer 0 ,(1- +number-of-opcodes+))) + + +(deftype stack-frame-size () + `(integer 4 ,+stack-frame-size-limit+)) + +(deftype stack-choice-size () + ;; TODO: is this actually right? check on frame size limit vs choice point + ;; size limit... + `(integer 8 ,+stack-frame-size-limit+)) + +(deftype stack-frame-argcount () + 'arity) + +(deftype continuation-pointer () + 'code-index) + +(deftype environment-pointer () + 'stack-index) + +(deftype backtrack-pointer () + 'stack-index) + + +(deftype stack-frame-word () + '(or + environment-pointer ; CE + continuation-pointer ; CP + stack-frame-argcount)) ; N + +(deftype stack-choice-word () + '(or + environment-pointer ; CE + backtrack-pointer ; B, CC + continuation-pointer ; CP, BP + stack-frame-argcount ; N + trail-index ; TR + heap-index)) ; H + +(deftype stack-word () + '(or stack-frame-word stack-choice-word)) + + +;;;; Sanity Checks +;;; The values on the WAM stack are a bit of a messy situation. The WAM store +;;; is defined as an array of cells, but certain things on the stack aren't +;;; actually cells (e.g. the stored continuation pointer). +;;; +;;; This shouldn't be a problem (aside from being ugly) as long as they all fit +;;; inside fixnums... so let's just make sure that's the case. + +(defun sanity-check-stack-type (type) + (assert (subtypep type 'fixnum) () + "Type ~A is too large!" + type) + (values)) + +(sanity-check-stack-type 'stack-frame-argcount) +(sanity-check-stack-type 'environment-pointer) +(sanity-check-stack-type 'continuation-pointer) +(sanity-check-stack-type 'backtrack-pointer) +(sanity-check-stack-type 'trail-index) +(sanity-check-stack-type 'stack-word) diff -r 19200659513a -r 81939d20415a src/ui.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ui.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,233 @@ +(in-package #:bones.wam) + + +;;;; Database +(defvar *database* nil) + + +(defun make-database () + (make-wam)) + +(defun reset-database () + (setf *database* (make-database))) + + +(defmacro with-database (database &body body) + `(let ((*database* ,database)) + ,@body)) + +(defmacro with-fresh-database (&body body) + `(with-database (make-database) ,@body)) + + +;;;; Normalization +(eval-when (:compile-toplevel :load-toplevel :execute) + (defun normalize-term (term) + ;; Normally a rule consists of a head terms and many body terms, like so: + ;; + ;; (likes sally ?who) (likes ?who cats) + ;; + ;; But sometimes people are lazy and don't include the parens around + ;; zero-arity predicates: + ;; + ;; (happy steve) sunny + (if (and (not (variablep term)) + (symbolp term) + (not (eq term '!))) ; jesus + (list term) + term))) + + +;;;; Assertion +(defun invoke-rule (head &rest body) + (assert *database* (*database*) "No database.") + (wam-logic-frame-add-clause! *database* + (list* (normalize-term head) + (mapcar #'normalize-term body))) + nil) + +(defun invoke-fact (fact) + (invoke-rule fact) + nil) + +(defun invoke-facts (&rest facts) + (mapc #'invoke-fact facts) + nil) + + +(defmacro rule (head &body body) + `(invoke-rule ',head ,@(loop :for term :in body :collect `',term))) + +(defmacro fact (fact) + `(invoke-fact ',fact)) + +(defmacro facts (&body facts) + `(progn + ,@(loop :for f :in facts :collect `(fact ,f)))) + + +;;;; Logic Frames +(defun push-logic-frame () + (assert *database* (*database*) "No database.") + (wam-push-logic-frame! *database*)) + +(defun pop-logic-frame () + (assert *database* (*database*) "No database.") + (wam-pop-logic-frame! *database*)) + +(defun finalize-logic-frame () + (assert *database* (*database*) "No database.") + (wam-finalize-logic-frame! *database*)) + +(defmacro push-logic-frame-with (&body body) + `(prog2 + (push-logic-frame) + (progn ,@body) + (finalize-logic-frame))) + + +;;;; Querying +(defun perform-aot-query (code size vars result-function) + (assert *database* (*database*) "No database.") + (run-aot-compiled-query *database* code size vars + :result-function result-function)) + +(defun perform-query (terms result-function) + (assert *database* (*database*) "No database.") + (run-query *database* (mapcar #'normalize-term terms) + :result-function result-function)) + + +(defmacro define-invocation ((name aot-name) arglist &body body) + (with-gensyms (terms data code size vars) + `(progn + (defun ,name ,(append arglist `(&rest ,terms)) + (macrolet ((invoke (result-function) + `(perform-query ,',terms ,result-function))) + ,@body)) + (defun ,aot-name ,(append arglist `(,data)) + (destructuring-bind (,code ,size ,vars) ,data + (macrolet ((invoke (result-function) + `(perform-aot-query ,',code ,',size ,',vars + ,result-function))) + ,@body)))))) + + +(define-invocation (invoke-query invoke-query-aot) () + (let ((result nil) + (succeeded nil)) + (invoke (lambda (r) + (setf result r + succeeded t) + t)) + (values result succeeded))) + +(define-invocation (invoke-query-all invoke-query-all-aot) () + (let ((results nil)) + (invoke (lambda (result) + (push result results) + nil)) + (nreverse results))) + +(define-invocation (invoke-query-map invoke-query-map-aot) (function) + (let ((results nil)) + (invoke (lambda (result) + (push (funcall function result) results) + nil)) + (nreverse results))) + +(define-invocation (invoke-query-do invoke-query-do-aot) (function) + (invoke (lambda (result) + (funcall function result) + nil)) + nil) + +(define-invocation (invoke-query-find invoke-query-find-aot) (predicate) + (let ((results nil) + (succeeded nil)) + (invoke (lambda (result) + (if (funcall predicate result) + (progn (setf results result + succeeded t) + t) + nil))) + (values results succeeded))) + +(define-invocation (invoke-prove invoke-prove-aot) () + (let ((succeeded nil)) + (invoke (lambda (result) + (declare (ignore result)) + (setf succeeded t) + t)) + succeeded)) + + +(defun quote-terms (terms) + (loop :for term :in terms :collect `',term)) + +(defmacro query (&rest terms) + `(invoke-query ,@(quote-terms terms))) + +(defmacro query-all (&rest terms) + `(invoke-query-all ,@(quote-terms terms))) + +(defmacro query-map (function &rest terms) + `(invoke-query-map ,function ,@(quote-terms terms))) + +(defmacro query-do (function &rest terms) + `(invoke-query-do ,function ,@(quote-terms terms))) + +(defmacro query-find (predicate &rest terms) + `(invoke-query-find ,predicate ,@(quote-terms terms))) + +(defmacro prove (&rest terms) + `(invoke-prove ,@(quote-terms terms))) + + +;;;; Chili Dogs +(eval-when (:compile-toplevel :load-toplevel :execute) + (defun make-aot-data-form (terms) + (with-gensyms (code size vars) + `(load-time-value + (let* ((,code (allocate-query-holder))) + (multiple-value-bind (,vars ,size) + (compile-query-into + ,code ',(->> terms + (mapcar #'eval) + (mapcar #'normalize-term))) + (list ,code ,size ,vars))) + t)))) + + +(defmacro define-invocation-compiler-macro (name aot-name arglist) + `(define-compiler-macro ,name (&whole form + ,@arglist + &rest terms + &environment env) + (if (every (rcurry #'constantp env) terms) + `(,',aot-name ,,@arglist ,(make-aot-data-form terms)) + form))) + + +(define-invocation-compiler-macro invoke-query invoke-query-aot ()) +(define-invocation-compiler-macro invoke-query-all invoke-query-all-aot ()) +(define-invocation-compiler-macro invoke-query-map invoke-query-map-aot (function)) +(define-invocation-compiler-macro invoke-query-do invoke-query-do-aot (function)) +(define-invocation-compiler-macro invoke-query-find invoke-query-find-aot (predicate)) +(define-invocation-compiler-macro invoke-prove invoke-prove-aot ()) + + +;;;; Debugging +(defun dump (&optional full-code) + (dump-wam-full *database*) + (when full-code + (dump-wam-code *database*))) + +(defmacro bytecode (&body body) + `(with-fresh-database + (push-logic-frame-with ,@body) + (format t ";;;; PROGRAM CODE =======================~%") + (dump-wam-code *database*) + (format t "~%;;;; QUERY CODE =========================~%") + (dump-wam-query-code *database*))) + diff -r 19200659513a -r 81939d20415a src/vm.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/vm.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,919 @@ +(in-package #:bones.wam) + +;;;; Config +(defvar *step* nil) + + +;;;; Utilities +(declaim (inline functors-match-p + constants-match-p)) + + +(defun push-unbound-reference! (wam) + "Push a new unbound reference cell onto the heap, returning its address." + (wam-heap-push! wam +cell-type-reference+ (wam-heap-pointer wam))) + +(defun push-new-structure! (wam) + "Push a new structure cell onto the heap, returning its address. + + The structure cell's value will point at the next address, so make sure you + push something there too! + + " + (wam-heap-push! wam +cell-type-structure+ (1+ (wam-heap-pointer wam)))) + +(defun push-new-list! (wam) + "Push a new list cell onto the heap, returning its address. + + The list cell's value will point at the next address, so make sure you push + something there too! + + " + (wam-heap-push! wam +cell-type-list+ (1+ (wam-heap-pointer wam)))) + +(defun push-new-functor! (wam functor arity) + "Push a new functor cell pair onto the heap, returning its address." + (prog1 + (wam-heap-push! wam +cell-type-functor+ functor) + (wam-heap-push! wam +cell-type-lisp-object+ arity))) + +(defun push-new-constant! (wam constant) + "Push a new constant cell onto the heap, returning its address." + (wam-heap-push! wam +cell-type-constant+ constant)) + + +(defun functors-match-p (f1 a1 f2 a2) + "Return whether the two functor cell values represent the same functor." + (and (eq f1 f2) + (= a1 a2))) + +(defun constants-match-p (c1 c2) + "Return whether the two constant cell values unify." + (eq c1 c2)) + +(defun lisp-objects-match-p (o1 o2) + "Return whether the two lisp object cells unify." + (eql o1 o2)) + + +;;;; "Ancillary" Functions +(declaim (inline deref unbind! trail!)) + + +(defun backtrack! (wam) + "Backtrack after a failure." + (if (wam-backtrack-pointer-unset-p wam) + (setf (wam-fail wam) t) + (setf (wam-program-counter wam) (wam-stack-choice-bp wam) + (wam-cut-pointer wam) (wam-stack-choice-cc wam) + (wam-backtracked wam) t))) + +(defun trail! (wam address) + "Push the given address onto the trail (but only if necessary)." + (when (< address (wam-heap-backtrack-pointer wam)) + (wam-trail-push! wam address))) + +(defun unbind! (wam address) + "Unbind the reference cell at `address`. + + No error checking is done, so please don't try to unbind something that's not + (originally) a reference cell. + + " + (wam-set-store-cell! wam address +cell-type-reference+ address)) + +(defun unwind-trail! (wam trail-start trail-end) + "Unbind all the things in the given range of the trail." + (loop :for i :from trail-start :below trail-end :do + (unbind! wam (wam-trail-value wam i)))) + +(defun tidy-trail! (wam) + (with-accessors ((tr wam-trail-pointer) + (h wam-heap-pointer) + (hb wam-heap-backtrack-pointer) + (b wam-backtrack-pointer)) wam + (loop + ;; The book is, yet again, fucked. It just sets `i` to be the trail + ;; pointer from the choice point frame. But what if we just popped off + ;; the last choice point? If that's the case we need to look over the + ;; entire trail. + :with i = (if (wam-backtrack-pointer-unset-p wam b) + 0 + (wam-stack-choice-tr wam)) + :for target = (wam-trail-value wam i) + :while (< i tr) :do + (if (or (< target hb) + (and (< h target) + (< target b))) + (incf i) + (progn + (setf (wam-trail-value wam i) + (wam-trail-value wam (1- tr))) + (decf tr)))))) + +(defun deref (wam address) + "Dereference the address in the WAM store to its eventual destination. + + If the address is a variable that's bound to something, that something will be + looked up (recursively) and the address of whatever it's ultimately bound to + will be returned. + + " + ;; SBCL won't inline recursive functions :( + (loop + (cell-typecase (wam address) + ((:reference ref) (if (= address ref) + (return address) ; unbound ref + (setf address ref))) ; bound ref + (t (return address))))) ; non-ref + +(defun bind! (wam address-1 address-2) + "Bind the unbound reference cell to the other. + + `bind!` takes two addresses as arguments. You are expected to have `deref`ed + previously to obtain these addresses, so neither of them should ever refer to + a bound reference. + + At least one of the arguments *must* refer to an unbound reference cell. This + unbound reference will be bound to point at the other address. + + If *both* addresses refer to unbound references, the direction of the binding + is chosen arbitrarily. + + " + ;; In case it's not absolutely clear from the book: binding has to actually + ;; COPY the source cell into the destination. + ;; + ;; It can't just update the cell value of the destination REF, because if + ;; you're binding a REF on the heap to something in a register then doing so + ;; would end up with a REF to a register address. This would be bad because + ;; that register would probably get clobbered later, and the REF would now be + ;; pointing to garbage. + (cond + ;; Bind (a1 <- a2) if: + ;; + ;; * A1 is a REF and A2 is something else, or... + ;; * They're both REFs but A2 has a lower address than A1. + ((and (cell-type-p (wam address-1) :reference) + (or (not (cell-type-p (wam address-2) :reference)) + (< address-2 address-1))) + (wam-copy-store-cell! wam address-1 address-2) + (trail! wam address-1)) + + ;; Bind (a2 <- a1) if A2 is a REF and A1 is something else. + ((cell-type-p (wam address-2) :reference) + (wam-copy-store-cell! wam address-2 address-1) + (trail! wam address-2)) + + ;; wut + (t (error "At least one cell must be an unbound reference when binding.")))) + +(defun unify! (wam a1 a2) + (setf (wam-fail wam) nil) + (wam-unification-stack-push! wam a1 a2) + + (until (or (wam-fail wam) + (wam-unification-stack-empty-p wam)) + (let* ((d1 (deref wam (wam-unification-stack-pop! wam))) + (d2 (deref wam (wam-unification-stack-pop! wam))) + (t1 (wam-store-type wam d1)) + (t2 (wam-store-type wam d2))) + (macrolet ((both (cell-type-designator) + `(and + (cell-type= t1 ,cell-type-designator) + (cell-type= t2 ,cell-type-designator))) + (either (cell-type-designator) + `(or + (cell-type= t1 ,cell-type-designator) + (cell-type= t2 ,cell-type-designator)))) + (flet ((match-values (predicate) + (when (not (funcall predicate + (wam-store-value wam d1) + (wam-store-value wam d2))) + (backtrack! wam)))) + (when (not (= d1 d2)) + (cond + ;; If at least one is a reference, bind them. + ;; + ;; We know that any references we see here will be unbound because + ;; we deref'ed them above. + ((either :reference) + (bind! wam d1 d2)) + + ;; Otherwise if they're both constants or lisp objects, make sure + ;; they match exactly. + ((both :constant) (match-values #'constants-match-p)) + ((both :lisp-object) (match-values #'lisp-objects-match-p)) + + ;; Otherwise if they're both lists, unify their contents. + ((both :list) + (wam-unification-stack-push! wam + (wam-store-value wam d1) + (wam-store-value wam d2)) + (wam-unification-stack-push! wam + (1+ (wam-store-value wam d1)) + (1+ (wam-store-value wam d2)))) + + ;; Otherwise if they're both structures, make sure they match and + ;; then schedule their subterms to be unified. + ((both :structure) + (let* ((s1 (wam-store-value wam d1)) ; find where they + (s2 (wam-store-value wam d2)) ; start on the heap + (f1 (wam-store-value wam s1)) ; grab the + (f2 (wam-store-value wam s2)) ; functors + (a1 (wam-store-value wam (1+ s1))) ; and the + (a2 (wam-store-value wam (1+ s2)))) ; arities + (if (functors-match-p f1 a1 f2 a2) + ;; If the functors match, push their pairs of arguments onto + ;; the stack to be unified. + (loop :repeat a1 + :for subterm1 :from (+ 2 s1) + :for subterm2 :from (+ 2 s2) + :do (wam-unification-stack-push! wam subterm1 subterm2)) + ;; Otherwise we're hosed. + (backtrack! wam)))) + + ;; Otherwise we're looking at two different kinds of cells, and are + ;; just totally hosed. Backtrack. + (t (backtrack! wam))))))))) + + +;;;; Instruction Definition +;;; These macros are a pair of real greasy bastards. +;;; +;;; Basically the issue is that there exist two separate types of registers: +;;; local registers and stack registers. The process of retrieving the contents +;;; of a register is different for each type. +;;; +;;; Certain machine instructions take a register as an argument and do something +;;; with it. Because the two register types require different access methods, +;;; the instruction needs to know what kind of register it's dealing with. +;;; +;;; One possible way to solve this would be to encode whether this is +;;; a local/stack register in the register argument itself (e.g. with a tag +;;; bit). This would work, and a previous version of the code did that, but +;;; it's not ideal. It turns out we know the type of the register at compile +;;; time, so requiring a mask/test at run time for every register access is +;;; wasteful. +;;; +;;; Instead we use an ugly, but fast, solution. For every instruction that +;;; takes a register argument we make TWO opcodes instead of just one. The +;;; first is the "-local" variant of the instruction, which treats its register +;;; argument as a local register. The second is the "-stack" variant. When we +;;; compile we can just pick the appropriate opcode, and now we no longer need +;;; a runtime test for every single register assignment. +;;; +;;; To make the process of defining these two "variants" less excruciating we +;;; have these two macros. `define-instruction` (singular) is just a little +;;; sugar around `defun`, for those instructions that don't deal with +;;; arguments. +;;; +;;; `define-instructions` (plural) is the awful one. You pass it a pair of +;;; symbols for the two variant names. Two functions will be defined, both with +;;; the same body, with a few symbols macroletted to the appropriate access +;;; code. +;;; +;;; So in the body, instead of using: +;;; +;;; (wam-set-{local/stack}-register wam reg type value) +;;; +;;; you use: +;;; +;;; (%wam-set-register% wam reg type value) +;;; +;;; and it'll do the right thing. + +(defmacro define-instruction + ((name &optional should-inline) lambda-list &body body) + "Define an instruction function. + + This is just sugar over `defun`. + + " + `(progn + (declaim (,(if should-inline 'inline 'notinline) ,name)) + (defun ,name ,lambda-list + ,@body + nil))) + +(defmacro define-instructions + ((local-name stack-name &optional should-inline) lambda-list &body body) + "Define a local/stack pair of instructions." + `(progn + (macrolet ((%wam-register% (wam register) + `(wam-local-register-address ,wam ,register)) + (%wam-register-type% (wam register) + `(wam-local-register-type ,wam ,register)) + (%wam-register-value% (wam register) + `(wam-local-register-value ,wam ,register)) + (%wam-set-register% (wam register type value) + `(wam-set-local-register! ,wam ,register ,type ,value)) + (%wam-copy-to-register% (wam register source) + `(wam-copy-to-local-register! ,wam ,register ,source))) + (define-instruction (,local-name ,should-inline) ,lambda-list + ,@body)) + (macrolet ((%wam-register% (wam register) + `(wam-stack-register-address ,wam ,register)) + (%wam-register-type% (wam register) + `(wam-stack-register-type ,wam ,register)) + (%wam-register-value% (wam register) + `(wam-stack-register-value ,wam ,register)) + (%wam-set-register% (wam register type value) + `(wam-set-stack-register! ,wam ,register ,type ,value)) + (%wam-copy-to-register% (wam register source) + `(wam-copy-to-stack-register! ,wam ,register ,source))) + (define-instruction (,stack-name ,should-inline) ,lambda-list + ,@body)))) + + +;;;; Query Instructions +(define-instruction (%put-structure) (wam functor arity register) + (wam-set-local-register! wam register + +cell-type-structure+ + (push-new-functor! wam functor arity)) + (setf (wam-mode wam) :write)) + +(define-instruction (%put-list) (wam register) + (wam-set-local-register! wam register + +cell-type-list+ + (wam-heap-pointer wam)) + (setf (wam-mode wam) :write)) + + +(define-instructions (%put-variable-local %put-variable-stack) + (wam register argument) + (let ((ref (push-unbound-reference! wam))) + (%wam-copy-to-register% wam register ref) + (wam-copy-to-local-register! wam argument ref) + (setf (wam-mode wam) :write))) + +(define-instructions (%put-value-local %put-value-stack) + (wam register argument) + (wam-copy-to-local-register! wam argument (%wam-register% wam register)) + (setf (wam-mode wam) :write)) + + +(define-instruction (%put-void) (wam argument) + (wam-copy-to-local-register! wam argument (push-unbound-reference! wam))) + + +;;;; Program Instructions +(define-instruction (%get-structure) (wam functor arity register) + (cell-typecase (wam (deref wam register) address) + ;; If the register points at an unbound reference cell, we push three new + ;; cells onto the heap: + ;; + ;; | N | STR | N+1 | + ;; | N+1 | FUN | f | + ;; | N+2 | OBJ | n | + ;; | | | | <- S + ;; + ;; Then we bind this reference cell to point at the new structure, set + ;; the S register to point beneath it and flip over to write mode. + ;; + ;; It seems a bit confusing that we don't push the rest of the structure + ;; stuff on the heap after it too. But that's going to happen in the + ;; next few instructions (which will be subterm-*'s, executed in write + ;; mode). + (:reference + (let ((structure-address (push-new-structure! wam)) + (functor-address (push-new-functor! wam functor arity))) + (bind! wam address structure-address) + (setf (wam-mode wam) :write + (wam-subterm wam) (+ 2 functor-address)))) + + ;; If the register points at a structure cell, then we look at where + ;; that cell points (which will be the functor for the structure): + ;; + ;; | N | STR | M | points at the structure, not necessarily contiguous + ;; | ... | + ;; | M | FUN | f | the functor (hopefully it matches) + ;; | M+1 | OBJ | 2 | the arity (hopefully it matches) + ;; | M+2 | ... | ... | pieces of the structure, always contiguous + ;; | M+3 | ... | ... | and always right after the functor + ;; + ;; If it matches the functor we're looking for, we can proceed. We set + ;; the S register to the address of the first subform we need to match + ;; (M+2 in the example above). + ((:structure functor-address) + (cell-typecase (wam functor-address) + ((:functor f n) + (if (functors-match-p functor arity f n) + (setf (wam-mode wam) :read + (wam-subterm wam) (+ 2 functor-address)) + (backtrack! wam))))) + + ;; Otherwise we can't unify, so backtrack. + (t (backtrack! wam)))) + +(define-instruction (%get-list) (wam register) + (cell-typecase (wam (deref wam register) address) + ;; If the register points at a reference (unbound, because we deref'ed) we + ;; bind it to a list and flip into write mode to write the upcoming two + ;; things as its contents. + (:reference + (bind! wam address (push-new-list! wam)) + (setf (wam-mode wam) :write)) + + ;; If this is a list, we need to unify its subterms. + ((:list contents) + (setf (wam-mode wam) :read + (wam-subterm wam) contents)) + + ;; Otherwise we can't unify. + (t (backtrack! wam)))) + + +(define-instructions (%get-variable-local %get-variable-stack) + (wam register argument) + (%wam-copy-to-register% wam register argument)) + +(define-instructions (%get-value-local %get-value-stack) + (wam register argument) + (unify! wam register argument)) + + +;;;; Subterm Instructions +(define-instructions (%subterm-variable-local %subterm-variable-stack) + (wam register) + (%wam-copy-to-register% wam register + (ecase (wam-mode wam) + (:read (wam-subterm wam)) + (:write (push-unbound-reference! wam)))) + (incf (wam-subterm wam))) + +(define-instructions (%subterm-value-local %subterm-value-stack) + (wam register) + (ecase (wam-mode wam) + (:read (unify! wam register (wam-subterm wam))) + (:write (wam-heap-push! wam + (%wam-register-type% wam register) + (%wam-register-value% wam register)))) + (incf (wam-subterm wam))) + +(define-instruction (%subterm-void) (wam n) + (ecase (wam-mode wam) + (:read (incf (wam-subterm wam) n)) + (:write (loop :repeat n + :do (push-unbound-reference! wam))))) + + +;;;; Control Instructions +(declaim (inline %%procedure-call %%dynamic-procedure-call)) + + +(defun %%procedure-call (wam functor arity program-counter-increment is-tail) + (let* ((target (wam-code-label wam functor arity))) + (if (not target) + ;; Trying to call an unknown procedure. + (backtrack! wam) + (progn + (when (not is-tail) + (setf (wam-continuation-pointer wam) ; CP <- next instruction + (+ (wam-program-counter wam) program-counter-increment))) + (setf (wam-number-of-arguments wam) ; set NARGS + arity + + (wam-cut-pointer wam) ; set B0 in case we have a cut + (wam-backtrack-pointer wam) + + (wam-program-counter wam) ; jump + target))))) + +(defun %%dynamic-procedure-call (wam is-tail) + (flet + ((%go (functor arity) + (if is-tail + (%%procedure-call + wam functor arity (instruction-size +opcode-dynamic-jump+) t) + (%%procedure-call + wam functor arity (instruction-size +opcode-dynamic-call+) nil))) + (load-arguments (n start-address) + (loop :for arg :from 0 :below n + :for source :from start-address + :do (wam-copy-to-local-register! wam arg source)))) + (cell-typecase (wam (deref wam 0)) ; A_0 + ((:structure functor-address) + ;; If we have a non-zero-arity structure, we need to set up the + ;; argument registers before we call it. Luckily all the arguments + ;; conveniently live contiguously right after the functor cell. + (cell-typecase (wam functor-address) + ((:functor functor arity) + (load-arguments arity (+ 2 functor-address)) + (%go functor arity)))) + + ;; Zero-arity functors don't need to set up anything at all -- we can + ;; just call them immediately. + ((:constant c) (%go c 0)) + + ;; It's okay to do (call :var), but :var has to be bound by the time you + ;; actually reach it at runtime. + (:reference (error "Cannot dynamically call an unbound variable.")) + + ; You can't call/1 anything else. + (t (error "Cannot dynamically call something other than a structure."))))) + + +(define-instruction (%jump) (wam functor arity) + (%%procedure-call wam functor arity + (instruction-size +opcode-jump+) + t)) + +(define-instruction (%call) (wam functor arity) + (%%procedure-call wam functor arity + (instruction-size +opcode-call+) + nil)) + + +(define-instruction (%dynamic-call) (wam) + (%%dynamic-procedure-call wam nil)) + +(define-instruction (%dynamic-jump) (wam) + (%%dynamic-procedure-call wam t)) + + +(define-instruction (%proceed) (wam) + (setf (wam-program-counter wam) ; P <- CP + (wam-continuation-pointer wam))) + +(define-instruction (%allocate) (wam n) + (let ((old-e (wam-environment-pointer wam)) + (new-e (wam-stack-top wam))) + (wam-stack-ensure-size wam (+ new-e 4 n)) + (setf (wam-stack-word wam new-e) old-e ; CE + (wam-stack-word wam (+ new-e 1)) (wam-continuation-pointer wam) ; CP + (wam-stack-word wam (+ new-e 2)) (wam-cut-pointer wam) ; B0 + (wam-stack-word wam (+ new-e 3)) n ; N + (wam-environment-pointer wam) new-e))) ; E <- new-e + +(define-instruction (%deallocate) (wam) + (setf (wam-continuation-pointer wam) (wam-stack-frame-cp wam) + (wam-environment-pointer wam) (wam-stack-frame-ce wam) + (wam-cut-pointer wam) (wam-stack-frame-cut wam))) + + +;;;; Choice Instructions +(declaim (inline reset-choice-point! restore-registers-from-choice-point!)) + + +(defun reset-choice-point! (wam b) + (setf (wam-backtrack-pointer wam) b + + ;; The book is wrong here: when resetting HB we use the NEW value of B, + ;; so the heap backtrack pointer gets set to the heap pointer saved in + ;; the PREVIOUS choice point. Thanks to the errata at + ;; https://github.com/a-yiorgos/wambook/blob/master/wamerratum.txt for + ;; pointing this out. + ;; + ;; ... well, almost. The errata is also wrong here. If we're popping + ;; the FIRST choice point, then just using the HB from the "previous + ;; choice point" is going to give us garbage, so we should check for + ;; that edge case too. Please kill me. + (wam-heap-backtrack-pointer wam) + (if (wam-backtrack-pointer-unset-p wam b) + +heap-start+ + (wam-stack-choice-h wam b)))) + +(defun restore-registers-from-choice-point! (wam b) + (loop :for register :from 0 :below (wam-stack-choice-n wam b) + :for saved-register :from (wam-stack-choice-argument-address wam 0 b) + :do (wam-copy-to-local-register! wam register saved-register))) + + +(define-instruction (%try) (wam next-clause) + (let ((new-b (wam-stack-top wam)) + (nargs (wam-number-of-arguments wam))) + (wam-stack-ensure-size wam (+ new-b 8 nargs)) + (setf (wam-stack-word wam new-b) nargs ; N + (wam-stack-word wam (+ new-b 1)) (wam-environment-pointer wam) ; CE + (wam-stack-word wam (+ new-b 2)) (wam-continuation-pointer wam) ; CP + (wam-stack-word wam (+ new-b 3)) (wam-backtrack-pointer wam) ; CB + (wam-stack-word wam (+ new-b 4)) next-clause ; BP + (wam-stack-word wam (+ new-b 5)) (wam-trail-pointer wam) ; TR + (wam-stack-word wam (+ new-b 6)) (wam-heap-pointer wam) ; H + (wam-stack-word wam (+ new-b 7)) (wam-cut-pointer wam) ; CC + (wam-heap-backtrack-pointer wam) (wam-heap-pointer wam) ; HB + (wam-backtrack-pointer wam) new-b) ; B + (loop :for i :from 0 :below nargs ; A_i + :for n :from 0 :below nargs ; arg N in the choice point frame + :do (wam-copy-to-stack-choice-argument! wam n i new-b)))) + +(define-instruction (%retry) (wam next-clause) + (let ((b (wam-backtrack-pointer wam))) + (restore-registers-from-choice-point! wam b) + (unwind-trail! wam (wam-stack-choice-tr wam b) (wam-trail-pointer wam)) + (setf (wam-environment-pointer wam) (wam-stack-choice-ce wam b) + (wam-continuation-pointer wam) (wam-stack-choice-cp wam b) + ;; overwrite the next clause address in the choice point + (wam-stack-word wam (+ b 4)) next-clause + (wam-trail-pointer wam) (wam-stack-choice-tr wam b) + (wam-heap-pointer wam) (wam-stack-choice-h wam b) + (wam-heap-backtrack-pointer wam) (wam-heap-pointer wam)))) + +(define-instruction (%trust) (wam) + (let* ((b (wam-backtrack-pointer wam)) + (old-b (wam-stack-choice-cb wam b))) + (restore-registers-from-choice-point! wam b) + (unwind-trail! wam (wam-stack-choice-tr wam b) (wam-trail-pointer wam)) + (setf (wam-environment-pointer wam) (wam-stack-choice-ce wam b) + (wam-continuation-pointer wam) (wam-stack-choice-cp wam b) + (wam-trail-pointer wam) (wam-stack-choice-tr wam b) + (wam-heap-pointer wam) (wam-stack-choice-h wam b)) + (reset-choice-point! wam old-b))) + +(define-instruction (%cut) (wam) + (let ((current-choice-point (wam-backtrack-pointer wam)) + (previous-choice-point (wam-stack-frame-cut wam))) + (when (< previous-choice-point current-choice-point) + (reset-choice-point! wam previous-choice-point) + (tidy-trail! wam)))) + + +;;;; Lisp Object Instructions +(declaim (inline %%match-lisp-object)) + + +(defun %%match-lisp-object (wam object address) + (cell-typecase (wam (deref wam address) address) + ;; If the thing points at a reference (unbound, because we deref'ed) we just + ;; bind it. + (:reference + (wam-set-store-cell! wam address +cell-type-lisp-object+ object) + (trail! wam address)) + + ;; If this is a lisp object, "unify" them with eql. + ((:lisp-object contents) + (when (not (lisp-objects-match-p object contents)) + (backtrack! wam))) + + ;; Otherwise we can't unify. + (t (backtrack! wam)))) + + +(define-instruction (%get-lisp-object) (wam object register) + (%%match-lisp-object wam object register)) + +(define-instruction (%put-lisp-object) (wam object register) + (wam-set-local-register! wam register +cell-type-lisp-object+ object)) + + +;;;; Constant Instructions +(declaim (inline %%match-constant)) + + +(defun %%match-constant (wam constant address) + (cell-typecase (wam (deref wam address) address) + (:reference + (wam-set-store-cell! wam address +cell-type-constant+ constant) + (trail! wam address)) + + ((:constant c) + (when (not (constants-match-p constant c)) + (backtrack! wam))) + + (t (backtrack! wam)))) + + +(define-instruction (%put-constant) (wam constant register) + (wam-set-local-register! wam register +cell-type-constant+ constant)) + +(define-instruction (%get-constant) (wam constant register) + (%%match-constant wam constant register)) + +(define-instruction (%subterm-constant) (wam constant) + (ecase (wam-mode wam) + (:read (%%match-constant wam constant (wam-subterm wam))) + (:write (push-new-constant! wam constant))) + (incf (wam-subterm wam))) + + +;;;; Running +(defun extract-things (wam addresses) + "Extract the things at the given store addresses. + + The things will be returned in the same order as the addresses were given. + + Unbound variables will be turned into uninterned symbols. There will only be + one such symbol for any specific unbound var, so if two addresses are + (eventually) bound to the same unbound var, the symbols returned from this + function will be `eql`. + + " + (let ((unbound-vars (list))) + (labels + ((mark-unbound-var (address) + (let ((symbol (make-symbol (format nil "?VAR-~D" ; lol + (length unbound-vars))))) + (car (push (cons address symbol) unbound-vars)))) + (extract-var (address) + (cdr (or (assoc address unbound-vars) + (mark-unbound-var address)))) + (recur (address) + (cell-typecase (wam (deref wam address) address) + (:null "NULL?!") + ((:reference r) (extract-var r)) + ((:structure s) (recur s)) + ((:list l) (cons (recur l) (recur (1+ l)))) + ((:constant c) c) + ((:functor functor arity) + (list* functor + (loop :repeat arity + :for subterm :from (+ 2 address) + :collect (recur subterm)))) + ((:lisp-object o) o) + (t (error "What to heck is this?"))))) + (mapcar #'recur addresses)))) + +(defun extract-query-results (wam vars) + (let* ((addresses (loop :for var :in vars + ;; TODO: make this suck less + :for i :from (+ (wam-environment-pointer wam) 4) + :collect i)) + (results (extract-things wam addresses))) + (weave vars results))) + + +(defmacro instruction-call (wam instruction code-store pc number-of-arguments) + "Expand into a call of the appropriate machine instruction. + + `pc` should be a safe place representing the program counter. + + `code-store` should be a safe place representing the instructions. + + " + `(,instruction ,wam + ,@(loop :for i :from 1 :to number-of-arguments + :collect `(aref ,code-store (+ ,pc ,i))))) + +(defmacro opcode-case ((wam code opcode-place) &rest clauses) + "Handle each opcode in the main VM run loop. + + Each clause should be of the form: + + (opcode &key instruction (increment-pc t) raw) + + `opcode` must be a constant by macroexpansion time. + + `instruction` should be the corresponding instruction function to call. If + given it will be expanded with the appropriate `aref`s to get its arguments + from the code store. + + If `increment-pc` is true an extra `incf` form will be added after the + instruction to handle incrementing the program counter (but only if + backtracking didn't happen). + + If a `raw` argument is given it will be spliced in verbatim. + + " + ;; This macro is pretty nasty, but it's better than trying to write it all out + ;; by hand. + ;; + ;; The main idea is that we want to be able to nicely specify all our + ;; opcode/instruction pairs in `run`. Furthermore, we need to handle + ;; everything really efficiently because `run` is the hot loop of the entire + ;; VM. It is the #1 function you'll see when profiling. + ;; + ;; This macro handles expanding each case clause into the appropriate `aref`s + ;; and such, as well as updating the program counter. The instruction size of + ;; each opcode is looked up at macroexpansion time to save cycles. + ;; + ;; For example, a clause like this: + ;; + ;; (opcode-case (wam code opcode) + ;; ;; ... + ;; (#.+opcode-put-structure+ :instruction %put-structure)) + ;; + ;; will get expanded into something like this: + ;; + ;; (ecase/tree opcode + ;; ;; ... + ;; (+opcode-put-structure+ (%put-structure wam (aref code (+ program-counter 1)) + ;; (aref code (+ program-counter 2))) + ;; (incf program-counter 3))) + (flet + ((parse-opcode-clause (clause) + (destructuring-bind (opcode &key instruction (increment-pc t) raw) + clause + (let ((size (instruction-size opcode))) + `(,opcode + ,(when instruction + `(instruction-call ,wam + ,instruction + ,code + (wam-program-counter ,wam) + ,(1- size))) + ,(when increment-pc + `(when (not (wam-backtracked ,wam)) + (incf (wam-program-counter ,wam) ,size))) + ,raw))))) + `(ecase/tree ,opcode-place + ,@(mapcar #'parse-opcode-clause clauses)))) + + +(defun run (wam done-thunk &optional (step *step*)) + (loop + :with code = (wam-code wam) + :until (or (wam-fail wam) ; failure + (= (wam-program-counter wam) +code-sentinel+)) ; finished + :for opcode = (the opcode (aref (wam-code wam) (wam-program-counter wam))) + :do (progn + (when step + (dump) + (break "About to execute instruction at ~4,'0X" (wam-program-counter wam))) + + (opcode-case (wam code opcode) + ;; Query + (#.+opcode-put-structure+ :instruction %put-structure) + (#.+opcode-put-variable-local+ :instruction %put-variable-local) + (#.+opcode-put-variable-stack+ :instruction %put-variable-stack) + (#.+opcode-put-value-local+ :instruction %put-value-local) + (#.+opcode-put-value-stack+ :instruction %put-value-stack) + (#.+opcode-put-void+ :instruction %put-void) + ;; Program + (#.+opcode-get-structure+ :instruction %get-structure) + (#.+opcode-get-variable-local+ :instruction %get-variable-local) + (#.+opcode-get-variable-stack+ :instruction %get-variable-stack) + (#.+opcode-get-value-local+ :instruction %get-value-local) + (#.+opcode-get-value-stack+ :instruction %get-value-stack) + ;; Subterm + (#.+opcode-subterm-variable-local+ :instruction %subterm-variable-local) + (#.+opcode-subterm-variable-stack+ :instruction %subterm-variable-stack) + (#.+opcode-subterm-value-local+ :instruction %subterm-value-local) + (#.+opcode-subterm-value-stack+ :instruction %subterm-value-stack) + (#.+opcode-subterm-void+ :instruction %subterm-void) + ;; Constant + (#.+opcode-put-constant+ :instruction %put-constant) + (#.+opcode-get-constant+ :instruction %get-constant) + (#.+opcode-subterm-constant+ :instruction %subterm-constant) + ;; Lisp Objects + (#.+opcode-put-lisp-object+ :instruction %put-lisp-object) + (#.+opcode-get-lisp-object+ :instruction %get-lisp-object) + ;; List + (#.+opcode-put-list+ :instruction %put-list) + (#.+opcode-get-list+ :instruction %get-list) + ;; Choice + (#.+opcode-try+ :instruction %try) + (#.+opcode-retry+ :instruction %retry) + (#.+opcode-trust+ :instruction %trust) + (#.+opcode-cut+ :instruction %cut) + ;; Control + (#.+opcode-allocate+ :instruction %allocate) + (#.+opcode-deallocate+ :instruction %deallocate) + (#.+opcode-proceed+ :instruction %proceed :increment-pc nil) + (#.+opcode-jump+ :instruction %jump :increment-pc nil) + (#.+opcode-call+ :instruction %call :increment-pc nil) + (#.+opcode-dynamic-jump+ :instruction %dynamic-jump :increment-pc nil) + (#.+opcode-dynamic-call+ :instruction %dynamic-call :increment-pc nil) + ;; Final + (#.+opcode-done+ + :increment-pc nil + :raw (if (funcall done-thunk) + (return-from run nil) + (backtrack! wam)))) + + (setf (wam-backtracked wam) nil) + + (when (>= (wam-program-counter wam) + (wam-code-pointer wam)) + (error "Fell off the end of the program code store.")))) + nil) + + +(defun %run-query (wam vars result-function) + (setf (wam-program-counter wam) 0 + (wam-continuation-pointer wam) +code-sentinel+) + (run wam (lambda () + (funcall result-function + (extract-query-results wam vars)))) + (wam-reset! wam) + nil) + +(defun run-query (wam terms &key (result-function + (lambda (results) + (declare (ignore results))))) + "Compile query `terms` and run the instructions on the `wam`. + + Resets the heap, etc after running. + + When `*step*` is true, break into the debugger before calling the procedure + and after each instruction. + + " + (%run-query wam (compile-query wam terms) result-function)) + +(defun run-aot-compiled-query (wam query-code query-size query-vars + &key (result-function + (lambda (results) + (declare (ignore results))))) + "Run the AOT-compiled query `code`/`vars` on the `wam`. + + Resets the heap, etc after running. + + When `*step*` is true, break into the debugger before calling the procedure + and after each instruction. + + " + (wam-load-query-code! wam query-code query-size) + (%run-query wam query-vars result-function)) + + diff -r 19200659513a -r 81939d20415a src/wam.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/wam.lisp Sat Aug 20 22:06:27 2016 +0000 @@ -0,0 +1,897 @@ +(in-package #:bones.wam) + +;;;; WAM +(defun allocate-wam-code (size) + ;; The WAM bytecode is all stored in this array. The first + ;; `+maximum-query-size+` words are reserved for query bytecode, which will + ;; get loaded in (overwriting the previous query) when making a query. + ;; Everything after that is for the actual database. + (make-array (+ +maximum-query-size+ size) + :initial-element 0 + :element-type 'code-word)) + +(defun allocate-query-holder () + (make-array +maximum-query-size+ + :adjustable nil + :initial-element 0 + :element-type 'code-word)) + + +(defun allocate-wam-type-store (size) + ;; The main WAM store(s) contain three separate blocks of values: + ;; + ;; [0, +register-count+) -> the local X_n registers + ;; [+stack-start+, +stack-end+) -> the stack + ;; [+heap-start+, ...) -> the heap + ;; + ;; `+register-count+` and `+stack-start+` are the same number, and + ;; `+stack-end+` and `+heap-start+` are the same number as well. + (make-array (+ +register-count+ + +stack-limit+ + size) ; type array + :initial-element +cell-type-null+ + :element-type 'cell-type)) + +(defun allocate-wam-value-store (size) + (make-array (+ +register-count+ + +stack-limit+ + size) + :initial-element 0 + :element-type 'cell-value)) + +(defun allocate-wam-unification-stack (size) + (make-array size + :fill-pointer 0 + :adjustable t + :element-type 'store-index)) + +(defun allocate-wam-trail (size) + (make-array size + :fill-pointer 0 + :adjustable t + :initial-element 0 + :element-type 'store-index)) + + +(defstruct (wam (:constructor make-wam%)) + ;; Data + (type-store + (error "Type store required.") + :type type-store + :read-only t) + (value-store + (error "Value store required.") + :type value-store + :read-only t) + (unification-stack + (error "Unification stack required.") + :type (vector store-index) + :read-only t) + (trail + (error "Trail required.") + :type (vector store-index) + :read-only t) + + ;; Code + (code + (error "Code store required.") + :type (simple-array code-word (*)) + :read-only t) + (code-labels + (make-array +maximum-arity+ :initial-element nil) + :type (simple-array (or null hash-table)) + :read-only t) + + ;; Logic Stack + (logic-stack nil :type list) + (logic-pool nil :type list) + + ;; Unique registers + (number-of-arguments 0 :type arity) ; NARGS + (subterm +heap-start+ :type heap-index) ; S + (program-counter 0 :type code-index) ; P + (code-pointer +code-main-start+ :type code-index) ; CODE + (heap-pointer (1+ +heap-start+) :type heap-index) ; H + (stack-pointer +stack-start+ :type stack-index) ; SP + (continuation-pointer 0 :type code-index) ; CP + (environment-pointer +stack-start+ :type environment-pointer) ; E + (backtrack-pointer +stack-start+ :type backtrack-pointer) ; B + (cut-pointer +stack-start+ :type backtrack-pointer) ; B0 + (heap-backtrack-pointer +heap-start+ :type heap-index) ; HB + + ;; Flags + (fail nil :type boolean) + (backtracked nil :type boolean) + (mode nil :type (or null (member :read :write)))) + + +(defmethod print-object ((wam wam) stream) + (print-unreadable-object + (wam stream :type t :identity t) + (format stream "an wam"))) + + +(defun make-wam (&key + (store-size (megabytes 10)) + (code-size (megabytes 1))) + (make-wam% :code (allocate-wam-code code-size) + :type-store (allocate-wam-type-store store-size) + :value-store (allocate-wam-value-store store-size) + :unification-stack (allocate-wam-unification-stack 16) + :trail (allocate-wam-trail 64))) + + +;;;; Store +;;; The main store of the WAM is split into two separate arrays: +;;; +;;; * An array of cell types, each a fixnum. +;;; * An array of cell values, each being a fixnum or a normal Lisp pointer. +;;; +;;; The contents of the value depend on the type of cell. +;;; +;;; NULL cells always have a value of zero. +;;; +;;; STRUCTURE cell values are an index into the store, describing where the +;;; structure starts. +;;; +;;; REFERENCE cell values are an index into the store, pointing at whatever the +;;; value is bound to. Unbound variables contain their own store index as +;;; a value. +;;; +;;; FUNCTOR cell values are a pointer to a `(fname . arity)` cons. +;;; +;;; CONSTANT cells are the same as functor cells, except that they always happen +;;; to refer to functors with an arity of zero. +;;; +;;; LIST cell values are an index into the store, pointing at the first of two +;;; consecutive cells. The first cell is the car of the list, the second one is +;;; the cdr. +;;; +;;; LISP-OBJECT cell values are simply arbitrary objects in memory. They are +;;; compared with `eql` during the unification process, so we don't actually +;;; care WHAT they are, exactly. +;;; +;;; STACK cell values are special cases. The WAM's main store is a combination +;;; of the heap, the stack, and registers. Heap cells (and registers) are those +;;; detailed above, but stack cells can also hold numbers like the continuation +;;; pointer. We lump all the extra things together into one kind of cell. + +(declaim (inline wam-store-type + wam-store-value + wam-set-store-cell! + wam-copy-store-cell!)) + + +(defun wam-store-type (wam address) + "Return the type of the cell at the given address." + (aref (wam-type-store wam) address)) + +(defun wam-store-value (wam address) + "Return the value of the cell at the given address." + (aref (wam-value-store wam) address)) + + +(defun wam-set-store-cell! (wam address type value) + (setf (aref (wam-type-store wam) address) type + (aref (wam-value-store wam) address) value)) + +(defun wam-copy-store-cell! (wam destination source) + (wam-set-store-cell! wam + destination + (wam-store-type wam source) + (wam-store-value wam source))) + + +(defun wam-sanity-check-store-read (wam address) + (declare (ignore wam)) + (when (= address +heap-start+) + (error "Cannot read from heap address zero."))) + + +(macrolet ((define-unsafe (name return-type) + `(progn + (declaim (inline ,name)) + (defun ,name (wam address) + (the ,return-type (aref (wam-value-store wam) address)))))) + (define-unsafe %unsafe-null-value (eql 0)) + (define-unsafe %unsafe-structure-value store-index) + (define-unsafe %unsafe-reference-value store-index) + (define-unsafe %unsafe-functor-value fname) + (define-unsafe %unsafe-constant-value fname) + (define-unsafe %unsafe-list-value store-index) + (define-unsafe %unsafe-lisp-object-value t) + (define-unsafe %unsafe-stack-value stack-word)) + + +(defun %type-designator-constant (designator) + (ecase designator + (:null +cell-type-null+) + (:structure +cell-type-structure+) + (:reference +cell-type-reference+) + (:functor +cell-type-functor+) + (:constant +cell-type-constant+) + (:list +cell-type-list+) + (:lisp-object +cell-type-lisp-object+) + ((t) t))) + +(defun %type-designator-accessor (designator) + (ecase designator + (:null '%unsafe-null-value) + (:structure '%unsafe-structure-value) + (:reference '%unsafe-reference-value) + (:functor '%unsafe-functor-value) + (:constant '%unsafe-constant-value) + (:list '%unsafe-list-value) + (:lisp-object '%unsafe-lisp-object-value))) + +(defun parse-cell-typecase-clause (wam address clause) + "Parse a `cell-typecase` clause into the appropriate `ecase` clause." + (destructuring-bind (binding . body) clause + (destructuring-bind + (type-designator &optional value-symbol secondary-value-symbol) + (if (symbolp binding) (list binding) binding) ; normalize binding + (let ((primary-let-binding + (when value-symbol + `((,value-symbol (,(%type-designator-accessor type-designator) + ,wam ,address))))) + (secondary-let-binding + (when secondary-value-symbol + `((,secondary-value-symbol + ,(ecase type-designator + (:functor + `(the arity (%unsafe-lisp-object-value ; yolo + ,wam + (1+ ,address)))))))))) + ; build the ecase clause (const ...body...) + (list + (%type-designator-constant type-designator) + `(let (,@primary-let-binding + ,@secondary-let-binding) + ,@body)))))) + +(defmacro cell-typecase ((wam address &optional address-symbol) &rest clauses) + "Dispatch on the type of the cell at `address` in the WAM store. + + If `address-symbol` is given it will be bound to the result of evaluating + `address` in the remainder of the form. + + The type of the cell will be matched against `clauses` much like `typecase`. + + Each clause should be of the form `(binding forms)`. + + Each binding can be either a simple cell type designator like `:reference`, or + a list of this designator and a symbol to bind the cell's value to. The + symbol is bound with `let` around the `forms` and type-hinted appropriately + (at least on SBCL). + + Example: + + (cell-typecase (wam (deref wam address) final-address) + (:reference (bind final-address foo) + 'it-is-a-reference) + ((:constant c) (list 'it-is-the-constant c)) + (t 'unknown)) + + " + (once-only (wam address) + `(progn + (policy-cond:policy-if (or (= safety 3) (= debug 3)) + (wam-sanity-check-store-read ,wam ,address) + nil) + (let (,@(when address-symbol + (list `(,address-symbol ,address)))) + (case (wam-store-type ,wam ,address) + ,@(mapcar (curry #'parse-cell-typecase-clause wam address) + clauses)))))) + + +(defmacro cell-type= (type type-designator) + `(= ,type ,(%type-designator-constant type-designator))) + +(defmacro cell-type-p ((wam address) type-designator) + `(cell-type= + (wam-store-type ,wam ,address) + ,type-designator)) + + +;;;; Heap +;;; The WAM heap is all the memory left in the store after the local registers +;;; and stack have been accounted for. Because the store is adjustable and the +;;; heap lives at the end of it, the heap can grow if necessary. +;;; +;;; We reserve the first address in the heap as a sentinel, as an "unset" value +;;; for various pointers into the heap. + +(declaim (inline wam-heap-pointer-unset-p wam-heap-push!)) + + +(defun wam-heap-pointer-unset-p (wam address) + (declare (ignore wam)) + (= address +heap-start+)) + +(defun wam-heap-push! (wam type value) + "Push the cell onto the WAM heap and increment the heap pointer. + + Returns the address it was pushed to. + + " + (let ((heap-pointer (wam-heap-pointer wam))) + (if (>= heap-pointer +store-limit+) ; todo: respect actual size... + (error "WAM heap exhausted.") + (progn + (wam-set-store-cell! wam heap-pointer type value) + (incf (wam-heap-pointer wam)) + heap-pointer)))) + + +;;;; Trail +(declaim (inline wam-trail-pointer + (setf wam-trail-pointer) + wam-trail-value + (setf wam-trail-value))) + + +(defun wam-trail-pointer (wam) + "Return the current trail pointer of the WAM." + (fill-pointer (wam-trail wam))) + +(defun (setf wam-trail-pointer) (new-value wam) + (setf (fill-pointer (wam-trail wam)) new-value)) + + +(defun wam-trail-push! (wam address) + "Push `address` onto the trail. + + Returns the address and the trail address it was pushed to. + + " + (let ((trail (wam-trail wam))) + (if (= +trail-limit+ (fill-pointer trail)) + (error "WAM trail exhausted.") + (values address (vector-push-extend address trail))))) + +(defun wam-trail-pop! (wam) + "Pop the top address off the trail and return it." + (vector-pop (wam-trail wam))) + +(defun wam-trail-value (wam address) + ;; TODO: can we really not just pop, or is something else gonna do something + ;; fucky with the trail? + "Return the element (a heap index) in the WAM trail at `address`." + (aref (wam-trail wam) address)) + +(defun (setf wam-trail-value) (new-value wam address) + (setf (aref (wam-trail wam) address) new-value)) + + +;;;; Stack +;;; The stack is stored as a fixed-length hunk of the main WAM store array, +;;; between the local register and the heap, with small glitch: we reserve the +;;; first word of the stack (address `+stack-start`) to mean "uninitialized", so +;;; we have a nice sentinel value for the various pointers into the stack. + +(declaim (inline assert-inside-stack + wam-stack-ensure-size + wam-stack-word + (setf wam-stack-word) + wam-backtrack-pointer-unset-p + wam-environment-pointer-unset-p)) + + +(defun assert-inside-stack (wam address) + (declare (ignorable wam address)) + (policy-cond:policy-cond + ((>= debug 2) + (progn + (assert (<= +stack-start+ address (1- +stack-end+)) () + "Cannot access stack cell at address ~X (outside the stack range ~X to ~X)" + address +stack-start+ +stack-end+) + (assert (not (= +stack-start+ address)) () + "Cannot access stack address zero."))) + ((>= safety 1) + (when (not (< +stack-start+ address +stack-end+)) + (error "Stack bounds crossed. Game over."))) + (t nil)) ; wew lads + nil) + +(defun wam-stack-ensure-size (wam address) + "Ensure the WAM stack is large enough to be able to write to `address`." + (assert-inside-stack wam address)) + + +(defun wam-stack-word (wam address) + "Return the stack word at the given address." + (assert-inside-stack wam address) + (%unsafe-stack-value wam address)) + +(defun (setf wam-stack-word) (new-value wam address) + (assert-inside-stack wam address) + (wam-set-store-cell! wam address +cell-type-stack+ new-value)) + + +(defun wam-backtrack-pointer-unset-p + (wam &optional (backtrack-pointer (wam-backtrack-pointer wam))) + (= backtrack-pointer +stack-start+)) + +(defun wam-environment-pointer-unset-p + (wam &optional (environment-pointer (wam-environment-pointer wam))) + (= environment-pointer +stack-start+)) + + +;;; Stack frames are laid out like so: +;;; +;;; |PREV| +;;; | CE | <-- environment-pointer +;;; | CP | +;;; | B0 | +;;; | N | +;;; | Y0 | +;;; | .. | +;;; | Yn | +;;; |NEXT| <-- fill-pointer + +(declaim (inline wam-stack-frame-ce + wam-stack-frame-cp + wam-stack-frame-cut + wam-stack-frame-n + wam-stack-frame-size + wam-stack-frame-argument-address + wam-set-stack-frame-argument!)) + + +(defun wam-stack-frame-ce (wam &optional (e (wam-environment-pointer wam))) + (wam-stack-word wam e)) + +(defun wam-stack-frame-cp (wam &optional (e (wam-environment-pointer wam))) + (wam-stack-word wam (1+ e))) + +(defun wam-stack-frame-cut (wam &optional (e (wam-environment-pointer wam))) + (wam-stack-word wam (+ 2 e))) + +(defun wam-stack-frame-n (wam &optional (e (wam-environment-pointer wam))) + (wam-stack-word wam (+ 3 e))) + + +(defun wam-stack-frame-argument-address + (wam n &optional (e (wam-environment-pointer wam))) + (+ 4 n e)) + +(defun wam-set-stack-frame-argument! (wam n type value + &optional (e (wam-environment-pointer wam))) + (wam-set-store-cell! wam (wam-stack-frame-argument-address wam n e) + type value)) + +(defun wam-copy-to-stack-frame-argument! (wam n source + &optional (e (wam-environment-pointer wam))) + (wam-copy-store-cell! wam (wam-stack-frame-argument-address wam n e) + source)) + + +(defun wam-stack-frame-size (wam &optional (e (wam-environment-pointer wam))) + "Return the size of the stack frame starting at environment pointer `e`." + (+ (wam-stack-frame-n wam e) 4)) + + +;;; Choice point frames are laid out like so: +;;; +;;; |PREV| +;;; 0 | N | number of arguments <-- backtrack-pointer +;;; 1 | CE | continuation environment +;;; 2 | CP | continuation pointer +;;; 3 | CB | previous choice point +;;; 4 | BP | next clause +;;; 5 | TR | trail pointer +;;; 6 | H | heap pointer +;;; 7 | CC | saved cut pointer +;;; 8 | A0 | +;;; | .. | +;;; 8+n | An | +;;; |NEXT| <-- environment-pointer +;;; +;;; This is a bit different than the book. We stick the args at the end of the +;;; frame instead of the beginning so it's easier to retrieve the other values. + +(declaim (inline wam-stack-choice-n + wam-stack-choice-ce + wam-stack-choice-cp + wam-stack-choice-cb + wam-stack-choice-cc + wam-stack-choice-bp + wam-stack-choice-tr + wam-stack-choice-h + wam-stack-choice-size + wam-stack-choice-argument-address + wam-set-stack-choice-argument! + wam-copy-to-stack-choice-argument!)) + + +(defun wam-stack-choice-n (wam &optional (b (wam-backtrack-pointer wam))) + (wam-stack-word wam b)) + +(defun wam-stack-choice-ce (wam &optional (b (wam-backtrack-pointer wam))) + (wam-stack-word wam (+ b 1))) + +(defun wam-stack-choice-cp (wam &optional (b (wam-backtrack-pointer wam))) + (wam-stack-word wam (+ b 2))) + +(defun wam-stack-choice-cb (wam &optional (b (wam-backtrack-pointer wam))) + (wam-stack-word wam (+ b 3))) + +(defun wam-stack-choice-bp (wam &optional (b (wam-backtrack-pointer wam))) + (wam-stack-word wam (+ b 4))) + +(defun wam-stack-choice-tr (wam &optional (b (wam-backtrack-pointer wam))) + (wam-stack-word wam (+ b 5))) + +(defun wam-stack-choice-h (wam &optional (b (wam-backtrack-pointer wam))) + (wam-stack-word wam (+ b 6))) + +(defun wam-stack-choice-cc (wam &optional (b (wam-backtrack-pointer wam))) + (wam-stack-word wam (+ b 7))) + + +(defun wam-stack-choice-argument-address + (wam n &optional (b (wam-backtrack-pointer wam))) + (+ 8 n b)) + +(defun wam-set-stack-choice-argument! (wam n type value + &optional (b (wam-backtrack-pointer wam))) + (wam-set-store-cell! wam (wam-stack-choice-argument-address wam n b) + type value)) + +(defun wam-copy-to-stack-choice-argument! (wam n source + &optional (b (wam-backtrack-pointer wam))) + (wam-copy-store-cell! wam (wam-stack-choice-argument-address wam n b) + source)) + + +(defun wam-stack-choice-size (wam &optional (b (wam-backtrack-pointer wam))) + "Return the size of the choice frame starting at backtrack pointer `b`." + (+ (wam-stack-choice-n wam b) 8)) + + +(defun wam-stack-top (wam) + "Return the top of the stack. + + This is the first place it's safe to overwrite in the stack. + + " + ;; The book is wrong here -- it looks up the "current frame size" to + ;; determine where the next frame should start, but on the first allocation + ;; there IS no current frame so it looks at garbage. Fuckin' great. + (let ((e (wam-environment-pointer wam)) + (b (wam-backtrack-pointer wam))) + (cond + ((and (wam-backtrack-pointer-unset-p wam b) + (wam-environment-pointer-unset-p wam e)) ; first allocation + (1+ +stack-start+)) + ((> e b) ; the last thing on the stack is a frame + (+ e (wam-stack-frame-size wam e))) + (t ; the last thing on the stack is a choice point + (+ b (wam-stack-choice-size wam b)))))) + + +;;;; Resetting +(defun wam-truncate-heap! (wam) + ;; todo: null out the heap once we're storing live objects + (setf (wam-heap-pointer wam) (1+ +heap-start+))) + +(defun wam-truncate-trail! (wam) + (setf (fill-pointer (wam-trail wam)) 0)) + +(defun wam-truncate-unification-stack! (wam) + (setf (fill-pointer (wam-unification-stack wam)) 0)) + +(defun wam-reset-local-registers! (wam) + (fill (wam-type-store wam) +cell-type-null+ :start 0 :end +register-count+) + (fill (wam-value-store wam) 0 :start 0 :end +register-count+)) + +(defun wam-reset! (wam) + (wam-truncate-heap! wam) + (wam-truncate-trail! wam) + (wam-truncate-unification-stack! wam) + (policy-cond:policy-if (>= debug 2) + ;; todo we can't elide this once we start storing live objects... :( + (wam-reset-local-registers! wam) + nil) ; fuck it + (fill (wam-code wam) 0 :start 0 :end +maximum-query-size+) + (setf (wam-program-counter wam) 0 + (wam-continuation-pointer wam) 0 + (wam-environment-pointer wam) +stack-start+ + (wam-backtrack-pointer wam) +stack-start+ + (wam-cut-pointer wam) +stack-start+ + (wam-heap-backtrack-pointer wam) +heap-start+ + (wam-backtracked wam) nil + (wam-fail wam) nil + (wam-subterm wam) +heap-start+ + (wam-mode wam) nil)) + + +;;;; Code +;;; The WAM needs to be able to look up predicates at runtime. To do this we +;;; keep a data structure that maps a functor and arity to a location in the +;;; code store. +;;; +;;; This data structure is an array, with the arity we're looking up being the +;;; position. At that position will be a hash tables of the functor symbols to +;;; the locations. +;;; +;;; Each arity's table will be created on-the-fly when it's first needed. + +(defun retrieve-instruction (code-store address) + "Return the full instruction at the given address in the code store." + (make-array (instruction-size (aref code-store address)) + :displaced-to code-store + :displaced-index-offset address + :adjustable nil + :element-type 'code-word)) + + +(defun wam-code-label (wam functor arity) + (let ((atable (aref (wam-code-labels wam) arity))) + (when atable + (values (gethash functor atable))))) + +(defun (setf wam-code-label) (new-value wam functor arity) + (setf (gethash functor (aref-or-init (wam-code-labels wam) arity + (make-hash-table :test 'eq))) + new-value)) + +(defun wam-code-label-remove! (wam functor arity) + (let ((atable (aref (wam-code-labels wam) arity))) + (when atable + ;; todo: remove the table entirely when empty? + (remhash functor atable)))) + + +(declaim (ftype (function (wam query-code-holder query-size) + (values null &optional)) + wam-load-query-code!)) +(defun wam-load-query-code! (wam query-code query-size) + (setf (subseq (wam-code wam) 0 query-size) query-code) + nil) + + +;;;; Logic Stack +;;; The logic stack is stored as a simple list in the WAM. `logic-frame` +;;; structs are pushed and popped from this list as requested. +;;; +;;; There's one small problem: logic frames need to keep track of which +;;; predicates are awaiting compilation, and the best data structure for that is +;;; a hash table. But hash tables are quite expensive to allocate when you're +;;; pushing and popping tons of frames per second. So the WAM also keeps a pool +;;; of logic frames to reuse, which lets us simply `clrhash` in between instead +;;; of having to allocate a brand new hash table. + +(declaim (inline assert-logic-frame-poppable)) + + +(defstruct logic-frame + (start 0 :type code-index) + (final nil :type boolean) + (predicates (make-hash-table :test 'equal) :type hash-table)) + + +(defun wam-logic-pool-release (wam frame) + (with-slots (start final predicates) frame + (clrhash predicates) + (setf start 0 final nil)) + (push frame (wam-logic-pool wam)) + nil) + +(defun wam-logic-pool-request (wam) + (or (pop (wam-logic-pool wam)) + (make-logic-frame))) + + +(defun wam-current-logic-frame (wam) + (first (wam-logic-stack wam))) + +(defun wam-logic-stack-empty-p (wam) + (not (wam-current-logic-frame wam))) + + +(defun wam-logic-open-p (wam) + (let ((frame (wam-current-logic-frame wam))) + (and frame (not (logic-frame-final frame))))) + +(defun wam-logic-closed-p (wam) + (not (wam-logic-open-p wam))) + + +(defun wam-push-logic-frame! (wam) + (assert (wam-logic-closed-p wam) () + "Cannot push logic frame unless the logic stack is closed.") + (let ((frame (wam-logic-pool-request wam))) + (setf (logic-frame-start frame) + (wam-code-pointer wam)) + (push frame (wam-logic-stack wam))) + nil) + +(defun assert-logic-frame-poppable (wam) + (let ((logic-stack (wam-logic-stack wam))) + (policy-cond:policy-if (or (> safety 1) (> debug 0) (< speed 3)) + ;; Slow + (progn + (assert logic-stack () + "Cannot pop logic frame from an empty logic stack.") + (assert (logic-frame-final (first logic-stack)) () + "Cannot pop unfinalized logic frame.")) + ;; Fast + (when (or (not logic-stack) + (not (logic-frame-final (first logic-stack)))) + (error "Cannot pop logic frame."))))) + +(defun wam-pop-logic-frame! (wam) + (with-slots (logic-stack) wam + (assert-logic-frame-poppable wam) + (let ((frame (pop logic-stack))) + (setf (wam-code-pointer wam) + (logic-frame-start frame)) + (loop :for (functor . arity) + :being :the hash-keys :of (logic-frame-predicates frame) + :do (wam-code-label-remove! wam functor arity)) + (wam-logic-pool-release wam frame))) + nil) + + +(defun assert-label-not-already-compiled (wam clause functor arity) + (assert (not (wam-code-label wam functor arity)) + () + "Cannot add clause ~S because its predicate has preexisting compiled code." + clause)) + +(defun wam-logic-frame-add-clause! (wam clause) + (assert (wam-logic-open-p wam) () + "Cannot add clause ~S without an open logic stack frame." + clause) + + (multiple-value-bind (functor arity) (find-predicate clause) + (assert-label-not-already-compiled wam clause functor arity) + (enqueue clause (gethash-or-init + (cons functor arity) + (logic-frame-predicates (wam-current-logic-frame wam)) + (make-queue)))) + nil) + + +(defun wam-finalize-logic-frame! (wam) + (assert (wam-logic-open-p wam) () + "There is no logic frame waiting to be finalized.") + (with-slots (predicates final) + (wam-current-logic-frame wam) + (loop :for clauses :being :the hash-values :of predicates + ;; circular dep on the compiler here, ugh. + :do (compile-rules wam (queue-contents clauses))) + (setf final t)) + nil) + + +;;;; Registers +;;; The WAM has two types of registers: +;;; +;;; * Local/temporary/arguments registers live at the beginning of the WAM +;;; memory store. +;;; +;;; * Stack/permanent registers live on the stack, and need some extra math to +;;; find their location. +;;; +;;; Registers are typically denoted by their "register index", which is just +;;; their number. Hoever, the bytecode needs to be able to distinguish between +;;; local and stack registers. To do this we just make separate opcodes for +;;; each kind. This is ugly, but it lets us figure things out at compile time +;;; instead of runtime, and register references happen A LOT at runtime. +;;; +;;; As for the CONTENTS of registers: a register (regardless of type) always +;;; contains a cell. The book is maddeningly unclear on this in a bunch of +;;; ways. I will list them here so maybe you can feel a bit of my suffering +;;; through these bytes of text. +;;; +;;; The first thing the book says about registers is "registers have the same +;;; format as heap cells". Okay, fine. The *very next diagram* shows "register +;;; assignments" that appear to put things that are very much *not* heap cells +;;; into registers! +;;; +;;; After a bit of puttering you realize that the diagram is referring only to +;;; the compilation, not what's *actually* stored in these registers at runtime. +;;; You move on and see some pseudocode that contains `X_i <- HEAP[H]` which +;;; confirms that his original claim was accurate, and registers are actually +;;; (copies of) heap cells. Cool. +;;; +;;; Then you move on and see the definition of `deref(a : address)` and note +;;; that it takes an *address* as an argument. On the next page you see +;;; `deref(X_i)` and wait what the fuck, a register is an *address* now? You +;;; scan down the page and see `HEAP[H] <- X_i` which means no wait it's a cell +;;; again. +;;; +;;; After considering depositing your laptop into the nearest toilet and +;;; becoming a sheep farmer, you conclude a few things: +;;; +;;; 1. The book's code won't typecheck. +;;; 2. The author is playing fast and loose with `X_i` -- sometimes it seems to +;;; be used as an address, sometimes as a cell. +;;; 3. The author never bothers to nail down exactly what is inside the fucking +;;; things, which is a problem because of #2. +;;; +;;; If you're like me (painfully unlucky), you took a wild guess and decided to +;;; implement registers as containing *addresses*, i.e., indexes into the +;;; heap, figuring that if you were wrong it would soon become apparent. +;;; +;;; WELL it turns out that you can get all the way to CHAPTER FIVE with +;;; registers implemented as addresses, at which point you hit a wall and need +;;; to spend a few hours refactoring a giant chunk of your code and writing +;;; angry comments in your source code. +;;; +;;; Hopefully I can save someone else this misery by leaving you with this: +;;; ____ _____________________________________ _____ ___ ____ ______ ______________ __ _____ +;;; / __ \/ ____/ ____/ _/ ___/_ __/ ____/ __ \/ ___/ / | / __ \/ ____/ / ____/ ____/ / / / / ___/ +;;; / /_/ / __/ / / __ / / \__ \ / / / __/ / /_/ /\__ \ / /| | / /_/ / __/ / / / __/ / / / / \__ \ +;;; / _, _/ /___/ /_/ // / ___/ // / / /___/ _, _/___/ / / ___ |/ _, _/ /___ / /___/ /___/ /___/ /______/ / +;;; /_/ |_/_____/\____/___//____//_/ /_____/_/ |_|/____/ /_/ |_/_/ |_/_____/ \____/_____/_____/_____/____/ + +(declaim (inline wam-set-local-register! + wam-set-stack-register! + wam-local-register-address + wam-stack-register-address + wam-local-register-type + wam-stack-register-type + wam-local-register-value + wam-stack-register-value + wam-copy-to-local-register! + wam-copy-to-stack-register! + wam-local-register-address + wam-stack-register-address)) + + +(defun wam-local-register-address (wam register) + (declare (ignore wam)) + register) + +(defun wam-stack-register-address (wam register) + (wam-stack-frame-argument-address wam register)) + + +(defun wam-local-register-type (wam register) + (wam-store-type wam (wam-local-register-address wam register))) + +(defun wam-stack-register-type (wam register) + (wam-store-type wam (wam-stack-register-address wam register))) + + +(defun wam-local-register-value (wam register) + (wam-store-value wam (wam-local-register-address wam register))) + +(defun wam-stack-register-value (wam register) + (wam-store-value wam (wam-stack-register-address wam register))) + + +(defun wam-set-local-register! (wam address type value) + (wam-set-store-cell! wam (wam-local-register-address wam address) + type value)) + +(defun wam-set-stack-register! (wam address type value) + (wam-set-stack-frame-argument! wam address type value)) + + +(defun wam-copy-to-local-register! (wam destination source) + (wam-copy-store-cell! wam (wam-local-register-address wam destination) source)) + +(defun wam-copy-to-stack-register! (wam destination source) + (wam-copy-store-cell! wam (wam-stack-register-address wam destination) source)) + + +;;;; Unification Stack +(declaim (inline wam-unification-stack-push! + wam-unification-stack-pop! + wam-unification-stack-empty-p)) + + +(defun wam-unification-stack-push! (wam address1 address2) + (vector-push-extend address1 (wam-unification-stack wam)) + (vector-push-extend address2 (wam-unification-stack wam))) + +(defun wam-unification-stack-pop! (wam) + (vector-pop (wam-unification-stack wam))) + +(defun wam-unification-stack-empty-p (wam) + (zerop (length (wam-unification-stack wam)))) diff -r 19200659513a -r 81939d20415a src/wam/bytecode.lisp --- a/src/wam/bytecode.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,172 +0,0 @@ -(in-package #:bones.wam) - - -;;;; Opcodes -(defun opcode-name (opcode) - (eswitch (opcode) - (+opcode-noop+ "NOOP") - - (+opcode-get-structure+ "GET-STRUCTURE") - (+opcode-get-variable-local+ "GET-VARIABLE") - (+opcode-get-variable-stack+ "GET-VARIABLE") - (+opcode-get-value-local+ "GET-VALUE") - (+opcode-get-value-stack+ "GET-VALUE") - - (+opcode-put-structure+ "PUT-STRUCTURE") - (+opcode-put-variable-local+ "PUT-VARIABLE") - (+opcode-put-variable-stack+ "PUT-VARIABLE") - (+opcode-put-value-local+ "PUT-VALUE") - (+opcode-put-value-stack+ "PUT-VALUE") - (+opcode-put-void+ "PUT-VOID") - - (+opcode-subterm-variable-local+ "SUBTERM-VARIABLE") - (+opcode-subterm-variable-stack+ "SUBTERM-VARIABLE") - (+opcode-subterm-value-local+ "SUBTERM-VALUE") - (+opcode-subterm-value-stack+ "SUBTERM-VALUE") - (+opcode-subterm-void+ "SUBTERM-VOID") - - (+opcode-jump+ "JUMP") - (+opcode-call+ "CALL") - (+opcode-dynamic-jump+ "DYNAMIC-JUMP") - (+opcode-dynamic-call+ "DYNAMIC-CALL") - (+opcode-proceed+ "PROCEED") - (+opcode-allocate+ "ALLOCATE") - (+opcode-deallocate+ "DEALLOCATE") - (+opcode-done+ "DONE") - (+opcode-try+ "TRY") - (+opcode-retry+ "RETRY") - (+opcode-trust+ "TRUST") - (+opcode-cut+ "CUT") - - (+opcode-get-constant+ "GET-CONSTANT") - (+opcode-put-constant+ "PUT-CONSTANT") - (+opcode-subterm-constant+ "SUBTERM-CONSTANT") - - (+opcode-get-list+ "GET-LIST") - (+opcode-put-list+ "PUT-LIST") - - (+opcode-get-lisp-object+ "GET-LISP-OBJECT") - (+opcode-put-lisp-object+ "PUT-LISP-OBJECT"))) - -(defun opcode-short-name (opcode) - (eswitch (opcode) - (+opcode-noop+ "NOOP") - - (+opcode-get-structure+ "GETS") - (+opcode-get-variable-local+ "GVAR") - (+opcode-get-variable-stack+ "GVAR") - (+opcode-get-value-local+ "GVLU") - (+opcode-get-value-stack+ "GVLU") - - (+opcode-put-structure+ "PUTS") - (+opcode-put-variable-local+ "PVAR") - (+opcode-put-variable-stack+ "PVAR") - (+opcode-put-value-local+ "PVLU") - (+opcode-put-value-stack+ "PVLU") - (+opcode-put-void+ "PVOI") - - (+opcode-subterm-variable-local+ "SVAR") - (+opcode-subterm-variable-stack+ "SVAR") - (+opcode-subterm-value-local+ "SVLU") - (+opcode-subterm-value-stack+ "SVLU") - (+opcode-subterm-void+ "SVOI") - - (+opcode-jump+ "JUMP") - (+opcode-call+ "CALL") - (+opcode-dynamic-jump+ "DYJP") - (+opcode-dynamic-call+ "DYCL") - (+opcode-proceed+ "PROC") - (+opcode-allocate+ "ALOC") - (+opcode-deallocate+ "DEAL") - (+opcode-done+ "DONE") - (+opcode-try+ "TRYM") - (+opcode-retry+ "RTRY") - (+opcode-trust+ "TRST") - (+opcode-cut+ "CUTT") - - (+opcode-get-constant+ "GCON") - (+opcode-put-constant+ "PCON") - (+opcode-subterm-constant+ "UCON") - - (+opcode-get-list+ "GLST") - (+opcode-put-list+ "PLST") - - (+opcode-get-lisp-object+ "GLOB") - (+opcode-put-lisp-object+ "PLOB"))) - - -;;;; Instructions -(define-lookup instruction-size (opcode instruction-size 0) - "Return the size of an instruction for the given opcode. - - The size includes one word for the opcode itself and one for each argument. - - " - (#.+opcode-noop+ 1) - - (#.+opcode-get-structure+ 4) - (#.+opcode-get-variable-local+ 3) - (#.+opcode-get-variable-stack+ 3) - (#.+opcode-get-value-local+ 3) - (#.+opcode-get-value-stack+ 3) - - (#.+opcode-put-structure+ 4) - (#.+opcode-put-variable-local+ 3) - (#.+opcode-put-variable-stack+ 3) - (#.+opcode-put-value-local+ 3) - (#.+opcode-put-value-stack+ 3) - (#.+opcode-put-void+ 2) - - (#.+opcode-subterm-variable-local+ 2) - (#.+opcode-subterm-variable-stack+ 2) - (#.+opcode-subterm-value-local+ 2) - (#.+opcode-subterm-value-stack+ 2) - (#.+opcode-subterm-void+ 2) - - (#.+opcode-jump+ 3) - (#.+opcode-call+ 3) - (#.+opcode-dynamic-jump+ 1) - (#.+opcode-dynamic-call+ 1) - (#.+opcode-proceed+ 1) - (#.+opcode-allocate+ 2) - (#.+opcode-deallocate+ 1) - (#.+opcode-done+ 1) - (#.+opcode-try+ 2) - (#.+opcode-retry+ 2) - (#.+opcode-trust+ 1) - (#.+opcode-cut+ 1) - - (#.+opcode-get-constant+ 3) - (#.+opcode-put-constant+ 3) - (#.+opcode-subterm-constant+ 2) - - (#.+opcode-get-list+ 2) - (#.+opcode-put-list+ 2) - - (#.+opcode-get-lisp-object+ 3) - (#.+opcode-put-lisp-object+ 3)) - - -;;;; Cells -(define-lookup cell-type-name (type string "") - "Return the full name of a cell type." - (#.+cell-type-null+ "NULL") - (#.+cell-type-structure+ "STRUCTURE") - (#.+cell-type-reference+ "REFERENCE") - (#.+cell-type-functor+ "FUNCTOR") - (#.+cell-type-constant+ "CONSTANT") - (#.+cell-type-list+ "LIST") - (#.+cell-type-lisp-object+ "LISP-OBJECT") - (#.+cell-type-stack+ "STACK")) - -(define-lookup cell-type-short-name (type string "") - "Return the short name of a cell type." - (#.+cell-type-null+ "NUL") - (#.+cell-type-structure+ "STR") - (#.+cell-type-reference+ "REF") - (#.+cell-type-functor+ "FUN") - (#.+cell-type-constant+ "CON") - (#.+cell-type-list+ "LIS") - (#.+cell-type-lisp-object+ "OBJ") - (#.+cell-type-stack+ "STK")) - diff -r 19200659513a -r 81939d20415a src/wam/compiler/0-data.lisp --- a/src/wam/compiler/0-data.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,203 +0,0 @@ -(in-package #:bones.wam) - -;;;; .-,--. . -;;;; ' | \ ,-. |- ,-. -;;;; , | / ,-| | ,-| -;;;; `-^--' `-^ `' `-^ - -;;;; Constants -(defconstant +choice-point-placeholder+ 'choice-point-placeholder) - - -;;;; Utils -(declaim (inline variablep)) - -(defun variablep (term) - (and (symbolp term) - (char= (char (symbol-name term) 0) #\?))) - -(defun lisp-object-to-string (o) - (with-output-to-string (str) - (print-unreadable-object (o str :type t :identity t)))) - -(defun required () - (error "Argument required.")) - - -;;;; Registers -(declaim (inline register-type register-number make-register register= - register-argument-p - register-temporary-p - register-permanent-p - register-anonymous-p)) - - -(deftype register-type () - '(member :argument :local :permanent :anonymous)) - -(deftype register-number () - `(integer 0 ,(1- +register-count+))) - - -(defstruct (register (:constructor make-register (type number))) - (type (required) :type register-type) - (number (required) :type register-number)) - - -(defun make-temporary-register (number arity) - (make-register (if (< number arity) :argument :local) - number)) - -(defun make-permanent-register (number) - (make-register :permanent number)) - -(defun make-anonymous-register () - (make-register :anonymous 0)) - - -(defun register-to-string (register) - (if (eq (register-type register) :anonymous) - "__" - (format nil "~A~D" - (ecase (register-type register) - (:argument #\A) - (:local #\X) - (:permanent #\Y)) - (+ (register-number register) - (if *off-by-one* 1 0))))) - -(defmethod print-object ((object register) stream) - (print-unreadable-object (object stream :identity nil :type nil) - (format stream (register-to-string object)))) - - -(defun register-argument-p (register) - (eq (register-type register) :argument)) - -(defun register-temporary-p (register) - (and (member (register-type register) '(:argument :local)) t)) - -(defun register-permanent-p (register) - (eq (register-type register) :permanent)) - -(defun register-anonymous-p (register) - (eq (register-type register) :anonymous)) - - -(defun register= (r1 r2) - (and (eq (register-type r1) - (register-type r2)) - (= (register-number r1) - (register-number r2)))) - - - -;;;; Clause Properties -;;; When tokenizing/precompiling a clause there are a few pieces of metadata -;;; we're going to need. We group them into a struct to make it easier to pass -;;; everything around. - -(defstruct (clause-properties (:conc-name clause-)) - (nead-vars nil :type list) - (nead-arity 0 :type arity) - (permanent-vars nil :type list) - (anonymous-vars nil :type list)) - - -(defun find-variables (terms) - "Return the set of variables in `terms`." - (let ((variables nil)) - (recursively ((term terms)) - (cond - ((variablep term) (pushnew term variables)) - ((consp term) (recur (car term)) - (recur (cdr term))) - (t nil))) - variables)) - -(defun find-shared-variables (terms) - "Return the set of all variables shared by two or more terms." - (labels - ((count-uses (variable) - (count-if (curry #'tree-member-p variable) terms)) - (shared-p (variable) - (> (count-uses variable) 1))) - (remove-if-not #'shared-p (find-variables terms)))) - -(defun find-permanent-variables (clause) - "Return a list of all the permanent variables in `clause`. - - Permanent variables are those that appear in more than one goal of the clause, - where the head of the clause is considered to be a part of the first goal. - - " - (if (<= (length clause) 2) - (list) ; Facts and chain rules have no permanent variables at all - (destructuring-bind (head body-first . body-rest) clause - ;; The head is treated as part of the first goal for the purposes of - ;; finding permanent variables. - (find-shared-variables (cons (cons head body-first) body-rest))))) - -(defun find-nead-variables (clause) - "Return a list of all variables in the nead of `clause`. - - The head and neck (first term in the body) are the 'nead'. - - " - (if (<= (length clause) 1) - (list) - (destructuring-bind (head body-first . body-rest) clause - (declare (ignore body-rest)) - (find-variables (list head body-first))))) - -(defun find-anonymous-variables (clause) - "Return a list of all anonymous variables in `clause`. - - Anonymous variables are variables that are only ever used once. - - " - (let ((seen nil) - (once nil)) - (recursively ((term clause)) - (cond - ((variablep term) - (if (member term seen) - (when (member term once) - (setf once (delete term once))) - (progn (push term seen) - (push term once)))) - ((consp term) (recur (car term)) - (recur (cdr term))) - (t nil))) - once)) - - -(defun determine-clause-properties (head body) - (let* ((clause - (cons head body)) - (permanent-vars - (if (null head) - ;; For query clauses we cheat a bit and make ALL variables - ;; permanent (except ?, of course), so we can extract their - ;; bindings as results later. - (remove +wildcard-symbol+ (find-variables body)) - (find-permanent-variables clause))) - (anonymous-vars - (if (null head) - ;; Again, for queries we cheat and never let anything be - ;; anonymous (except for the wildcard). - (list +wildcard-symbol+) - (cons +wildcard-symbol+ - (find-anonymous-variables clause)))) - (nead-vars - (set-difference (find-nead-variables clause) - permanent-vars)) - (nead-arity - (max (1- (length head)) - (1- (length (first (remove '! body))))))) ; gross - (make-clause-properties :nead-vars nead-vars - :nead-arity nead-arity - :permanent-vars permanent-vars - :anonymous-vars anonymous-vars))) - - diff -r 19200659513a -r 81939d20415a src/wam/compiler/1-parsing.lisp --- a/src/wam/compiler/1-parsing.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,202 +0,0 @@ -(in-package #:bones.wam) - -;;;; .-,--. -;;;; '|__/ ,-. ,-. ,-. . ,-. ,-. -;;;; ,| ,-| | `-. | | | | | -;;;; `' `-^ ' `-' ' ' ' `-| -;;;; ,| -;;;; `' - -; todo functor -> fname - -(defstruct node) - - -(defstruct (top-level-node (:include node)) - (functor nil :type symbol) - (arity 0 :type arity) - (arguments nil :type list)) - -(defstruct (vanilla-node (:include node) - (:conc-name node-)) - ;; The register allocated to store this node. - (register nil :type (or null register))) - - -(defstruct (structure-node (:include vanilla-node) - (:conc-name node-)) - (functor nil :type symbol) - (arity 0 :type arity) - (arguments nil :type list)) - -(defstruct (variable-node (:include vanilla-node) - (:conc-name node-)) - (variable nil :type symbol)) - -(defstruct (argument-variable-node (:include variable-node) - (:conc-name node-)) - ;; The register that actually holds the variable (NOT the argument register). - (secondary-register nil :type (or null register))) - -(defstruct (list-node (:include vanilla-node) - (:conc-name node-)) - (head (error "Head argument required") :type node) - (tail (error "Head argument required") :type node)) - -(defstruct (lisp-object-node (:include vanilla-node) - (:conc-name node-)) - (object nil :type t)) - - -(defgeneric node-children (node) - (:documentation - "Return the children of the given node. - - Presumably these will need to be traversed when allocating registers.")) - -(defmethod node-children ((node vanilla-node)) - (list)) - -(defmethod node-children ((node top-level-node)) - (top-level-node-arguments node)) - -(defmethod node-children ((node structure-node)) - (node-arguments node)) - -(defmethod node-children ((node list-node)) - (list (node-head node) (node-tail node))) - - -(defun nil-node-p (node) - "Return whether the given node is the magic nil/0 constant." - (and (typep node 'structure-node) - (eql (node-functor node) nil) - (zerop (node-arity node)))) - - -(defparameter *dump-node-indent* 0) - -(defun print-node-register (node stream &optional space-before) - (when (slot-boundp node 'register) - (format stream (if space-before " ~A =" "~A = ") (node-register node)))) - -(defun print-node-secondary-register (node stream &optional space-before) - (when (slot-boundp node 'secondary-register) - (format stream - (if space-before " ~A =" "~A = ") - (node-secondary-register node)))) - - -(defgeneric dump-node (node)) - -(defmethod dump-node ((node node)) - (format t "~VAAN NODE" *dump-node-indent* "")) - -(defmethod dump-node ((node variable-node)) - (format t "~VA#" (node-variable node))) - -(defmethod dump-node ((node argument-variable-node)) - (format t "~VA#" (node-variable node))) - -(defmethod dump-node ((node structure-node)) - (format t "~VA#")) - -(defmethod dump-node ((node list-node)) - (format t "~VA#")) - -(defmethod dump-node ((node lisp-object-node)) - (format t "~VA#" (lisp-object-to-string (node-object node)))) - -(defmethod dump-node ((node top-level-node)) - (with-slots (functor arity arguments) node - (format t "#<~A/~D" functor arity) - (let ((*dump-node-indent* 4)) - (dolist (n arguments) - (terpri) - (dump-node n))) - (format t ">"))) - -(defmethod print-object ((node node) stream) - (let ((*standard-output* stream)) - (dump-node node))) - - -(defun parse-list (contents) - (if contents - (make-list-node :head (parse (car contents)) - :tail (parse-list (cdr contents))) - (make-structure-node :functor nil - :arity 0 - :arguments ()))) - -(defun parse-list* (contents) - (destructuring-bind (next . remaining) contents - (if (null remaining) - (parse next) - (make-list-node :head (parse next) - :tail (parse-list* remaining))))) - -(defun parse (term &optional top-level-argument) - (cond - ((variablep term) - (if top-level-argument - (make-argument-variable-node :variable term) - (make-variable-node :variable term))) - ((symbolp term) - (parse (list term))) ; c/0 -> (c/0) - ((consp term) - (destructuring-bind (functor . arguments) term - (when (not (symbolp functor)) - (error - "Cannot parse term ~S because ~S is not a valid functor." - term functor)) - (case functor - (list (parse-list arguments)) - (list* (parse-list* arguments)) - (t (make-structure-node :functor functor - :arity (length arguments) - :arguments (mapcar #'parse arguments)))))) - ((numberp term) - (make-lisp-object-node :object term)) - (t (error "Cannot parse term ~S into a Prolog term." term)))) - -(defun parse-top-level (term) - (typecase term - (symbol (parse-top-level (list term))) ; c/0 -> (c/0) - (cons (destructuring-bind (functor . arguments) term - (when (not (symbolp functor)) - (error - "Cannot parse top-level term ~S because ~S is not a valid functor." - term functor)) - (make-top-level-node :functor functor - :arity (length arguments) - :arguments (mapcar (lambda (a) (parse a t)) - arguments)))) - (t (error "Cannot parse top-level term ~S into a Prolog term." term)))) - - diff -r 19200659513a -r 81939d20415a src/wam/compiler/2-register-allocation.lisp --- a/src/wam/compiler/2-register-allocation.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,287 +0,0 @@ -(in-package #:bones.wam) - -;;;; .-,--. . ,. . . . -;;;; `|__/ ,-. ,-. . ,-. |- ,-. ,-. / | | | ,-. ,-. ,-. |- . ,-. ,-. -;;;; )| \ |-' | | | `-. | |-' | /~~|-. | | | | | ,-| | | | | | | -;;;; `' ` `-' `-| ' `-' `' `-' ' ,' `-' `' `' `-' `-' `-^ `' ' `-' ' ' -;;;; ,| -;;;; `' - -;;; You might want to grab a coffee for this one. -;;; -;;; Consider this simple Prolog example: `p(A, q(A, r(B)))`. We're going to get -;;; this as a Lisp list: `(p :a (q :a (r b)))`. -;;; -;;; The goal is to turn this list into a set of register assignments. The book -;;; handwaves around how to do this, and it turns out to be pretty complicated. -;;; This example will (maybe, read on) be turned into: -;;; -;;; A0 <- X2 -;;; A1 <- (q X2 X3) -;;; X2 <- :a -;;; X3 <- (r X4) -;;; X4 <- :b -;;; -;;; There are a few things to note here. First: like the book says, the -;;; outermost predicate is stripped off and returned separately (later it'll be -;;; used to label the code for a program, or to figure out the procedure to call -;;; for a query). -;;; -;;; The first N registers are designated as argument registers. Structure -;;; assignments can live directly in the argument registers, but variables -;;; cannot. In the example above we can see that A1 contains a structure -;;; assignment. However, the variable `:a` doesn't live in A0 -- it lives in -;;; X2, which A0 points at. The books neglects to explain this little fact. -;;; -;;; The next edge case is permanent variables, which the book does talk about. -;;; Permanent variables are allocated to stack registers, so if `:b` was -;;; permanent in our example we'd get: -;;; -;;; A0 <- X2 -;;; A1 <- (q X2 X3) -;;; X2 <- :a -;;; X3 <- (r Y0) -;;; Y0 <- :b -;;; -;;; Note that the mapping of permanent variables to stack register numbers has -;;; to be consistent as we compile all the terms in a clause, so we cheat a bit -;;; here and just always add them all, in order, to the register assignment -;;; produced when parsing. They'll get flattened away later anyway -- it's the -;;; USES that we actually care about. In our example, the `Y0 <- :b` will get -;;; flattened away, but the USE of Y0 in X3 will remain). -;;; -;;; We're almost done, I promise, but there's one more edge case to deal with. -;;; -;;; When we've got a clause with a head and at least one body term, we need the -;;; head term and the first body term to share argument/local registers. For -;;; example, if we have the clause `p(Cats) :- q(A, B, C, Cats)` then when -;;; compiling the head `(p :cats)` we want to get: -;;; -;;; A0 <- X4 -;;; A1 <- ??? -;;; A2 <- ??? -;;; A3 <- ??? -;;; X4 <- :cats -;;; -;;; And when compiling `(q :a :b :c :cats)` we need: -;;; -;;; A0 <- X5 -;;; A1 <- X6 -;;; A2 <- X7 -;;; A3 <- X4 -;;; X4 <- :cats -;;; X5 <- :a -;;; X6 <- :b -;;; X7 <- :c -;;; -;;; What the hell are those empty argument registers in p? And why did we order -;;; the X registers of q like that? -;;; -;;; The book does not bother to mention this important fact at all, so to find -;;; out that you have to handle this you need to do the following: -;;; -;;; 1. Implement it without this behavior. -;;; 2. Notice your results are wrong. -;;; 3. Figure out the right bytecode on a whiteboard. -;;; 4. Try to puzzle out why that bytecode isn't generated when you do exactly -;;; what the book says. -;;; 5. Scour IRC and the web for scraps of information on what the hell you need -;;; to do here. -;;; 6. Find the answer in a comment squirreled away in a source file somewhere -;;; in a language you don't know. -;;; 7. Drink. -;;; -;;; Perhaps you're reading this comment as part of step 6 right now. If so: -;;; welcome aboard. Email me and we can swap horror stories about this process -;;; over drinks some time. -;;; -;;; Okay, so the clause head and first body term need to share argument/local -;;; registers. Why? To understand this, we need to go back to what Prolog -;;; clauses are supposed to do. -;;; -;;; Imagine we have: -;;; -;;; p(f(X)) :- q(X), ...other goals. -;;; -;;; When we want to check if `p(SOMETHING)` is true, we need to first unify -;;; SOMETHING with `f(X)`. Then we search all of the goals in the body, AFTER -;;; substituting in any X's in those goals with the X from the result of the -;;; unification. -;;; -;;; This substitution is why we need the head and the first term in the body to -;;; share the same argument/local registers. By sharing the registers, when the -;;; body term builds a representation of itself on the stack before calling its -;;; predicate any references to X will be point at the (unified) results instead -;;; of fresh ones (because they'll be compiled as `put_value` instead of -;;; `put_variable`). -;;; -;;; But wait: don't we need to substitute into ALL the body terms, not just the -;;; first one? Yes we do, but the trick is that any variables in the REST of -;;; the body that would need to be substituted must, by definition, be permanent -;;; variables! So the substitution process for the rest of the body is handled -;;; automatically with the stack machinery. -;;; -;;; In theory, you could eliminate this edge case by NOT treating the head and -;;; first goal as a single term when searching for permanent variables. Then -;;; all substitution would happen elegantly through the stack. But this -;;; allocates more variables on the stack than you really need (especially for -;;; rules with just a single term in the body (which is many of them)), so we -;;; have this extra corner case to optimize it away. -;;; -;;; In the following code these variables will be called "nead variables" -;;; because: -;;; -;;; 1. They're present in the head of the clause. -;;; 2. They're present in the first term of the body (the "neck", as referred to -;;; in "neck cut" and such). -;;; 3. https://www.urbandictionary.com/define.php?term=nead&defid=1488946 -;;; -;;; We now return you to your regularly scheduled Lisp code. - - -(defstruct allocation-state - (local-registers (make-queue) :type queue) - (stack-registers nil :type list) - (permanent-variables nil :type list) - (anonymous-variables nil :type list) - (reserved-variables nil :type list) - (reserved-arity nil :type (or null arity)) - (actual-arity 0 :type arity)) - - -(defun find-variable (state variable) - "Return the register that already contains this variable, or `nil` otherwise." - (or (when-let (r (position variable - (queue-contents - (allocation-state-local-registers state)))) - (make-temporary-register r (allocation-state-actual-arity state))) - (when-let (s (position variable - (allocation-state-stack-registers state))) - (make-permanent-register s)) - nil)) - -(defun store-variable (state variable) - "Assign `variable` to the next available local register. - - It is assumed that `variable` is not already assigned to another register - (check that with `find-variable` first). - - It is also assumed that this will be a non-argument register, because as - mentioned above variables cannot live directly inside argument registers. - - " - (make-register - :local - (1- (enqueue variable (allocation-state-local-registers state))))) - -(defun ensure-variable (state variable) - (or (find-variable state variable) - (store-variable state variable))) - - -(defmacro set-when-nil ((accessor instance) value-form) - (once-only (instance) - `(when (not (,accessor ,instance)) - (setf (,accessor ,instance) ,value-form)))) - - -(defun variable-anonymous-p (state variable) - "Return whether `variable` is considered anonymous in `state`." - (and (member variable (allocation-state-anonymous-variables state)) t)) - - -(defun allocate-variable-register (state variable) - (if (variable-anonymous-p state variable) - (make-anonymous-register) - (ensure-variable state variable))) - -(defun allocate-nonvariable-register (state) - "Allocate and return a register for something that's not a variable." - ;; We need to allocate registers for things like structures and lists, but we - ;; never need to look them up later (like we do with variables), so we'll just - ;; shove a nil into the local registers array as a placeholder. - (make-temporary-register - (enqueue nil (allocation-state-local-registers state)) - (allocation-state-actual-arity state))) - - -(defgeneric allocate-register (node allocation-state)) - - -(defmethod allocate-register ((node top-level-node) state) - (declare (ignore node state)) - nil) - -(defmethod allocate-register ((node variable-node) state) - (set-when-nil (node-register node) - (allocate-variable-register state (node-variable node)))) - -(defmethod allocate-register ((node argument-variable-node) state) - (set-when-nil (node-secondary-register node) - (allocate-variable-register state (node-variable node)))) - -(defmethod allocate-register ((node structure-node) state) - (set-when-nil (node-register node) - (allocate-nonvariable-register state))) - -(defmethod allocate-register ((node list-node) state) - (set-when-nil (node-register node) - (allocate-nonvariable-register state))) - -(defmethod allocate-register ((node lisp-object-node) state) - (set-when-nil (node-register node) - (allocate-nonvariable-register state))) - - -(defun allocate-argument-registers (node) - (loop :for argument :in (top-level-node-arguments node) - :for i :from 0 - :do (setf (node-register argument) - (make-register :argument i)))) - -(defun allocate-nonargument-registers (node clause-props &key nead) - ;; JESUS TAKE THE WHEEL - (let* - ((actual-arity (top-level-node-arity node)) - (reserved-arity (when nead - (clause-nead-arity clause-props))) - (reserved-variables (when nead - (clause-nead-vars clause-props))) - (permanent-variables (clause-permanent-vars clause-props)) - (local-registers (make-queue)) - ;; We essentially "preallocate" all the permanent variables up front - ;; because we need them to always be in the same stack registers across - ;; all the terms of our clause. - ;; - ;; The ones that won't get used in this term will end up getting - ;; flattened away anyway. - (stack-registers permanent-variables) - (allocation-state - (make-allocation-state - :local-registers local-registers - :stack-registers stack-registers - :permanent-variables permanent-variables - :anonymous-variables (clause-anonymous-vars clause-props) - :reserved-variables reserved-variables - :reserved-arity reserved-arity - :actual-arity actual-arity))) - ;; Preallocate enough registers for all of the arguments. We'll fill - ;; them in later. Note that things are more complicated in the head and - ;; first body term of a clause (see above). - (loop :repeat (or reserved-arity actual-arity) - :do (enqueue nil local-registers)) - ;; Actually reserve the reserved (but non-permanent, see above) variables. - ;; They need to live in consistent spots for the head and first body term. - (loop :for variable :in reserved-variables - :do (enqueue variable local-registers)) - (recursively ((remaining (list node))) - (when remaining - (destructuring-bind (node . remaining) remaining - (allocate-register node allocation-state) - (recur (append remaining (node-children node)))))))) - -(defun allocate-registers (node clause-props &key nead) - (allocate-argument-registers node) - (allocate-nonargument-registers node clause-props :nead nead)) - - diff -r 19200659513a -r 81939d20415a src/wam/compiler/3-flattening.lisp --- a/src/wam/compiler/3-flattening.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,136 +0,0 @@ -(in-package #:bones.wam) - -;;;; .-,--' . . . -;;;; \|__ | ,-. |- |- ,-. ,-. . ,-. ,-. -;;;; | | ,-| | | |-' | | | | | | | -;;;; `' `' `-^ `' `' `-' ' ' ' ' ' `-| -;;;; ,| -;;;; `' - -;;; "Flattening" is the process of turning a parse tree (with register -;;; assignments) into a flat list of nodes, which will then be turned into -;;; a series of instructions. -;;; -;;; The order of this list depends on whether we're compiling a query term or -;;; a program term. -;;; -;;; Turns: -;;; -;;; X0 <- p(X1, X2) -;;; X1 <- A -;;; X2 <- q(X1, X3) -;;; X3 <- B -;;; -;;; into something like: -;;; -;;; X2 <- q(X1, X3) -;;; X0 <- p(X1, X2) - - -(defstruct (register-assignment - (:conc-name assignment-)) - (register (required) :type register)) - - -(defstruct (structure-assignment (:include register-assignment) - (:conc-name assignment-)) - (functor nil :type symbol) - (arity 0 :type arity) - (arguments () :type list)) - -(defstruct (argument-variable-assignment (:include register-assignment) - (:conc-name assignment-)) - (target (required) :type register)) - -(defstruct (list-assignment (:include register-assignment) - (:conc-name assignment-)) - (head (required) :type register) - (tail (required) :type register)) - -(defstruct (lisp-object-assignment (:include register-assignment) - (:conc-name assignment-)) - (object nil :type t)) - - -(defmethod print-object ((assignment structure-assignment) stream) - (print-unreadable-object (assignment stream :type nil :identity nil) - (format stream "~A = ~A/~D(~{~A~^, ~})" - (register-to-string (assignment-register assignment)) - (assignment-functor assignment) - (assignment-arity assignment) - (mapcar #'register-to-string (assignment-arguments assignment))))) - -(defmethod print-object ((assignment argument-variable-assignment) stream) - (print-unreadable-object (assignment stream :type nil :identity nil) - (format stream "~A = ~A" - (register-to-string (assignment-register assignment)) - (register-to-string (assignment-target assignment))))) - -(defmethod print-object ((assignment list-assignment) stream) - (print-unreadable-object (assignment stream :type nil :identity nil) - (format stream "~A = [~A | ~A]" - (register-to-string (assignment-register assignment)) - (register-to-string (assignment-head assignment)) - (register-to-string (assignment-tail assignment))))) - -(defmethod print-object ((assignment lisp-object-assignment) stream) - (print-unreadable-object (assignment stream :type nil :identity nil) - (format stream "~A = ~A" - (register-to-string (assignment-register assignment)) - (lisp-object-to-string (assignment-object assignment))))) - - -(defgeneric node-flatten (node)) - -(defmethod node-flatten (node) - nil) - -(defmethod node-flatten ((node structure-node)) - (values (make-structure-assignment - :register (node-register node) - :functor (node-functor node) - :arity (node-arity node) - :arguments (mapcar #'node-register (node-arguments node))))) - -(defmethod node-flatten ((node argument-variable-node)) - (values (make-argument-variable-assignment - :register (node-register node) - :target (node-secondary-register node)))) - -(defmethod node-flatten ((node list-node)) - (values (make-list-assignment - :register (node-register node) - :head (node-register (node-head node)) - :tail (node-register (node-tail node))))) - -(defmethod node-flatten ((node lisp-object-node)) - (values (make-lisp-object-assignment - :register (node-register node) - :object (node-object node)))) - - -(defun flatten-breadth-first (tree) - (let ((results nil)) - (recursively ((node tree)) - (when-let (assignment (node-flatten node)) - (push assignment results)) - (mapc #'recur (node-children node))) - (nreverse results))) - -(defun flatten-depth-first-post-order (tree) - (let ((results nil)) - (recursively ((node tree)) - (mapc #'recur (node-children node)) - (when-let (assignment (node-flatten node)) - (push assignment results))) - (nreverse results))) - - -(defun flatten-query (tree) - (flatten-depth-first-post-order tree)) - -(defun flatten-program (tree) - (flatten-breadth-first tree)) - - - diff -r 19200659513a -r 81939d20415a src/wam/compiler/4-tokenization.lisp --- a/src/wam/compiler/4-tokenization.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,148 +0,0 @@ -(in-package #:bones.wam) - -;;;; ,--,--' . . -;;;; `- | ,-. | , ,-. ,-. . ,_, ,-. |- . ,-. ,-. -;;;; , | | | |< |-' | | | / ,-| | | | | | | -;;;; `-' `-' ' ` `-' ' ' ' '"' `-^ `' ' `-' ' ' - -;;; Tokenizing takes a flattened set of assignments and turns it into a stream -;;; of structure assignments and bare registers. -;;; -;;; It turns: -;;; -;;; X2 <- q(X1, X3) -;;; X0 <- p(X1, X2) -;;; A3 <- X4 -;;; -;;; into something like: -;;; -;;; (X2 = q/2), X1, X3, (X0 = p/2), X1, X2, (A3 = X4) - - -(defclass token () ()) - - -(defclass register-token (token) - ((register :accessor token-register :type register :initarg :register))) - -(defclass structure-token (register-token) - ((functor :accessor token-functor :type symbol :initarg :functor) - (arity :accessor token-arity :type arity :initarg :arity))) - -(defclass argument-variable-token (register-token) - ((target :accessor token-target :type register :initarg :target))) - -(defclass list-token (register-token) ()) - -(defclass lisp-object-token (register-token) - ((object :accessor token-object :type t :initarg :object))) - -(defclass procedure-call-token () - ((functor :accessor token-functor :type symbol :initarg :functor) - (arity :accessor token-arity :type arity :initarg :arity))) - -(defclass call-token (procedure-call-token) ()) - -(defclass jump-token (procedure-call-token) ()) - -(defclass cut-token (token) ()) - - -(defun make-register-token (register) - (values (make-instance 'register-token :register register))) - - -(defmethod print-object ((token register-token) stream) - (print-object (token-register token) stream)) - -(defmethod print-object ((token structure-token) stream) - (print-unreadable-object (token stream :identity nil :type nil) - (format stream "~A = ~A/~D" - (register-to-string (token-register token)) - (token-functor token) - (token-arity token)))) - -(defmethod print-object ((token argument-variable-token) stream) - (print-unreadable-object (token stream :identity nil :type nil) - (format stream "~A = ~A" - (register-to-string (token-register token)) - (register-to-string (token-target token))))) - -(defmethod print-object ((token list-token) stream) - (print-unreadable-object (token stream :identity nil :type nil) - (format stream "~A = LIST" (register-to-string (token-register token))))) - -(defmethod print-object ((token lisp-object-token) stream) - (print-unreadable-object (token stream :identity nil :type nil) - (format stream "~A = ~A" - (register-to-string (token-register token)) - (lisp-object-to-string (token-object token))))) - -(defmethod print-object ((token call-token) stream) - (print-unreadable-object (token stream :identity nil :type nil) - (format stream "CALL ~A/~D" - (token-functor token) - (token-arity token)))) - -(defmethod print-object ((token jump-token) stream) - (print-unreadable-object (token stream :identity nil :type nil) - (format stream "JUMP ~A/~D" - (token-functor token) - (token-arity token)))) - -(defmethod print-object ((token cut-token) stream) - (print-unreadable-object (token stream :identity nil :type nil) - (format stream "CUT!"))) - - -(defgeneric tokenize-assignment (assignment) - (:documentation "Tokenize `assignment` into a flat list of tokens.")) - -(defmethod tokenize-assignment ((assignment structure-assignment)) - (list* (make-instance 'structure-token - :register (assignment-register assignment) - :functor (assignment-functor assignment) - :arity (assignment-arity assignment)) - (mapcar #'make-register-token (assignment-arguments assignment)))) - -(defmethod tokenize-assignment ((assignment argument-variable-assignment)) - (list (make-instance 'argument-variable-token - :register (assignment-register assignment) - :target (assignment-target assignment)))) - -(defmethod tokenize-assignment ((assignment list-assignment)) - (list (make-instance 'list-token :register (assignment-register assignment)) - (make-register-token (assignment-head assignment)) - (make-register-token (assignment-tail assignment)))) - -(defmethod tokenize-assignment ((assignment lisp-object-assignment)) - (list (make-instance 'lisp-object-token - :register (assignment-register assignment) - :object (assignment-object assignment)))) - -(defun tokenize-assignments (assignments) - "Tokenize a flattened set of register assignments into a stream." - (mapcan #'tokenize-assignment assignments)) - - -(defun tokenize-program-term (term clause-props) - "Tokenize `term` as a program term, returning its tokens." - (let ((tree (parse-top-level term))) - (allocate-registers tree clause-props :nead t) - (-> tree flatten-program tokenize-assignments))) - -(defun tokenize-query-term (term clause-props &key in-nead is-tail) - "Tokenize `term` as a query term, returning its tokens." - (let ((tree (parse-top-level term))) - (allocate-registers tree clause-props :nead in-nead) - (-<> tree - flatten-query - tokenize-assignments - ;; We need to shove a CALL/JUMP token onto the end. - (append <> (list (make-instance - (if is-tail 'jump-token 'call-token) - :functor (top-level-node-functor tree) - :arity (top-level-node-arity tree))))))) - - - diff -r 19200659513a -r 81939d20415a src/wam/compiler/5-precompilation.lisp --- a/src/wam/compiler/5-precompilation.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,420 +0,0 @@ -(in-package #:bones.wam) - -;;;; .-,--. . . -;;;; '|__/ ,-. ,-. ,-. ,-. ,-,-. ,-. . | ,-. |- . ,-. ,-. -;;;; ,| | |-' | | | | | | | | | | ,-| | | | | | | -;;;; `' ' `-' `-' `-' ' ' ' |-' ' `' `-^ `' ' `-' ' ' -;;;; | -;;;; ' - -;;; Once we have a tokenized stream we can generate the machine instructions -;;; from it. -;;; -;;; We don't generate the ACTUAL bytecode immediately, because we want to run -;;; a few optimization passes on it first, and it's easier to work with if we -;;; have a friendlier format. -;;; -;;; So we turn a stream of tokens: -;;; -;;; (X2 = q/2), X1, X3, (X0 = p/2), X1, X2 -;;; -;;; into a list of instructions, each of which is a list: -;;; -;;; (:put-structure X2 q 2) -;;; (:subterm-variable X1) -;;; (:subterm-variable X3) -;;; (:put-structure X0 p 2) -;;; (:subterm-value X1) -;;; (:subterm-value X2) -;;; -;;; The opcodes are keywords and the register arguments remain register objects. -;;; They get converted down to the raw bytes in the final "rendering" step. -;;; -;;; # Cut -;;; -;;; A quick note on cut (!): the book and original WAM do some nutty things to -;;; save one stack word per frame. They store the cut register for non-neck -;;; cuts in a "pseudovariable" on the stack, so they only have to allocate that -;;; extra stack word for things that actually USE non-neck cuts. -;;; -;;; We're going to just eat the extra stack word and store the cut register in -;;; every frame instead. This massively simplifies the implementation and lets -;;; me keep my sanity, and it MIGHT even end up being faster because there's -;;; one fewer opcode, less fucking around in the compiler, etc. But regardless: -;;; I don't want to go insane, and my laptop has sixteen gigabytes of RAM, so -;;; let's just store the damn word. -;;; -;;; # "Seen" Registers -;;; -;;; The book neglects to mention some REALLY important information about how you -;;; have to handle registers when compiling a stream of tokens. But if you've -;;; made it this far, you should be pretty used to the book omitting vital -;;; information. So hop in the clown car and take a ride with me. -;;; -;;; From the very beginning,the book mentions that certain instructions come in -;;; pairs, the first of which is used the first time the register is "seen" or -;;; "encountered", and the second used of which is used subsequent times. -;;; -;;; For example, a simple query like `p(A, A, A)` would result in: -;;; -;;; put-variable A0 X3 -;;; put-value A1 X3 -;;; put-value A2 X3 -;;; call p/3 -;;; -;;; This is all fine and dandy and works for single goals, but if you have -;;; a clause with MULTIPLE body goals you need to "reset" the list of -;;; already-seen registers after each goal. For example, consider: -;;; -;;; p() :- -;;; f(X, X), -;;; g(Y, Y). -;;; -;;; If you just apply what the book says without resetting the already-seen -;;; register list, you get: -;;; -;;; put-variable A0 X2 -;;; put-value A1 X2 -;;; call f/2 -;;; put-value A0 X2 <--- wrong! -;;; put-value A1 X2 -;;; call g/2 -;;; -;;; But the variable in `g/2` is DIFFERENT than the one used in `f/2`, so that -;;; second `put-value` instruction is wrong! What we need instead is this: -;;; -;;; put-variable A0 X2 -;;; put-value A1 X2 -;;; call f/2 -;;; put-variable A0 X2 <--- right! -;;; put-value A1 X2 -;;; call g/2 -;;; -;;; So the list of seen registers needs to get cleared after each body goal. -;;; -;;; But be careful: it's only TEMPORARY registers that need to get cleared! If -;;; the variables in our example WEREN'T different (`p() :- f(X, X), g(X, X)`) -;;; the instructions would be assigning to stack registers, and we WANT to do -;;; one `put-variable` and have the rest be `put-value`s. -;;; -;;; And there's one more edge case you're probably wondering about: what happens -;;; after the HEAD of a clause? Do we need to reset? The answer is: no, -;;; because the head and first body goal share registers, which is what performs -;;; the "substitution" for the first body goal (see the comment earlier for more -;;; on that rabbit hole). - - -(defun find-opcode-register (first-seen register) - (let ((register-variant (when register - (ecase (register-type register) - ((:local :argument) :local) - ((:permanent) :stack) - ((:anonymous) :void))))) - (if first-seen - (ecase register-variant - (:local :subterm-variable-local) - (:stack :subterm-variable-stack) - (:void :subterm-void)) - (ecase register-variant - (:local :subterm-value-local) - (:stack :subterm-value-stack) - (:void :subterm-void))))) - -(defun find-opcode-list (mode) - (ecase mode - (:program :get-list) - (:query :put-list))) - -(defun find-opcode-lisp-object (mode) - (ecase mode - (:program :get-lisp-object) - (:query :put-lisp-object))) - -(defun find-opcode-structure (mode) - (ecase mode - (:program :get-structure) - (:query :put-structure))) - -(defun find-opcode-argument (first-seen mode register) - (let ((register-variant (ecase (register-type register) - ((:local :argument) :local) - ((:permanent) :stack)))) - (if first-seen - (ecase mode - (:program (ecase register-variant - (:local :get-variable-local) - (:stack :get-variable-stack))) - (:query (ecase register-variant - (:local :put-variable-local) - (:stack :put-variable-stack)))) - (ecase mode - (:program (ecase register-variant - (:local :get-value-local) - (:stack :get-value-stack))) - (:query (ecase register-variant - (:local :put-value-local) - (:stack :put-value-stack))))))) - - -(defun precompile-tokens (head-tokens body-tokens) - "Generate a series of machine instructions from a stream of head and body - tokens. - - The `head-tokens` should be program-style tokens, and are compiled in program - mode. The `body-tokens` should be query-style tokens, and are compiled in - query mode. - - Actual queries are a special case where the `head-tokens` stream is `nil` - - The compiled instructions will be returned as a circle. - - " - (let ((seen (list)) - (mode nil) - (instructions (make-empty-circle))) - (labels - ((push-instruction (&rest instruction) - (circle-insert-end instructions instruction)) - (reset-seen () - ;; Reset the list of seen registers (grep for "clown car" above) - (setf seen (remove-if #'register-temporary-p seen))) - (handle-argument (argument-register source-register) - (if (register-anonymous-p source-register) - (ecase mode - ;; Query terms need to put an unbound var into their argument - ;; register for each anonymous variable. - (:query (push-instruction :put-void argument-register)) - ;; Crazy, but for program terms we can just drop - ;; argument-position anonymous variables on the floor. - (:program nil)) - ;; OP X_n A_i - (let ((first-seen (push-if-new source-register seen :test #'register=))) - (push-instruction - (find-opcode-argument first-seen mode source-register) - source-register - argument-register)))) - (handle-structure (destination-register functor arity) - ;; OP functor reg - (push destination-register seen) - (push-instruction (find-opcode-structure mode) - functor - arity - destination-register)) - (handle-list (register) - (push register seen) - (push-instruction (find-opcode-list mode) - register)) - (handle-lisp-object (register object) - ;; OP object register - (push register seen) - (push-instruction (find-opcode-lisp-object mode) object register)) - (handle-cut () - (push-instruction :cut)) - (handle-procedure-call (functor arity is-jump) - (if (and (eq functor 'call) - (= arity 1)) - ;; DYNAMIC-[CALL/JUMP] - (push-instruction (if is-jump :dynamic-jump :dynamic-call)) - ;; [CALL/JUMP] functor - (push-instruction (if is-jump :jump :call) functor arity)) - ;; This is a little janky, but at this point the body goals have been - ;; turned into one single stream of tokens, so we don't have a nice - ;; clean way to tell when one ends. But in practice, a body goal is - ;; going to end with a CALL instruction, so we can use this as - ;; a kludge to know when to reset. - ;; - ;; TODO: We should probably dekludge this by emitting an extra "end - ;; body goal" token, especially once we add some special forms that - ;; might need to do some resetting but not end in a CALL. - (reset-seen)) - (handle-register (register) - (if (register-anonymous-p register) - ;; VOID 1 - (push-instruction (find-opcode-register nil register) 1) - ;; OP reg - (let ((first-seen (push-if-new register seen :test #'register=))) - (push-instruction - (find-opcode-register first-seen register) - register)))) - (handle-token (token) - (etypecase token - (argument-variable-token - (handle-argument (token-register token) - (token-target token))) - (structure-token - (handle-structure (token-register token) - (token-functor token) - (token-arity token))) - (list-token - (handle-list (token-register token))) - (lisp-object-token - (handle-lisp-object (token-register token) - (token-object token))) - (cut-token - (handle-cut)) - (jump-token - (handle-procedure-call (token-functor token) - (token-arity token) - t)) - (call-token - (handle-procedure-call (token-functor token) - (token-arity token) - nil)) - (register-token - (handle-register (token-register token))))) - (handle-stream (tokens) - (map nil #'handle-token tokens))) - (when head-tokens - (setf mode :program) - (handle-stream head-tokens)) - (setf mode :query) - (handle-stream body-tokens) - instructions))) - - -(defun precompile-clause (head body) - "Precompile the clause. - - `head` should be the head of the clause for program clauses, or `nil` for - query clauses. - - `body` is the body of the clause, or `nil` for facts. - - Returns a circle of instructions and the properties of the clause. - - " - (let* ((clause-props - (determine-clause-properties head body)) - (head-tokens - (when head - (tokenize-program-term head clause-props))) - (clause-type - (cond ((null head) :query) - ((null body) :fact) - ((null (rest body)) :chain) - (t :rule))) - (body-tokens - (when body - (loop - :with first = t - :for (goal . remaining) :on body - :append - (if (eq goal '!) ; gross - ;; cut just gets emitted straight, but DOESN'T flip `first`... - ;; TODO: fix the cut layering violation here... - (list (make-instance 'cut-token)) - (prog1 - (tokenize-query-term - goal clause-props - :in-nead first - ;; For actual WAM queries we're running, we don't want to - ;; LCO the final CALL because we need that stack frame - ;; (for storing the results). - :is-tail (and (not (eq clause-type :query)) - (null remaining))) - (setf first nil))))))) - (let ((instructions (precompile-tokens head-tokens body-tokens)) - (variable-count (length (clause-permanent-vars clause-props)))) - ;; We need to compile facts and rules differently. Facts end with - ;; a PROCEED and rules are wrapped in ALOC/DEAL. - (ecase clause-type - (:chain - ;; Chain rules don't need anything at all. They just unify, set up - ;; the next predicate's arguments, and JUMP. By definition, in a chain - ;; rule all variables must be temporary, so we don't need a stack frame - ;; at all! - nil) - (:rule ; a full-ass rule - ;; Non-chain rules need an ALLOC at the head and a DEALLOC right before - ;; the tail call: - ;; - ;; ALLOC n - ;; ... - ;; DEAL - ;; JUMP - (circle-insert-beginning instructions `(:allocate ,variable-count)) - (circle-insert-before (circle-backward instructions) `(:deallocate))) - - (:fact - (circle-insert-end instructions `(:proceed))) - - (:query - ;; The book doesn't have this ALOC here, but we do it to aid in result - ;; extraction. Basically, to make extracting th results of a query - ;; easier we allocate all of its variables on the stack, so we need - ;; push a stack frame for them before we get started. We don't DEAL - ;; because we want the frame to be left on the stack at the end so we - ;; can poke at it. - (circle-insert-beginning instructions `(:allocate ,variable-count)) - (circle-insert-end instructions `(:done)))) - (values instructions clause-props)))) - - -(defun precompile-query (query) - "Compile `query`, returning the instructions and permanent variables. - - `query` should be a list of goal terms. - - " - (multiple-value-bind (instructions clause-props) - (precompile-clause nil query) - (values instructions - (clause-permanent-vars clause-props)))) - - -(defun find-predicate (clause) - "Return the functor and arity of the predicate of `clause`." - ;; ( (f ?x ?y) | head ||| clause - ;; (foo ?x) || body ||| - ;; (bar ?y) ) || ||| - (let ((head (car clause))) - (etypecase head - (null (error "Clause ~S has a NIL head." clause)) - (symbol (values head 0)) ; constants are 0-arity - (cons (values (car head) ; (f ...) - (1- (length head)))) - (t (error "Clause ~S has a malformed head." clause))))) - - -(defun precompile-rules (rules) - "Compile a single predicate's `rules` into a list of instructions. - - All the rules must for the same predicate. This is not checked, for - performance reasons. Don't fuck it up. - - Each rule in `rules` should be a clause consisting of a head term and zero or - more body terms. A rule with no body is called a fact. - - Returns the circle of compiled instructions, as well as the functor and arity - of the rules being compiled. - - " - (assert rules () "Cannot compile an empty program.") - (multiple-value-bind (functor arity) (find-predicate (first rules)) - (values - (if (= 1 (length rules)) - ;; Single-clause rules don't need to bother setting up a choice point. - (destructuring-bind ((head . body)) rules - (precompile-clause head body)) - ;; Otherwise we need to loop through each of the clauses, pushing their - ;; choice point instruction first, then their actual code. - ;; - ;; The `nil` clause addresses will get filled in later, during rendering. - (loop :with instructions = (make-empty-circle) - :for ((head . body) . remaining) :on rules - :for first-p = t :then nil - :for last-p = (null remaining) - :for clause-instructions = (precompile-clause head body) - :do (progn - (circle-insert-end - instructions - (cond (first-p `(:try ,+choice-point-placeholder+)) - (last-p `(:trust)) - (t `(:retry ,+choice-point-placeholder+)))) - (circle-append-circle instructions clause-instructions)) - :finally (return instructions))) - functor - arity))) - - - diff -r 19200659513a -r 81939d20415a src/wam/compiler/6-optimization.lisp --- a/src/wam/compiler/6-optimization.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,111 +0,0 @@ -(in-package #:bones.wam) - -;;;; ,,--. . . -;;;; |`, | ,-. |- . ,-,-. . ,_, ,-. |- . ,-. ,-. -;;;; | | | | | | | | | | / ,-| | | | | | | -;;;; `---' |-' `' ' ' ' ' ' '"' `-^ `' ' `-' ' ' -;;;; | -;;;; ' - -;;; Optimization of the WAM instructions happens between the precompilation -;;; phase and the rendering phase. We perform a number of passes over the -;;; circle of instructions, doing one optimization each time. - - -(defun optimize-get-constant (node constant register) - ;; 1. get_structure c/0, Ai -> get_constant c, Ai - (circle-replace node `(:get-constant ,constant ,register))) - -(defun optimize-put-constant (node constant register) - ;; 2. put_structure c/0, Ai -> put_constant c, Ai - (circle-replace node `(:put-constant ,constant ,register))) - -(defun optimize-subterm-constant-query (node constant register) - ;; 3. put_structure c/0, Xi *** WE ARE HERE - ;; ... - ;; subterm_value Xi -> subterm_constant c - (loop - :with previous = (circle-prev node) - ;; Search for the corresponding set-value instruction - :for n = (circle-forward-remove node) :then (circle-forward n) - :while n - :for (opcode . arguments) = (circle-value n) - :when (and (eql opcode :subterm-value-local) - (register= register (first arguments))) - :do - (circle-replace n `(:subterm-constant ,constant)) - (return previous))) - -(defun optimize-subterm-constant-program (node constant register) - ;; 4. subterm_variable Xi -> subterm_constant c - ;; ... - ;; get_structure c/0, Xi *** WE ARE HERE - (loop - ;; Search backward for the corresponding subterm-variable instruction - :for n = (circle-backward node) :then (circle-backward n) - :while n - :for (opcode . arguments) = (circle-value n) - :when (and (eql opcode :subterm-variable-local) - (register= register (first arguments))) - :do - (circle-replace n `(:subterm-constant ,constant)) - (return (circle-backward-remove node)))) - - -(defun optimize-constants (instructions) - ;; From the book and the erratum, there are four optimizations we can do for - ;; constants (0-arity structures). - - (flet ((optimize-put (node functor register) - (if (register-argument-p register) - (optimize-put-constant node functor register) - (optimize-subterm-constant-query node functor register))) - (optimize-get (node functor register) - (if (register-argument-p register) - (optimize-get-constant node functor register) - (optimize-subterm-constant-program node functor register)))) - (loop - :for node = (circle-forward instructions) :then (circle-forward node) - :while node :do - (destructuring-bind (opcode . arguments) (circle-value node) - (when (member opcode '(:put-structure :get-structure)) - (destructuring-bind (functor arity register) arguments - (when (zerop arity) - (setf node - (case opcode - (:put-structure (optimize-put node functor register)) - (:get-structure (optimize-get node functor register)))))))))) - instructions) - - -(defun optimize-void-runs (instructions) - ;; We can optimize runs of N (:unify-void 1) instructions into a single one - ;; that does all N at once. - (loop - :for node = (circle-forward instructions) :then (circle-forward node) - :while node - :for opcode = (car (circle-value node)) - :when (eq opcode :subterm-void) - :do - (loop - :with beginning = (circle-backward node) - :for run-node = node :then (circle-forward run-node) - :for run-opcode = (car (circle-value run-node)) - :while (eq opcode run-opcode) - :do (circle-remove run-node) - :sum 1 :into run-length fixnum ; lol - :finally - (progn - (setf node (circle-forward beginning)) - (circle-insert-after beginning - `(,opcode ,run-length))))) - instructions) - - -(defun optimize-instructions (instructions) - (->> instructions - (optimize-constants) - (optimize-void-runs))) - - - diff -r 19200659513a -r 81939d20415a src/wam/compiler/7-rendering.lisp --- a/src/wam/compiler/7-rendering.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,156 +0,0 @@ -(in-package #:bones.wam) - -;;;; .-,--. . -;;;; `|__/ ,-. ,-. ,-| ,-. ,-. . ,-. ,-. -;;;; )| \ |-' | | | | |-' | | | | | | -;;;; `' ` `-' ' ' `-^ `-' ' ' ' ' `-| -;;;; ,| -;;;; `' - -;;; Rendering is the act of taking the friendly list-of-instructions format and -;;; actually converting it to raw-ass bytes and storing it in an array. - - -(defun check-instruction (opcode arguments) - (assert (= (length arguments) - (1- (instruction-size opcode))) - () - "Cannot push opcode ~A with ~D arguments ~S, it requires exactly ~D." - (opcode-name opcode) - (length arguments) - arguments - (1- (instruction-size opcode)))) - - -(defun code-push-instruction (store opcode arguments address) - "Push the given instruction into `store` at `address`. - - `arguments` should be a list of `code-word`s. - - Returns how many words were pushed. - - " - (check-instruction opcode arguments) - (setf (aref store address) opcode - (subseq store (1+ address)) arguments) - (instruction-size opcode)) - - -(defun render-opcode (opcode-designator) - (ecase opcode-designator - (:get-structure +opcode-get-structure+) - (:get-variable-local +opcode-get-variable-local+) - (:get-variable-stack +opcode-get-variable-stack+) - (:get-value-local +opcode-get-value-local+) - (:get-value-stack +opcode-get-value-stack+) - (:put-structure +opcode-put-structure+) - (:put-variable-local +opcode-put-variable-local+) - (:put-variable-stack +opcode-put-variable-stack+) - (:put-value-local +opcode-put-value-local+) - (:put-value-stack +opcode-put-value-stack+) - (:put-void +opcode-put-void+) - (:subterm-variable-local +opcode-subterm-variable-local+) - (:subterm-variable-stack +opcode-subterm-variable-stack+) - (:subterm-value-local +opcode-subterm-value-local+) - (:subterm-value-stack +opcode-subterm-value-stack+) - (:subterm-void +opcode-subterm-void+) - (:put-constant +opcode-put-constant+) - (:get-constant +opcode-get-constant+) - (:subterm-constant +opcode-subterm-constant+) - (:get-list +opcode-get-list+) - (:put-list +opcode-put-list+) - (:get-lisp-object +opcode-get-lisp-object+) - (:put-lisp-object +opcode-put-lisp-object+) - (:jump +opcode-jump+) - (:call +opcode-call+) - (:dynamic-jump +opcode-dynamic-jump+) - (:dynamic-call +opcode-dynamic-call+) - (:proceed +opcode-proceed+) - (:allocate +opcode-allocate+) - (:deallocate +opcode-deallocate+) - (:done +opcode-done+) - (:try +opcode-try+) - (:retry +opcode-retry+) - (:trust +opcode-trust+) - (:cut +opcode-cut+))) - -(defun render-argument (argument) - (cond - ;; Ugly choice point args that'll be filled later... - ((eq +choice-point-placeholder+ argument) 0) - - ;; Bytecode just needs the register numbers. - ((typep argument 'register) (register-number argument)) - - ;; Everything else just gets shoved right into the array. - (t argument))) - -(defun render-bytecode (store instructions start limit) - "Render `instructions` (a circle) into `store` starting at `start`. - - Bail if ever pushed beyond `limit`. - - Return the total number of code words rendered. - - " - (let ((previous-jump nil)) - (flet - ((fill-previous-jump (address) - (when previous-jump - (setf (aref store (1+ previous-jump)) address)) - (setf previous-jump address))) - (loop - :with address = start - - ;; Render the next instruction - :for node = (circle-forward instructions) - :then (or (circle-forward node) - (return instruction-count)) - - :for (opcode-designator . arguments) = (circle-value node) - :for opcode = (render-opcode opcode-designator) - :for size = (instruction-size opcode) - :summing size :into instruction-count - - ;; Make sure we don't run past the end of our section. - :when (>= (+ size address) limit) - :do (error "Code store exhausted, game over.") - - :do (code-push-instruction store - opcode - (mapcar #'render-argument arguments) - address) - - ;; We need to fill in the addresses for the choice point jumping - ;; instructions. For example, when we have TRY ... TRUST, the TRUST - ;; needs to patch its address into the TRY instruction. - ;; - ;; I know, this is ugly, sorry. - :when (member opcode-designator '(:try :retry :trust)) - :do (fill-previous-jump address) - - ;; look, don't judge me, i told you i know its bad - :do (incf address size))))) - - -(defun render-query-into (storage instructions) - (render-bytecode storage instructions 0 +maximum-query-size+)) - - -(defun mark-label (wam functor arity address) - "Set the code label `functor`/`arity` to point at `address`." - (setf (wam-code-label wam functor arity) - address)) - -(defun render-rules (wam functor arity instructions) - ;; Before we render the instructions, make the label point at where they're - ;; about to go. - (mark-label wam functor arity (wam-code-pointer wam)) - (incf (wam-code-pointer wam) - (render-bytecode (wam-code wam) - instructions - (wam-code-pointer wam) - (array-total-size (wam-code wam))))) - - - diff -r 19200659513a -r 81939d20415a src/wam/compiler/8-ui.lisp --- a/src/wam/compiler/8-ui.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ -(in-package #:bones.wam) - -;;;; ,-. . ,-_/ . -;;;; | | ,-. ,-. ,-. ' | ,-. |- ,-. ,-. ," ,-. ,-. ,-. -;;;; | | . `-. |-' | .^ | | | | |-' | |- ,-| | |-' -;;;; `--^-' `-' `-' ' `--' ' ' `' `-' ' | `-^ `-' `-' -;;;; ' - -;;; The final phase wraps everything else up into a sane UI. - -(defun %compile-query-into (storage query) - (multiple-value-bind (instructions permanent-variables) - (precompile-query query) - (optimize-instructions instructions) - (values permanent-variables - (render-query-into storage instructions)))) - -(defun compile-query (wam query) - "Compile `query` into the query section of the WAM's code store. - - `query` should be a list of goal terms. - - Returns the permanent variables and the size of the compiled bytecode. - - " - (%compile-query-into (wam-code wam) query)) - -(defun compile-query-into (storage query) - "Compile `query` into the given array `storage`. - - `query` should be a list of goal terms. - - Returns the permanent variables and the size of the compiled bytecode. - - " - (%compile-query-into storage query)) - - -(defun compile-rules (wam rules) - "Compile `rules` into the WAM's code store. - - Each rule in `rules` should be a clause consisting of a head term and zero or - more body terms. A rule with no body is called a fact. - - " - (multiple-value-bind (instructions functor arity) - (precompile-rules rules) - (optimize-instructions instructions) - (render-rules wam functor arity instructions))) - diff -r 19200659513a -r 81939d20415a src/wam/constants.lisp --- a/src/wam/constants.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,149 +0,0 @@ -(in-package #:bones.wam) - -(defmacro define-constants (count-symbol &rest symbols) - `(progn - ,@(loop :for c :from 0 - :for s :in symbols - :collect `(define-constant ,s ,c)) - (define-constant ,count-symbol ,(length symbols)))) - - -(define-constant +code-word-size+ 60 - :documentation "Size (in bits) of each word in the code store.") - -(define-constant +code-limit+ (expt 2 +code-word-size+) - :documentation "Maximum size of the WAM code store.") - -(define-constant +code-sentinel+ (1- +code-limit+) - ; TODO: Should this sentinel value be 0 like everything else? - :documentation "Sentinel value used in the PC and CP.") - - -(define-constants +number-of-cell-types+ - +cell-type-null+ - +cell-type-structure+ - +cell-type-reference+ - +cell-type-functor+ - +cell-type-constant+ - +cell-type-list+ - +cell-type-lisp-object+ - +cell-type-stack+) - - -(define-constant +register-count+ 2048 - :documentation "The number of local registers the WAM has available.") - -(define-constant +maximum-arity+ 1024 - :documentation "The maximum allowed arity of functors.") - - -;; TODO Make all this shit configurable at runtime -(define-constant +stack-limit+ 4096 - :documentation "Maximum size of the WAM stack.") - -(define-constant +stack-frame-size-limit+ (+ 7 +register-count+) - :documentation "The maximum size, in stack frame words, that a stack frame could be.") - - -(define-constant +maximum-query-size+ 1024 - :documentation - "The maximum size (in bytes of bytecode) a query may compile to.") - -(define-constant +maximum-instruction-size+ 4 - :documentation - "The maximum number of code words an instruction (including opcode) might be.") - -(define-constant +code-query-start+ 0 - :documentation "The address in the code store where the query code begins.") - -(define-constant +code-main-start+ +maximum-query-size+ - :documentation "The address in the code store where the main program code begins.") - - -(define-constant +stack-start+ +register-count+ - :documentation "The address in the store of the first cell of the stack.") - -(define-constant +stack-end+ (+ +stack-start+ +stack-limit+) - :documentation - "The address in the store one past the last cell in the stack.") - -(define-constant +heap-start+ +stack-end+ - :documentation "The address in the store of the first cell of the heap.") - - -(define-constant +trail-limit+ array-total-size-limit - ;; TODO: should probably limit this to something more reasonable - :documentation "The maximum number of variables that may exist in the trail.") - -(define-constant +store-limit+ array-total-size-limit - :documentation "Maximum size of the WAM store.") - -(define-constant +heap-limit+ (- +store-limit+ +register-count+ +stack-limit+) - ;; The heap gets whatever's left over after the registers and stack have taken - ;; their chunk of memory. - :documentation "Maximum size of the WAM heap.") - -(define-constant +functor-limit+ array-total-size-limit - ;; Functors are stored in a functor table. - :documentation "The maximum number of functors the WAM can keep track of.") - - -(define-constant +wildcard-symbol+ '?) - - -;;;; Opcodes -(define-constants +number-of-opcodes+ - +opcode-noop+ - - ;; Program - +opcode-get-structure+ - +opcode-get-variable-local+ - +opcode-get-variable-stack+ - +opcode-get-value-local+ - +opcode-get-value-stack+ - - ;; Query - +opcode-put-structure+ - +opcode-put-variable-local+ - +opcode-put-variable-stack+ - +opcode-put-value-local+ - +opcode-put-value-stack+ - +opcode-put-void+ - - ;; Subterm - +opcode-subterm-variable-local+ - +opcode-subterm-variable-stack+ - +opcode-subterm-value-local+ - +opcode-subterm-value-stack+ - +opcode-subterm-void+ - - ;; Control - +opcode-jump+ - +opcode-call+ - +opcode-dynamic-jump+ - +opcode-dynamic-call+ - +opcode-proceed+ - +opcode-allocate+ - +opcode-deallocate+ - +opcode-done+ - +opcode-try+ - +opcode-retry+ - +opcode-trust+ - +opcode-cut+ - - ;; Constants - +opcode-get-constant+ - +opcode-put-constant+ - +opcode-subterm-constant+ - - ;; Lists - +opcode-get-list+ - +opcode-put-list+ - - ;; Lisp Objects - +opcode-get-lisp-object+ - +opcode-put-lisp-object+) - - -;;;; Debug Config -(defparameter *off-by-one* nil) diff -r 19200659513a -r 81939d20415a src/wam/dump.lisp --- a/src/wam/dump.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,378 +0,0 @@ -(in-package #:bones.wam) - -(defun heap-debug (wam address indent-p) - (format - nil "~A~A" - (if indent-p - " " - "") - (cell-typecase (wam address) - ((:reference r) (if (= address r) - "unbound variable " - (format nil "var pointer to ~8,'0X " r))) - ((:structure s) (format nil "struct pointer to ~8,'0X " s)) - ((:functor f) (format nil "functor symbol ~A " f)) - ((:constant c) (format nil "constant symbol ~A " c)) - (t "")))) - - -(defun dump-cell-value (value) - ;; todo flesh this out - (typecase value - (fixnum (format nil "~16,'0X" value)) - (t (format nil "~16<#~;~>")))) - - -(defun dump-heap (wam from to) - ;; This code is awful, sorry. - (format t "HEAP~%") - (format t " +----------+-----+------------------+--------------------------------------+~%") - (format t " | ADDR | TYP | VALUE | DEBUG |~%") - (format t " +----------+-----+------------------+--------------------------------------+~%") - (when (> from (1+ +heap-start+)) - (format t " | â‹® | â‹® | â‹® | |~%")) - (flet ((print-cell (address indent) - (format t " | ~8,'0X | ~A | ~16,'0X | ~36A |~%" - address - (cell-type-short-name (wam-store-type wam address)) - (dump-cell-value (wam-store-value wam address)) - (heap-debug wam address (plusp indent))))) - (loop :with indent = 0 - :for address :from from :below to - :do (progn - (print-cell address indent) - (cell-typecase (wam address) - ((:functor f n) (declare (ignore f)) (setf indent n)) - (t (when (not (zerop indent)) - (decf indent))))))) - (when (< to (wam-heap-pointer wam)) - (format t " | â‹® | â‹® | â‹® | |~%")) - (format t " +----------+-----+------------------+--------------------------------------+~%") - (values)) - - -(defun dump-stack-frame (wam start-address) - (loop :with remaining = nil - :with arg-number = nil - :for address :from start-address - :for offset :from 0 - :for type = (wam-store-type wam address) - :for value = (wam-store-value wam address) - :while (or (null remaining) (plusp remaining)) - :do (format - t " | ~8,'0X | ~A | ~30A|~A~A~A~%" - address - (dump-cell-value value) - (cond - ((= address +stack-start+) "") - ((= offset 0) "CE ===========================") - ((= offset 1) "CP") - ((= offset 2) "CUT") - ((= offset 3) (progn - (setf remaining value - arg-number 0) - (format nil "N: ~D" value))) - (t (prog1 - (format nil " Y~D: ~A ~A" - arg-number - (cell-type-short-name type) - (dump-cell-value value)) - (decf remaining) - (incf arg-number)))) - (if (= address (wam-environment-pointer wam)) " <- E" "") - (if (= address (wam-backtrack-pointer wam)) " <- B" "") - (if (= address (wam-cut-pointer wam)) " <- CUT" "")) - :finally (return address))) - -(defun dump-stack-choice (wam start-address) - (loop :with remaining = nil - :with arg-number = nil - :for address :from start-address - :for offset :from 0 - :for type = (wam-store-type wam address) - :for value = (wam-store-value wam address) - :while (or (null remaining) (plusp remaining)) - :do (format - t " | ~8,'0X | ~A | ~30A|~A~A~A~%" - address - (dump-cell-value value) - (cond - ((= address +stack-start+) "") - ((= offset 0) (progn - (setf remaining value - arg-number 0) - (format nil "N: ~D =============" value))) - ((= offset 1) "CE saved env pointer") - ((= offset 2) "CP saved cont pointer") - ((= offset 3) "CB previous choice") - ((= offset 4) "BP next clause") - ((= offset 5) "TR saved trail pointer") - ((= offset 6) "H saved heap pointer") - (t (prog1 - (format nil " A~D: ~A ~A" - arg-number - (cell-type-short-name type) - (dump-cell-value value)) - (decf remaining) - (incf arg-number)))) - (if (= address (wam-environment-pointer wam)) " <- E" "") - (if (= address (wam-backtrack-pointer wam)) " <- B" "") - (if (= address (wam-cut-pointer wam)) " <- CUT" "")) - :finally (return address))) - -(defun dump-stack (wam) - (format t "STACK~%") - (format t " +----------+------------------+-------------------------------+~%") - (format t " | ADDR | VALUE | |~%") - (format t " +----------+------------------+-------------------------------+~%") - (with-accessors ((e wam-environment-pointer) (b wam-backtrack-pointer)) wam - (when (not (= +stack-start+ e b)) - (loop :with address = (1+ +stack-start+) - :while (< address (wam-stack-top wam)) - :do (cond - ((= address e) (setf address (dump-stack-frame wam address))) - ((= address b) (setf address (dump-stack-choice wam address))) - (t - (format t " | ~8,'0X | | |~%" address) - (incf address)))))) - (format t " +----------+------------------+-------------------------------+~%")) - - -(defun pretty-functor (functor) - (etypecase functor - (symbol (format nil "~A/0" functor)) - (cons (destructuring-bind (symbol . arity) functor - (format nil "~A/~D" symbol arity))))) - -(defun pretty-argument (argument) - (typecase argument - (fixnum (format nil "~4,'0X" argument)) - (t (format nil "#<*>")))) - -(defun pretty-arguments (arguments) - (format nil "~10<~{ ~A~}~;~>" (mapcar #'pretty-argument arguments))) - - -(defgeneric instruction-details (opcode arguments)) - -(defmethod instruction-details ((opcode t) arguments) - (format nil "~A~A" - (opcode-short-name opcode) - (pretty-arguments arguments))) - - -(defmethod instruction-details ((opcode (eql +opcode-get-structure+)) arguments) - (format nil "GETS~A ; X~A = ~A/~D" - (pretty-arguments arguments) - (third arguments) - (first arguments) - (second arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-put-structure+)) arguments) - (format nil "PUTS~A ; X~A <- new ~A/~D" - (pretty-arguments arguments) - (third arguments) - (first arguments) - (second arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-get-variable-local+)) arguments) - (format nil "GVAR~A ; X~A <- A~A" - (pretty-arguments arguments) - (first arguments) - (second arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-get-variable-stack+)) arguments) - (format nil "GVAR~A ; Y~A <- A~A" - (pretty-arguments arguments) - (first arguments) - (second arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-get-value-local+)) arguments) - (format nil "GVLU~A ; X~A = A~A" - (pretty-arguments arguments) - (first arguments) - (second arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-get-value-stack+)) arguments) - (format nil "GVLU~A ; Y~A = A~A" - (pretty-arguments arguments) - (first arguments) - (second arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-put-variable-local+)) arguments) - (format nil "PVAR~A ; X~A <- A~A <- new unbound REF" - (pretty-arguments arguments) - (first arguments) - (second arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-put-variable-stack+)) arguments) - (format nil "PVAR~A ; Y~A <- A~A <- new unbound REF" - (pretty-arguments arguments) - (first arguments) - (second arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-put-value-local+)) arguments) - (format nil "PVLU~A ; A~A <- X~A" - (pretty-arguments arguments) - (second arguments) - (first arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-put-value-stack+)) arguments) - (format nil "PVLU~A ; A~A <- Y~A" - (pretty-arguments arguments) - (second arguments) - (first arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-call+)) arguments) - (format nil "CALL~A ; call ~A/~D" - (pretty-arguments arguments) - (first arguments) - (second arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-jump+)) arguments) - (format nil "JUMP~A ; jump ~A/~D" - (pretty-arguments arguments) - (first arguments) - (second arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-dynamic-call+)) arguments) - (format nil "DYCL~A ; dynamic call" - (pretty-arguments arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-dynamic-jump+)) arguments) - (format nil "DYJP~A ; dynamic jump" - (pretty-arguments arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-get-constant+)) arguments) - (format nil "GCON~A ; X~A = CONSTANT ~A" - (pretty-arguments arguments) - (second arguments) - (pretty-functor (first arguments)))) - -(defmethod instruction-details ((opcode (eql +opcode-put-constant+)) arguments) - (format nil "PCON~A ; X~A <- CONSTANT ~A" - (pretty-arguments arguments) - (second arguments) - (pretty-functor (first arguments)))) - -(defmethod instruction-details ((opcode (eql +opcode-subterm-constant+)) arguments) - (format nil "SCON~A ; SUBTERM CONSTANT ~A" - (pretty-arguments arguments) - (pretty-functor (first arguments)))) - -(defmethod instruction-details ((opcode (eql +opcode-get-list+)) arguments) - (format nil "GLST~A ; X~A = [vvv | vvv]" - (pretty-arguments arguments) - (first arguments))) - -(defmethod instruction-details ((opcode (eql +opcode-put-list+)) arguments) - (format nil "PLST~A ; X~A = [vvv | vvv]" - (pretty-arguments arguments) - (first arguments))) - - -(defun functor-table (wam) - (loop - :with result = (make-hash-table) - :for arity :from 0 - :for table :across (wam-code-labels wam) - :when table - :do (maphash (lambda (functor loc) - (setf (gethash loc result) - (cons functor arity))) - table) - :finally (return result))) - -(defun dump-code-store (wam code-store - &optional - (from 0) - (to (length code-store))) - ;; This is a little trickier than might be expected. We have to walk from - ;; address 0 no matter what `from` we get, because instruction sizes vary and - ;; aren't aligned. So if we just start at `from` we might start in the middle - ;; of an instruction and everything would be fucked. - (let ((addr 0) - (lbls (functor-table wam))) ; oh god - (while (< addr to) - (let ((instruction (retrieve-instruction code-store addr))) - (when (>= addr from) - (when (not (= +opcode-noop+ (aref instruction 0))) - - (let ((lbl (gethash addr lbls))) ; forgive me - (when lbl - (format t ";;;; BEGIN ~A~%" - (pretty-functor lbl)))) - (format t ";~A~4,'0X: " - (if (= (wam-program-counter wam) addr) - ">>" - " ") - addr) - (format t "~A~%" (instruction-details (aref instruction 0) - (rest (coerce instruction 'list)))))) - (incf addr (length instruction)))))) - -(defun dump-code - (wam - &optional - (from (max (- (wam-program-counter wam) 8) ; wow - 0)) ; this - (to (min (+ (wam-program-counter wam) 8) ; is - (length (wam-code wam))))) ; bad - (format t "CODE (size: ~D frame(s) / ~:[OPEN~;CLOSED~])~%" - (length (wam-logic-stack wam)) - (wam-logic-closed-p wam)) - (dump-code-store wam (wam-code wam) from to)) - - -(defun dump-wam-registers (wam) - (format t "REGISTERS:~%") - (format t "~5@A -> ~8X~%" "S" (wam-subterm wam)) - (loop :for register :from 0 :to +register-count+ - :for type = (wam-store-type wam register) - :for value = (wam-store-value wam register) - :when (not (cell-type-p (wam register) :null)) - :do (format t "~5@A -> ~A ~A ~A~%" - (format nil "X~D" register) - (cell-type-short-name type) - (dump-cell-value value) - (format nil "; ~A" (first (extract-things wam (list register))))))) - - -(defun dump-wam-trail (wam) - (format t " TRAIL: ") - (loop :for address :across (wam-trail wam) :do - (format t "~8,'0X //" address)) - (format t "~%")) - - -(defun dump-wam (wam from to) - (format t " FAIL: ~A~%" (wam-fail wam)) - (format t " BACKTRACKED?: ~A~%" (wam-backtracked wam)) - (format t " MODE: ~S~%" (wam-mode wam)) - (format t " HEAP SIZE: ~A~%" (- (wam-heap-pointer wam) +heap-start+)) - (format t " PROGRAM COUNTER: ~4,'0X~%" (wam-program-counter wam)) - (format t "CONTINUATION PTR: ~4,'0X~%" (wam-continuation-pointer wam)) - (format t " ENVIRONMENT PTR: ~4,'0X~%" (wam-environment-pointer wam)) - (format t " BACKTRACK PTR: ~4,'0X~%" (wam-backtrack-pointer wam)) - (format t " CUT PTR: ~4,'0X~%" (wam-cut-pointer wam)) - (format t "HEAP BCKTRCK PTR: ~4,'0X~%" (wam-heap-backtrack-pointer wam)) - (dump-wam-trail wam) - (dump-wam-registers wam) - (format t "~%") - (dump-heap wam from to) - (format t "~%") - (dump-stack wam) - (format t "~%") - (dump-code wam)) - -(defun dump-wam-query-code (wam &optional (max +maximum-query-size+)) - (with-slots (code) wam - (dump-code-store wam code 0 max))) - -(defun dump-wam-code (wam) - (with-slots (code) wam - (dump-code-store wam code +maximum-query-size+ (length code)))) - -(defun dump-wam-full (wam) - (dump-wam wam (1+ +heap-start+) (wam-heap-pointer wam))) - diff -r 19200659513a -r 81939d20415a src/wam/types.lisp --- a/src/wam/types.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,127 +0,0 @@ -(in-package #:bones.wam) - -; (deftype cell-type () ; todo: pick one of these... -; `(integer 0 ,(1- +number-of-cell-types+))) - -(deftype cell-type () - 'fixnum) - -(deftype cell-value () - '(or fixnum t)) - - -(deftype type-store () - '(simple-array cell-type (*))) - -(deftype value-store () - '(simple-array cell-value (*))) - - -(deftype store-index () - `(integer 0 ,(1- +store-limit+))) - -(deftype heap-index () - `(integer ,+heap-start+ ,(1- +store-limit+))) - -(deftype stack-index () - `(integer ,+stack-start+ ,(1- +stack-end+))) - -(deftype trail-index () - `(integer 0 ,(1- +trail-limit+))) - -(deftype register-index () - `(integer 0 ,(1- +register-count+))) - - -(deftype fname () - 'symbol) - -(deftype arity () - `(integer 0 ,+maximum-arity+)) - - -(deftype code-index () - ;; either an address or the sentinel - `(integer 0 ,(1- +code-limit+))) - -(deftype code-word () - t) - - -(deftype generic-code-store () - `(simple-array code-word (*))) - -(deftype query-code-holder () - `(simple-array code-word (,+maximum-query-size+))) - -(deftype query-size () - `(integer 0 ,+maximum-query-size+)) - -(deftype instruction-size () - `(integer 1 ,+maximum-instruction-size+)) - - -(deftype opcode () - `(integer 0 ,(1- +number-of-opcodes+))) - - -(deftype stack-frame-size () - `(integer 4 ,+stack-frame-size-limit+)) - -(deftype stack-choice-size () - ;; TODO: is this actually right? check on frame size limit vs choice point - ;; size limit... - `(integer 8 ,+stack-frame-size-limit+)) - -(deftype stack-frame-argcount () - 'arity) - -(deftype continuation-pointer () - 'code-index) - -(deftype environment-pointer () - 'stack-index) - -(deftype backtrack-pointer () - 'stack-index) - - -(deftype stack-frame-word () - '(or - environment-pointer ; CE - continuation-pointer ; CP - stack-frame-argcount)) ; N - -(deftype stack-choice-word () - '(or - environment-pointer ; CE - backtrack-pointer ; B, CC - continuation-pointer ; CP, BP - stack-frame-argcount ; N - trail-index ; TR - heap-index)) ; H - -(deftype stack-word () - '(or stack-frame-word stack-choice-word)) - - -;;;; Sanity Checks -;;; The values on the WAM stack are a bit of a messy situation. The WAM store -;;; is defined as an array of cells, but certain things on the stack aren't -;;; actually cells (e.g. the stored continuation pointer). -;;; -;;; This shouldn't be a problem (aside from being ugly) as long as they all fit -;;; inside fixnums... so let's just make sure that's the case. - -(defun sanity-check-stack-type (type) - (assert (subtypep type 'fixnum) () - "Type ~A is too large!" - type) - (values)) - -(sanity-check-stack-type 'stack-frame-argcount) -(sanity-check-stack-type 'environment-pointer) -(sanity-check-stack-type 'continuation-pointer) -(sanity-check-stack-type 'backtrack-pointer) -(sanity-check-stack-type 'trail-index) -(sanity-check-stack-type 'stack-word) diff -r 19200659513a -r 81939d20415a src/wam/ui.lisp --- a/src/wam/ui.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,233 +0,0 @@ -(in-package #:bones.wam) - - -;;;; Database -(defvar *database* nil) - - -(defun make-database () - (make-wam)) - -(defun reset-database () - (setf *database* (make-database))) - - -(defmacro with-database (database &body body) - `(let ((*database* ,database)) - ,@body)) - -(defmacro with-fresh-database (&body body) - `(with-database (make-database) ,@body)) - - -;;;; Normalization -(eval-when (:compile-toplevel :load-toplevel :execute) - (defun normalize-term (term) - ;; Normally a rule consists of a head terms and many body terms, like so: - ;; - ;; (likes sally ?who) (likes ?who cats) - ;; - ;; But sometimes people are lazy and don't include the parens around - ;; zero-arity predicates: - ;; - ;; (happy steve) sunny - (if (and (not (variablep term)) - (symbolp term) - (not (eq term '!))) ; jesus - (list term) - term))) - - -;;;; Assertion -(defun invoke-rule (head &rest body) - (assert *database* (*database*) "No database.") - (wam-logic-frame-add-clause! *database* - (list* (normalize-term head) - (mapcar #'normalize-term body))) - nil) - -(defun invoke-fact (fact) - (invoke-rule fact) - nil) - -(defun invoke-facts (&rest facts) - (mapc #'invoke-fact facts) - nil) - - -(defmacro rule (head &body body) - `(invoke-rule ',head ,@(loop :for term :in body :collect `',term))) - -(defmacro fact (fact) - `(invoke-fact ',fact)) - -(defmacro facts (&body facts) - `(progn - ,@(loop :for f :in facts :collect `(fact ,f)))) - - -;;;; Logic Frames -(defun push-logic-frame () - (assert *database* (*database*) "No database.") - (wam-push-logic-frame! *database*)) - -(defun pop-logic-frame () - (assert *database* (*database*) "No database.") - (wam-pop-logic-frame! *database*)) - -(defun finalize-logic-frame () - (assert *database* (*database*) "No database.") - (wam-finalize-logic-frame! *database*)) - -(defmacro push-logic-frame-with (&body body) - `(prog2 - (push-logic-frame) - (progn ,@body) - (finalize-logic-frame))) - - -;;;; Querying -(defun perform-aot-query (code size vars result-function) - (assert *database* (*database*) "No database.") - (run-aot-compiled-query *database* code size vars - :result-function result-function)) - -(defun perform-query (terms result-function) - (assert *database* (*database*) "No database.") - (run-query *database* (mapcar #'normalize-term terms) - :result-function result-function)) - - -(defmacro define-invocation ((name aot-name) arglist &body body) - (with-gensyms (terms data code size vars) - `(progn - (defun ,name ,(append arglist `(&rest ,terms)) - (macrolet ((invoke (result-function) - `(perform-query ,',terms ,result-function))) - ,@body)) - (defun ,aot-name ,(append arglist `(,data)) - (destructuring-bind (,code ,size ,vars) ,data - (macrolet ((invoke (result-function) - `(perform-aot-query ,',code ,',size ,',vars - ,result-function))) - ,@body)))))) - - -(define-invocation (invoke-query invoke-query-aot) () - (let ((result nil) - (succeeded nil)) - (invoke (lambda (r) - (setf result r - succeeded t) - t)) - (values result succeeded))) - -(define-invocation (invoke-query-all invoke-query-all-aot) () - (let ((results nil)) - (invoke (lambda (result) - (push result results) - nil)) - (nreverse results))) - -(define-invocation (invoke-query-map invoke-query-map-aot) (function) - (let ((results nil)) - (invoke (lambda (result) - (push (funcall function result) results) - nil)) - (nreverse results))) - -(define-invocation (invoke-query-do invoke-query-do-aot) (function) - (invoke (lambda (result) - (funcall function result) - nil)) - nil) - -(define-invocation (invoke-query-find invoke-query-find-aot) (predicate) - (let ((results nil) - (succeeded nil)) - (invoke (lambda (result) - (if (funcall predicate result) - (progn (setf results result - succeeded t) - t) - nil))) - (values results succeeded))) - -(define-invocation (invoke-prove invoke-prove-aot) () - (let ((succeeded nil)) - (invoke (lambda (result) - (declare (ignore result)) - (setf succeeded t) - t)) - succeeded)) - - -(defun quote-terms (terms) - (loop :for term :in terms :collect `',term)) - -(defmacro query (&rest terms) - `(invoke-query ,@(quote-terms terms))) - -(defmacro query-all (&rest terms) - `(invoke-query-all ,@(quote-terms terms))) - -(defmacro query-map (function &rest terms) - `(invoke-query-map ,function ,@(quote-terms terms))) - -(defmacro query-do (function &rest terms) - `(invoke-query-do ,function ,@(quote-terms terms))) - -(defmacro query-find (predicate &rest terms) - `(invoke-query-find ,predicate ,@(quote-terms terms))) - -(defmacro prove (&rest terms) - `(invoke-prove ,@(quote-terms terms))) - - -;;;; Chili Dogs -(eval-when (:compile-toplevel :load-toplevel :execute) - (defun make-aot-data-form (terms) - (with-gensyms (code size vars) - `(load-time-value - (let* ((,code (allocate-query-holder))) - (multiple-value-bind (,vars ,size) - (compile-query-into - ,code ',(->> terms - (mapcar #'eval) - (mapcar #'normalize-term))) - (list ,code ,size ,vars))) - t)))) - - -(defmacro define-invocation-compiler-macro (name aot-name arglist) - `(define-compiler-macro ,name (&whole form - ,@arglist - &rest terms - &environment env) - (if (every (rcurry #'constantp env) terms) - `(,',aot-name ,,@arglist ,(make-aot-data-form terms)) - form))) - - -(define-invocation-compiler-macro invoke-query invoke-query-aot ()) -(define-invocation-compiler-macro invoke-query-all invoke-query-all-aot ()) -(define-invocation-compiler-macro invoke-query-map invoke-query-map-aot (function)) -(define-invocation-compiler-macro invoke-query-do invoke-query-do-aot (function)) -(define-invocation-compiler-macro invoke-query-find invoke-query-find-aot (predicate)) -(define-invocation-compiler-macro invoke-prove invoke-prove-aot ()) - - -;;;; Debugging -(defun dump (&optional full-code) - (dump-wam-full *database*) - (when full-code - (dump-wam-code *database*))) - -(defmacro bytecode (&body body) - `(with-fresh-database - (push-logic-frame-with ,@body) - (format t ";;;; PROGRAM CODE =======================~%") - (dump-wam-code *database*) - (format t "~%;;;; QUERY CODE =========================~%") - (dump-wam-query-code *database*))) - diff -r 19200659513a -r 81939d20415a src/wam/vm.lisp --- a/src/wam/vm.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,919 +0,0 @@ -(in-package #:bones.wam) - -;;;; Config -(defvar *step* nil) - - -;;;; Utilities -(declaim (inline functors-match-p - constants-match-p)) - - -(defun push-unbound-reference! (wam) - "Push a new unbound reference cell onto the heap, returning its address." - (wam-heap-push! wam +cell-type-reference+ (wam-heap-pointer wam))) - -(defun push-new-structure! (wam) - "Push a new structure cell onto the heap, returning its address. - - The structure cell's value will point at the next address, so make sure you - push something there too! - - " - (wam-heap-push! wam +cell-type-structure+ (1+ (wam-heap-pointer wam)))) - -(defun push-new-list! (wam) - "Push a new list cell onto the heap, returning its address. - - The list cell's value will point at the next address, so make sure you push - something there too! - - " - (wam-heap-push! wam +cell-type-list+ (1+ (wam-heap-pointer wam)))) - -(defun push-new-functor! (wam functor arity) - "Push a new functor cell pair onto the heap, returning its address." - (prog1 - (wam-heap-push! wam +cell-type-functor+ functor) - (wam-heap-push! wam +cell-type-lisp-object+ arity))) - -(defun push-new-constant! (wam constant) - "Push a new constant cell onto the heap, returning its address." - (wam-heap-push! wam +cell-type-constant+ constant)) - - -(defun functors-match-p (f1 a1 f2 a2) - "Return whether the two functor cell values represent the same functor." - (and (eq f1 f2) - (= a1 a2))) - -(defun constants-match-p (c1 c2) - "Return whether the two constant cell values unify." - (eq c1 c2)) - -(defun lisp-objects-match-p (o1 o2) - "Return whether the two lisp object cells unify." - (eql o1 o2)) - - -;;;; "Ancillary" Functions -(declaim (inline deref unbind! trail!)) - - -(defun backtrack! (wam) - "Backtrack after a failure." - (if (wam-backtrack-pointer-unset-p wam) - (setf (wam-fail wam) t) - (setf (wam-program-counter wam) (wam-stack-choice-bp wam) - (wam-cut-pointer wam) (wam-stack-choice-cc wam) - (wam-backtracked wam) t))) - -(defun trail! (wam address) - "Push the given address onto the trail (but only if necessary)." - (when (< address (wam-heap-backtrack-pointer wam)) - (wam-trail-push! wam address))) - -(defun unbind! (wam address) - "Unbind the reference cell at `address`. - - No error checking is done, so please don't try to unbind something that's not - (originally) a reference cell. - - " - (wam-set-store-cell! wam address +cell-type-reference+ address)) - -(defun unwind-trail! (wam trail-start trail-end) - "Unbind all the things in the given range of the trail." - (loop :for i :from trail-start :below trail-end :do - (unbind! wam (wam-trail-value wam i)))) - -(defun tidy-trail! (wam) - (with-accessors ((tr wam-trail-pointer) - (h wam-heap-pointer) - (hb wam-heap-backtrack-pointer) - (b wam-backtrack-pointer)) wam - (loop - ;; The book is, yet again, fucked. It just sets `i` to be the trail - ;; pointer from the choice point frame. But what if we just popped off - ;; the last choice point? If that's the case we need to look over the - ;; entire trail. - :with i = (if (wam-backtrack-pointer-unset-p wam b) - 0 - (wam-stack-choice-tr wam)) - :for target = (wam-trail-value wam i) - :while (< i tr) :do - (if (or (< target hb) - (and (< h target) - (< target b))) - (incf i) - (progn - (setf (wam-trail-value wam i) - (wam-trail-value wam (1- tr))) - (decf tr)))))) - -(defun deref (wam address) - "Dereference the address in the WAM store to its eventual destination. - - If the address is a variable that's bound to something, that something will be - looked up (recursively) and the address of whatever it's ultimately bound to - will be returned. - - " - ;; SBCL won't inline recursive functions :( - (loop - (cell-typecase (wam address) - ((:reference ref) (if (= address ref) - (return address) ; unbound ref - (setf address ref))) ; bound ref - (t (return address))))) ; non-ref - -(defun bind! (wam address-1 address-2) - "Bind the unbound reference cell to the other. - - `bind!` takes two addresses as arguments. You are expected to have `deref`ed - previously to obtain these addresses, so neither of them should ever refer to - a bound reference. - - At least one of the arguments *must* refer to an unbound reference cell. This - unbound reference will be bound to point at the other address. - - If *both* addresses refer to unbound references, the direction of the binding - is chosen arbitrarily. - - " - ;; In case it's not absolutely clear from the book: binding has to actually - ;; COPY the source cell into the destination. - ;; - ;; It can't just update the cell value of the destination REF, because if - ;; you're binding a REF on the heap to something in a register then doing so - ;; would end up with a REF to a register address. This would be bad because - ;; that register would probably get clobbered later, and the REF would now be - ;; pointing to garbage. - (cond - ;; Bind (a1 <- a2) if: - ;; - ;; * A1 is a REF and A2 is something else, or... - ;; * They're both REFs but A2 has a lower address than A1. - ((and (cell-type-p (wam address-1) :reference) - (or (not (cell-type-p (wam address-2) :reference)) - (< address-2 address-1))) - (wam-copy-store-cell! wam address-1 address-2) - (trail! wam address-1)) - - ;; Bind (a2 <- a1) if A2 is a REF and A1 is something else. - ((cell-type-p (wam address-2) :reference) - (wam-copy-store-cell! wam address-2 address-1) - (trail! wam address-2)) - - ;; wut - (t (error "At least one cell must be an unbound reference when binding.")))) - -(defun unify! (wam a1 a2) - (setf (wam-fail wam) nil) - (wam-unification-stack-push! wam a1 a2) - - (until (or (wam-fail wam) - (wam-unification-stack-empty-p wam)) - (let* ((d1 (deref wam (wam-unification-stack-pop! wam))) - (d2 (deref wam (wam-unification-stack-pop! wam))) - (t1 (wam-store-type wam d1)) - (t2 (wam-store-type wam d2))) - (macrolet ((both (cell-type-designator) - `(and - (cell-type= t1 ,cell-type-designator) - (cell-type= t2 ,cell-type-designator))) - (either (cell-type-designator) - `(or - (cell-type= t1 ,cell-type-designator) - (cell-type= t2 ,cell-type-designator)))) - (flet ((match-values (predicate) - (when (not (funcall predicate - (wam-store-value wam d1) - (wam-store-value wam d2))) - (backtrack! wam)))) - (when (not (= d1 d2)) - (cond - ;; If at least one is a reference, bind them. - ;; - ;; We know that any references we see here will be unbound because - ;; we deref'ed them above. - ((either :reference) - (bind! wam d1 d2)) - - ;; Otherwise if they're both constants or lisp objects, make sure - ;; they match exactly. - ((both :constant) (match-values #'constants-match-p)) - ((both :lisp-object) (match-values #'lisp-objects-match-p)) - - ;; Otherwise if they're both lists, unify their contents. - ((both :list) - (wam-unification-stack-push! wam - (wam-store-value wam d1) - (wam-store-value wam d2)) - (wam-unification-stack-push! wam - (1+ (wam-store-value wam d1)) - (1+ (wam-store-value wam d2)))) - - ;; Otherwise if they're both structures, make sure they match and - ;; then schedule their subterms to be unified. - ((both :structure) - (let* ((s1 (wam-store-value wam d1)) ; find where they - (s2 (wam-store-value wam d2)) ; start on the heap - (f1 (wam-store-value wam s1)) ; grab the - (f2 (wam-store-value wam s2)) ; functors - (a1 (wam-store-value wam (1+ s1))) ; and the - (a2 (wam-store-value wam (1+ s2)))) ; arities - (if (functors-match-p f1 a1 f2 a2) - ;; If the functors match, push their pairs of arguments onto - ;; the stack to be unified. - (loop :repeat a1 - :for subterm1 :from (+ 2 s1) - :for subterm2 :from (+ 2 s2) - :do (wam-unification-stack-push! wam subterm1 subterm2)) - ;; Otherwise we're hosed. - (backtrack! wam)))) - - ;; Otherwise we're looking at two different kinds of cells, and are - ;; just totally hosed. Backtrack. - (t (backtrack! wam))))))))) - - -;;;; Instruction Definition -;;; These macros are a pair of real greasy bastards. -;;; -;;; Basically the issue is that there exist two separate types of registers: -;;; local registers and stack registers. The process of retrieving the contents -;;; of a register is different for each type. -;;; -;;; Certain machine instructions take a register as an argument and do something -;;; with it. Because the two register types require different access methods, -;;; the instruction needs to know what kind of register it's dealing with. -;;; -;;; One possible way to solve this would be to encode whether this is -;;; a local/stack register in the register argument itself (e.g. with a tag -;;; bit). This would work, and a previous version of the code did that, but -;;; it's not ideal. It turns out we know the type of the register at compile -;;; time, so requiring a mask/test at run time for every register access is -;;; wasteful. -;;; -;;; Instead we use an ugly, but fast, solution. For every instruction that -;;; takes a register argument we make TWO opcodes instead of just one. The -;;; first is the "-local" variant of the instruction, which treats its register -;;; argument as a local register. The second is the "-stack" variant. When we -;;; compile we can just pick the appropriate opcode, and now we no longer need -;;; a runtime test for every single register assignment. -;;; -;;; To make the process of defining these two "variants" less excruciating we -;;; have these two macros. `define-instruction` (singular) is just a little -;;; sugar around `defun`, for those instructions that don't deal with -;;; arguments. -;;; -;;; `define-instructions` (plural) is the awful one. You pass it a pair of -;;; symbols for the two variant names. Two functions will be defined, both with -;;; the same body, with a few symbols macroletted to the appropriate access -;;; code. -;;; -;;; So in the body, instead of using: -;;; -;;; (wam-set-{local/stack}-register wam reg type value) -;;; -;;; you use: -;;; -;;; (%wam-set-register% wam reg type value) -;;; -;;; and it'll do the right thing. - -(defmacro define-instruction - ((name &optional should-inline) lambda-list &body body) - "Define an instruction function. - - This is just sugar over `defun`. - - " - `(progn - (declaim (,(if should-inline 'inline 'notinline) ,name)) - (defun ,name ,lambda-list - ,@body - nil))) - -(defmacro define-instructions - ((local-name stack-name &optional should-inline) lambda-list &body body) - "Define a local/stack pair of instructions." - `(progn - (macrolet ((%wam-register% (wam register) - `(wam-local-register-address ,wam ,register)) - (%wam-register-type% (wam register) - `(wam-local-register-type ,wam ,register)) - (%wam-register-value% (wam register) - `(wam-local-register-value ,wam ,register)) - (%wam-set-register% (wam register type value) - `(wam-set-local-register! ,wam ,register ,type ,value)) - (%wam-copy-to-register% (wam register source) - `(wam-copy-to-local-register! ,wam ,register ,source))) - (define-instruction (,local-name ,should-inline) ,lambda-list - ,@body)) - (macrolet ((%wam-register% (wam register) - `(wam-stack-register-address ,wam ,register)) - (%wam-register-type% (wam register) - `(wam-stack-register-type ,wam ,register)) - (%wam-register-value% (wam register) - `(wam-stack-register-value ,wam ,register)) - (%wam-set-register% (wam register type value) - `(wam-set-stack-register! ,wam ,register ,type ,value)) - (%wam-copy-to-register% (wam register source) - `(wam-copy-to-stack-register! ,wam ,register ,source))) - (define-instruction (,stack-name ,should-inline) ,lambda-list - ,@body)))) - - -;;;; Query Instructions -(define-instruction (%put-structure) (wam functor arity register) - (wam-set-local-register! wam register - +cell-type-structure+ - (push-new-functor! wam functor arity)) - (setf (wam-mode wam) :write)) - -(define-instruction (%put-list) (wam register) - (wam-set-local-register! wam register - +cell-type-list+ - (wam-heap-pointer wam)) - (setf (wam-mode wam) :write)) - - -(define-instructions (%put-variable-local %put-variable-stack) - (wam register argument) - (let ((ref (push-unbound-reference! wam))) - (%wam-copy-to-register% wam register ref) - (wam-copy-to-local-register! wam argument ref) - (setf (wam-mode wam) :write))) - -(define-instructions (%put-value-local %put-value-stack) - (wam register argument) - (wam-copy-to-local-register! wam argument (%wam-register% wam register)) - (setf (wam-mode wam) :write)) - - -(define-instruction (%put-void) (wam argument) - (wam-copy-to-local-register! wam argument (push-unbound-reference! wam))) - - -;;;; Program Instructions -(define-instruction (%get-structure) (wam functor arity register) - (cell-typecase (wam (deref wam register) address) - ;; If the register points at an unbound reference cell, we push three new - ;; cells onto the heap: - ;; - ;; | N | STR | N+1 | - ;; | N+1 | FUN | f | - ;; | N+2 | OBJ | n | - ;; | | | | <- S - ;; - ;; Then we bind this reference cell to point at the new structure, set - ;; the S register to point beneath it and flip over to write mode. - ;; - ;; It seems a bit confusing that we don't push the rest of the structure - ;; stuff on the heap after it too. But that's going to happen in the - ;; next few instructions (which will be subterm-*'s, executed in write - ;; mode). - (:reference - (let ((structure-address (push-new-structure! wam)) - (functor-address (push-new-functor! wam functor arity))) - (bind! wam address structure-address) - (setf (wam-mode wam) :write - (wam-subterm wam) (+ 2 functor-address)))) - - ;; If the register points at a structure cell, then we look at where - ;; that cell points (which will be the functor for the structure): - ;; - ;; | N | STR | M | points at the structure, not necessarily contiguous - ;; | ... | - ;; | M | FUN | f | the functor (hopefully it matches) - ;; | M+1 | OBJ | 2 | the arity (hopefully it matches) - ;; | M+2 | ... | ... | pieces of the structure, always contiguous - ;; | M+3 | ... | ... | and always right after the functor - ;; - ;; If it matches the functor we're looking for, we can proceed. We set - ;; the S register to the address of the first subform we need to match - ;; (M+2 in the example above). - ((:structure functor-address) - (cell-typecase (wam functor-address) - ((:functor f n) - (if (functors-match-p functor arity f n) - (setf (wam-mode wam) :read - (wam-subterm wam) (+ 2 functor-address)) - (backtrack! wam))))) - - ;; Otherwise we can't unify, so backtrack. - (t (backtrack! wam)))) - -(define-instruction (%get-list) (wam register) - (cell-typecase (wam (deref wam register) address) - ;; If the register points at a reference (unbound, because we deref'ed) we - ;; bind it to a list and flip into write mode to write the upcoming two - ;; things as its contents. - (:reference - (bind! wam address (push-new-list! wam)) - (setf (wam-mode wam) :write)) - - ;; If this is a list, we need to unify its subterms. - ((:list contents) - (setf (wam-mode wam) :read - (wam-subterm wam) contents)) - - ;; Otherwise we can't unify. - (t (backtrack! wam)))) - - -(define-instructions (%get-variable-local %get-variable-stack) - (wam register argument) - (%wam-copy-to-register% wam register argument)) - -(define-instructions (%get-value-local %get-value-stack) - (wam register argument) - (unify! wam register argument)) - - -;;;; Subterm Instructions -(define-instructions (%subterm-variable-local %subterm-variable-stack) - (wam register) - (%wam-copy-to-register% wam register - (ecase (wam-mode wam) - (:read (wam-subterm wam)) - (:write (push-unbound-reference! wam)))) - (incf (wam-subterm wam))) - -(define-instructions (%subterm-value-local %subterm-value-stack) - (wam register) - (ecase (wam-mode wam) - (:read (unify! wam register (wam-subterm wam))) - (:write (wam-heap-push! wam - (%wam-register-type% wam register) - (%wam-register-value% wam register)))) - (incf (wam-subterm wam))) - -(define-instruction (%subterm-void) (wam n) - (ecase (wam-mode wam) - (:read (incf (wam-subterm wam) n)) - (:write (loop :repeat n - :do (push-unbound-reference! wam))))) - - -;;;; Control Instructions -(declaim (inline %%procedure-call %%dynamic-procedure-call)) - - -(defun %%procedure-call (wam functor arity program-counter-increment is-tail) - (let* ((target (wam-code-label wam functor arity))) - (if (not target) - ;; Trying to call an unknown procedure. - (backtrack! wam) - (progn - (when (not is-tail) - (setf (wam-continuation-pointer wam) ; CP <- next instruction - (+ (wam-program-counter wam) program-counter-increment))) - (setf (wam-number-of-arguments wam) ; set NARGS - arity - - (wam-cut-pointer wam) ; set B0 in case we have a cut - (wam-backtrack-pointer wam) - - (wam-program-counter wam) ; jump - target))))) - -(defun %%dynamic-procedure-call (wam is-tail) - (flet - ((%go (functor arity) - (if is-tail - (%%procedure-call - wam functor arity (instruction-size +opcode-dynamic-jump+) t) - (%%procedure-call - wam functor arity (instruction-size +opcode-dynamic-call+) nil))) - (load-arguments (n start-address) - (loop :for arg :from 0 :below n - :for source :from start-address - :do (wam-copy-to-local-register! wam arg source)))) - (cell-typecase (wam (deref wam 0)) ; A_0 - ((:structure functor-address) - ;; If we have a non-zero-arity structure, we need to set up the - ;; argument registers before we call it. Luckily all the arguments - ;; conveniently live contiguously right after the functor cell. - (cell-typecase (wam functor-address) - ((:functor functor arity) - (load-arguments arity (+ 2 functor-address)) - (%go functor arity)))) - - ;; Zero-arity functors don't need to set up anything at all -- we can - ;; just call them immediately. - ((:constant c) (%go c 0)) - - ;; It's okay to do (call :var), but :var has to be bound by the time you - ;; actually reach it at runtime. - (:reference (error "Cannot dynamically call an unbound variable.")) - - ; You can't call/1 anything else. - (t (error "Cannot dynamically call something other than a structure."))))) - - -(define-instruction (%jump) (wam functor arity) - (%%procedure-call wam functor arity - (instruction-size +opcode-jump+) - t)) - -(define-instruction (%call) (wam functor arity) - (%%procedure-call wam functor arity - (instruction-size +opcode-call+) - nil)) - - -(define-instruction (%dynamic-call) (wam) - (%%dynamic-procedure-call wam nil)) - -(define-instruction (%dynamic-jump) (wam) - (%%dynamic-procedure-call wam t)) - - -(define-instruction (%proceed) (wam) - (setf (wam-program-counter wam) ; P <- CP - (wam-continuation-pointer wam))) - -(define-instruction (%allocate) (wam n) - (let ((old-e (wam-environment-pointer wam)) - (new-e (wam-stack-top wam))) - (wam-stack-ensure-size wam (+ new-e 4 n)) - (setf (wam-stack-word wam new-e) old-e ; CE - (wam-stack-word wam (+ new-e 1)) (wam-continuation-pointer wam) ; CP - (wam-stack-word wam (+ new-e 2)) (wam-cut-pointer wam) ; B0 - (wam-stack-word wam (+ new-e 3)) n ; N - (wam-environment-pointer wam) new-e))) ; E <- new-e - -(define-instruction (%deallocate) (wam) - (setf (wam-continuation-pointer wam) (wam-stack-frame-cp wam) - (wam-environment-pointer wam) (wam-stack-frame-ce wam) - (wam-cut-pointer wam) (wam-stack-frame-cut wam))) - - -;;;; Choice Instructions -(declaim (inline reset-choice-point! restore-registers-from-choice-point!)) - - -(defun reset-choice-point! (wam b) - (setf (wam-backtrack-pointer wam) b - - ;; The book is wrong here: when resetting HB we use the NEW value of B, - ;; so the heap backtrack pointer gets set to the heap pointer saved in - ;; the PREVIOUS choice point. Thanks to the errata at - ;; https://github.com/a-yiorgos/wambook/blob/master/wamerratum.txt for - ;; pointing this out. - ;; - ;; ... well, almost. The errata is also wrong here. If we're popping - ;; the FIRST choice point, then just using the HB from the "previous - ;; choice point" is going to give us garbage, so we should check for - ;; that edge case too. Please kill me. - (wam-heap-backtrack-pointer wam) - (if (wam-backtrack-pointer-unset-p wam b) - +heap-start+ - (wam-stack-choice-h wam b)))) - -(defun restore-registers-from-choice-point! (wam b) - (loop :for register :from 0 :below (wam-stack-choice-n wam b) - :for saved-register :from (wam-stack-choice-argument-address wam 0 b) - :do (wam-copy-to-local-register! wam register saved-register))) - - -(define-instruction (%try) (wam next-clause) - (let ((new-b (wam-stack-top wam)) - (nargs (wam-number-of-arguments wam))) - (wam-stack-ensure-size wam (+ new-b 8 nargs)) - (setf (wam-stack-word wam new-b) nargs ; N - (wam-stack-word wam (+ new-b 1)) (wam-environment-pointer wam) ; CE - (wam-stack-word wam (+ new-b 2)) (wam-continuation-pointer wam) ; CP - (wam-stack-word wam (+ new-b 3)) (wam-backtrack-pointer wam) ; CB - (wam-stack-word wam (+ new-b 4)) next-clause ; BP - (wam-stack-word wam (+ new-b 5)) (wam-trail-pointer wam) ; TR - (wam-stack-word wam (+ new-b 6)) (wam-heap-pointer wam) ; H - (wam-stack-word wam (+ new-b 7)) (wam-cut-pointer wam) ; CC - (wam-heap-backtrack-pointer wam) (wam-heap-pointer wam) ; HB - (wam-backtrack-pointer wam) new-b) ; B - (loop :for i :from 0 :below nargs ; A_i - :for n :from 0 :below nargs ; arg N in the choice point frame - :do (wam-copy-to-stack-choice-argument! wam n i new-b)))) - -(define-instruction (%retry) (wam next-clause) - (let ((b (wam-backtrack-pointer wam))) - (restore-registers-from-choice-point! wam b) - (unwind-trail! wam (wam-stack-choice-tr wam b) (wam-trail-pointer wam)) - (setf (wam-environment-pointer wam) (wam-stack-choice-ce wam b) - (wam-continuation-pointer wam) (wam-stack-choice-cp wam b) - ;; overwrite the next clause address in the choice point - (wam-stack-word wam (+ b 4)) next-clause - (wam-trail-pointer wam) (wam-stack-choice-tr wam b) - (wam-heap-pointer wam) (wam-stack-choice-h wam b) - (wam-heap-backtrack-pointer wam) (wam-heap-pointer wam)))) - -(define-instruction (%trust) (wam) - (let* ((b (wam-backtrack-pointer wam)) - (old-b (wam-stack-choice-cb wam b))) - (restore-registers-from-choice-point! wam b) - (unwind-trail! wam (wam-stack-choice-tr wam b) (wam-trail-pointer wam)) - (setf (wam-environment-pointer wam) (wam-stack-choice-ce wam b) - (wam-continuation-pointer wam) (wam-stack-choice-cp wam b) - (wam-trail-pointer wam) (wam-stack-choice-tr wam b) - (wam-heap-pointer wam) (wam-stack-choice-h wam b)) - (reset-choice-point! wam old-b))) - -(define-instruction (%cut) (wam) - (let ((current-choice-point (wam-backtrack-pointer wam)) - (previous-choice-point (wam-stack-frame-cut wam))) - (when (< previous-choice-point current-choice-point) - (reset-choice-point! wam previous-choice-point) - (tidy-trail! wam)))) - - -;;;; Lisp Object Instructions -(declaim (inline %%match-lisp-object)) - - -(defun %%match-lisp-object (wam object address) - (cell-typecase (wam (deref wam address) address) - ;; If the thing points at a reference (unbound, because we deref'ed) we just - ;; bind it. - (:reference - (wam-set-store-cell! wam address +cell-type-lisp-object+ object) - (trail! wam address)) - - ;; If this is a lisp object, "unify" them with eql. - ((:lisp-object contents) - (when (not (lisp-objects-match-p object contents)) - (backtrack! wam))) - - ;; Otherwise we can't unify. - (t (backtrack! wam)))) - - -(define-instruction (%get-lisp-object) (wam object register) - (%%match-lisp-object wam object register)) - -(define-instruction (%put-lisp-object) (wam object register) - (wam-set-local-register! wam register +cell-type-lisp-object+ object)) - - -;;;; Constant Instructions -(declaim (inline %%match-constant)) - - -(defun %%match-constant (wam constant address) - (cell-typecase (wam (deref wam address) address) - (:reference - (wam-set-store-cell! wam address +cell-type-constant+ constant) - (trail! wam address)) - - ((:constant c) - (when (not (constants-match-p constant c)) - (backtrack! wam))) - - (t (backtrack! wam)))) - - -(define-instruction (%put-constant) (wam constant register) - (wam-set-local-register! wam register +cell-type-constant+ constant)) - -(define-instruction (%get-constant) (wam constant register) - (%%match-constant wam constant register)) - -(define-instruction (%subterm-constant) (wam constant) - (ecase (wam-mode wam) - (:read (%%match-constant wam constant (wam-subterm wam))) - (:write (push-new-constant! wam constant))) - (incf (wam-subterm wam))) - - -;;;; Running -(defun extract-things (wam addresses) - "Extract the things at the given store addresses. - - The things will be returned in the same order as the addresses were given. - - Unbound variables will be turned into uninterned symbols. There will only be - one such symbol for any specific unbound var, so if two addresses are - (eventually) bound to the same unbound var, the symbols returned from this - function will be `eql`. - - " - (let ((unbound-vars (list))) - (labels - ((mark-unbound-var (address) - (let ((symbol (make-symbol (format nil "?VAR-~D" ; lol - (length unbound-vars))))) - (car (push (cons address symbol) unbound-vars)))) - (extract-var (address) - (cdr (or (assoc address unbound-vars) - (mark-unbound-var address)))) - (recur (address) - (cell-typecase (wam (deref wam address) address) - (:null "NULL?!") - ((:reference r) (extract-var r)) - ((:structure s) (recur s)) - ((:list l) (cons (recur l) (recur (1+ l)))) - ((:constant c) c) - ((:functor functor arity) - (list* functor - (loop :repeat arity - :for subterm :from (+ 2 address) - :collect (recur subterm)))) - ((:lisp-object o) o) - (t (error "What to heck is this?"))))) - (mapcar #'recur addresses)))) - -(defun extract-query-results (wam vars) - (let* ((addresses (loop :for var :in vars - ;; TODO: make this suck less - :for i :from (+ (wam-environment-pointer wam) 4) - :collect i)) - (results (extract-things wam addresses))) - (weave vars results))) - - -(defmacro instruction-call (wam instruction code-store pc number-of-arguments) - "Expand into a call of the appropriate machine instruction. - - `pc` should be a safe place representing the program counter. - - `code-store` should be a safe place representing the instructions. - - " - `(,instruction ,wam - ,@(loop :for i :from 1 :to number-of-arguments - :collect `(aref ,code-store (+ ,pc ,i))))) - -(defmacro opcode-case ((wam code opcode-place) &rest clauses) - "Handle each opcode in the main VM run loop. - - Each clause should be of the form: - - (opcode &key instruction (increment-pc t) raw) - - `opcode` must be a constant by macroexpansion time. - - `instruction` should be the corresponding instruction function to call. If - given it will be expanded with the appropriate `aref`s to get its arguments - from the code store. - - If `increment-pc` is true an extra `incf` form will be added after the - instruction to handle incrementing the program counter (but only if - backtracking didn't happen). - - If a `raw` argument is given it will be spliced in verbatim. - - " - ;; This macro is pretty nasty, but it's better than trying to write it all out - ;; by hand. - ;; - ;; The main idea is that we want to be able to nicely specify all our - ;; opcode/instruction pairs in `run`. Furthermore, we need to handle - ;; everything really efficiently because `run` is the hot loop of the entire - ;; VM. It is the #1 function you'll see when profiling. - ;; - ;; This macro handles expanding each case clause into the appropriate `aref`s - ;; and such, as well as updating the program counter. The instruction size of - ;; each opcode is looked up at macroexpansion time to save cycles. - ;; - ;; For example, a clause like this: - ;; - ;; (opcode-case (wam code opcode) - ;; ;; ... - ;; (#.+opcode-put-structure+ :instruction %put-structure)) - ;; - ;; will get expanded into something like this: - ;; - ;; (ecase/tree opcode - ;; ;; ... - ;; (+opcode-put-structure+ (%put-structure wam (aref code (+ program-counter 1)) - ;; (aref code (+ program-counter 2))) - ;; (incf program-counter 3))) - (flet - ((parse-opcode-clause (clause) - (destructuring-bind (opcode &key instruction (increment-pc t) raw) - clause - (let ((size (instruction-size opcode))) - `(,opcode - ,(when instruction - `(instruction-call ,wam - ,instruction - ,code - (wam-program-counter ,wam) - ,(1- size))) - ,(when increment-pc - `(when (not (wam-backtracked ,wam)) - (incf (wam-program-counter ,wam) ,size))) - ,raw))))) - `(ecase/tree ,opcode-place - ,@(mapcar #'parse-opcode-clause clauses)))) - - -(defun run (wam done-thunk &optional (step *step*)) - (loop - :with code = (wam-code wam) - :until (or (wam-fail wam) ; failure - (= (wam-program-counter wam) +code-sentinel+)) ; finished - :for opcode = (the opcode (aref (wam-code wam) (wam-program-counter wam))) - :do (progn - (when step - (dump) - (break "About to execute instruction at ~4,'0X" (wam-program-counter wam))) - - (opcode-case (wam code opcode) - ;; Query - (#.+opcode-put-structure+ :instruction %put-structure) - (#.+opcode-put-variable-local+ :instruction %put-variable-local) - (#.+opcode-put-variable-stack+ :instruction %put-variable-stack) - (#.+opcode-put-value-local+ :instruction %put-value-local) - (#.+opcode-put-value-stack+ :instruction %put-value-stack) - (#.+opcode-put-void+ :instruction %put-void) - ;; Program - (#.+opcode-get-structure+ :instruction %get-structure) - (#.+opcode-get-variable-local+ :instruction %get-variable-local) - (#.+opcode-get-variable-stack+ :instruction %get-variable-stack) - (#.+opcode-get-value-local+ :instruction %get-value-local) - (#.+opcode-get-value-stack+ :instruction %get-value-stack) - ;; Subterm - (#.+opcode-subterm-variable-local+ :instruction %subterm-variable-local) - (#.+opcode-subterm-variable-stack+ :instruction %subterm-variable-stack) - (#.+opcode-subterm-value-local+ :instruction %subterm-value-local) - (#.+opcode-subterm-value-stack+ :instruction %subterm-value-stack) - (#.+opcode-subterm-void+ :instruction %subterm-void) - ;; Constant - (#.+opcode-put-constant+ :instruction %put-constant) - (#.+opcode-get-constant+ :instruction %get-constant) - (#.+opcode-subterm-constant+ :instruction %subterm-constant) - ;; Lisp Objects - (#.+opcode-put-lisp-object+ :instruction %put-lisp-object) - (#.+opcode-get-lisp-object+ :instruction %get-lisp-object) - ;; List - (#.+opcode-put-list+ :instruction %put-list) - (#.+opcode-get-list+ :instruction %get-list) - ;; Choice - (#.+opcode-try+ :instruction %try) - (#.+opcode-retry+ :instruction %retry) - (#.+opcode-trust+ :instruction %trust) - (#.+opcode-cut+ :instruction %cut) - ;; Control - (#.+opcode-allocate+ :instruction %allocate) - (#.+opcode-deallocate+ :instruction %deallocate) - (#.+opcode-proceed+ :instruction %proceed :increment-pc nil) - (#.+opcode-jump+ :instruction %jump :increment-pc nil) - (#.+opcode-call+ :instruction %call :increment-pc nil) - (#.+opcode-dynamic-jump+ :instruction %dynamic-jump :increment-pc nil) - (#.+opcode-dynamic-call+ :instruction %dynamic-call :increment-pc nil) - ;; Final - (#.+opcode-done+ - :increment-pc nil - :raw (if (funcall done-thunk) - (return-from run nil) - (backtrack! wam)))) - - (setf (wam-backtracked wam) nil) - - (when (>= (wam-program-counter wam) - (wam-code-pointer wam)) - (error "Fell off the end of the program code store.")))) - nil) - - -(defun %run-query (wam vars result-function) - (setf (wam-program-counter wam) 0 - (wam-continuation-pointer wam) +code-sentinel+) - (run wam (lambda () - (funcall result-function - (extract-query-results wam vars)))) - (wam-reset! wam) - nil) - -(defun run-query (wam terms &key (result-function - (lambda (results) - (declare (ignore results))))) - "Compile query `terms` and run the instructions on the `wam`. - - Resets the heap, etc after running. - - When `*step*` is true, break into the debugger before calling the procedure - and after each instruction. - - " - (%run-query wam (compile-query wam terms) result-function)) - -(defun run-aot-compiled-query (wam query-code query-size query-vars - &key (result-function - (lambda (results) - (declare (ignore results))))) - "Run the AOT-compiled query `code`/`vars` on the `wam`. - - Resets the heap, etc after running. - - When `*step*` is true, break into the debugger before calling the procedure - and after each instruction. - - " - (wam-load-query-code! wam query-code query-size) - (%run-query wam query-vars result-function)) - - diff -r 19200659513a -r 81939d20415a src/wam/wam.lisp --- a/src/wam/wam.lisp Sat Aug 20 21:56:20 2016 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,897 +0,0 @@ -(in-package #:bones.wam) - -;;;; WAM -(defun allocate-wam-code (size) - ;; The WAM bytecode is all stored in this array. The first - ;; `+maximum-query-size+` words are reserved for query bytecode, which will - ;; get loaded in (overwriting the previous query) when making a query. - ;; Everything after that is for the actual database. - (make-array (+ +maximum-query-size+ size) - :initial-element 0 - :element-type 'code-word)) - -(defun allocate-query-holder () - (make-array +maximum-query-size+ - :adjustable nil - :initial-element 0 - :element-type 'code-word)) - - -(defun allocate-wam-type-store (size) - ;; The main WAM store(s) contain three separate blocks of values: - ;; - ;; [0, +register-count+) -> the local X_n registers - ;; [+stack-start+, +stack-end+) -> the stack - ;; [+heap-start+, ...) -> the heap - ;; - ;; `+register-count+` and `+stack-start+` are the same number, and - ;; `+stack-end+` and `+heap-start+` are the same number as well. - (make-array (+ +register-count+ - +stack-limit+ - size) ; type array - :initial-element +cell-type-null+ - :element-type 'cell-type)) - -(defun allocate-wam-value-store (size) - (make-array (+ +register-count+ - +stack-limit+ - size) - :initial-element 0 - :element-type 'cell-value)) - -(defun allocate-wam-unification-stack (size) - (make-array size - :fill-pointer 0 - :adjustable t - :element-type 'store-index)) - -(defun allocate-wam-trail (size) - (make-array size - :fill-pointer 0 - :adjustable t - :initial-element 0 - :element-type 'store-index)) - - -(defstruct (wam (:constructor make-wam%)) - ;; Data - (type-store - (error "Type store required.") - :type type-store - :read-only t) - (value-store - (error "Value store required.") - :type value-store - :read-only t) - (unification-stack - (error "Unification stack required.") - :type (vector store-index) - :read-only t) - (trail - (error "Trail required.") - :type (vector store-index) - :read-only t) - - ;; Code - (code - (error "Code store required.") - :type (simple-array code-word (*)) - :read-only t) - (code-labels - (make-array +maximum-arity+ :initial-element nil) - :type (simple-array (or null hash-table)) - :read-only t) - - ;; Logic Stack - (logic-stack nil :type list) - (logic-pool nil :type list) - - ;; Unique registers - (number-of-arguments 0 :type arity) ; NARGS - (subterm +heap-start+ :type heap-index) ; S - (program-counter 0 :type code-index) ; P - (code-pointer +code-main-start+ :type code-index) ; CODE - (heap-pointer (1+ +heap-start+) :type heap-index) ; H - (stack-pointer +stack-start+ :type stack-index) ; SP - (continuation-pointer 0 :type code-index) ; CP - (environment-pointer +stack-start+ :type environment-pointer) ; E - (backtrack-pointer +stack-start+ :type backtrack-pointer) ; B - (cut-pointer +stack-start+ :type backtrack-pointer) ; B0 - (heap-backtrack-pointer +heap-start+ :type heap-index) ; HB - - ;; Flags - (fail nil :type boolean) - (backtracked nil :type boolean) - (mode nil :type (or null (member :read :write)))) - - -(defmethod print-object ((wam wam) stream) - (print-unreadable-object - (wam stream :type t :identity t) - (format stream "an wam"))) - - -(defun make-wam (&key - (store-size (megabytes 10)) - (code-size (megabytes 1))) - (make-wam% :code (allocate-wam-code code-size) - :type-store (allocate-wam-type-store store-size) - :value-store (allocate-wam-value-store store-size) - :unification-stack (allocate-wam-unification-stack 16) - :trail (allocate-wam-trail 64))) - - -;;;; Store -;;; The main store of the WAM is split into two separate arrays: -;;; -;;; * An array of cell types, each a fixnum. -;;; * An array of cell values, each being a fixnum or a normal Lisp pointer. -;;; -;;; The contents of the value depend on the type of cell. -;;; -;;; NULL cells always have a value of zero. -;;; -;;; STRUCTURE cell values are an index into the store, describing where the -;;; structure starts. -;;; -;;; REFERENCE cell values are an index into the store, pointing at whatever the -;;; value is bound to. Unbound variables contain their own store index as -;;; a value. -;;; -;;; FUNCTOR cell values are a pointer to a `(fname . arity)` cons. -;;; -;;; CONSTANT cells are the same as functor cells, except that they always happen -;;; to refer to functors with an arity of zero. -;;; -;;; LIST cell values are an index into the store, pointing at the first of two -;;; consecutive cells. The first cell is the car of the list, the second one is -;;; the cdr. -;;; -;;; LISP-OBJECT cell values are simply arbitrary objects in memory. They are -;;; compared with `eql` during the unification process, so we don't actually -;;; care WHAT they are, exactly. -;;; -;;; STACK cell values are special cases. The WAM's main store is a combination -;;; of the heap, the stack, and registers. Heap cells (and registers) are those -;;; detailed above, but stack cells can also hold numbers like the continuation -;;; pointer. We lump all the extra things together into one kind of cell. - -(declaim (inline wam-store-type - wam-store-value - wam-set-store-cell! - wam-copy-store-cell!)) - - -(defun wam-store-type (wam address) - "Return the type of the cell at the given address." - (aref (wam-type-store wam) address)) - -(defun wam-store-value (wam address) - "Return the value of the cell at the given address." - (aref (wam-value-store wam) address)) - - -(defun wam-set-store-cell! (wam address type value) - (setf (aref (wam-type-store wam) address) type - (aref (wam-value-store wam) address) value)) - -(defun wam-copy-store-cell! (wam destination source) - (wam-set-store-cell! wam - destination - (wam-store-type wam source) - (wam-store-value wam source))) - - -(defun wam-sanity-check-store-read (wam address) - (declare (ignore wam)) - (when (= address +heap-start+) - (error "Cannot read from heap address zero."))) - - -(macrolet ((define-unsafe (name return-type) - `(progn - (declaim (inline ,name)) - (defun ,name (wam address) - (the ,return-type (aref (wam-value-store wam) address)))))) - (define-unsafe %unsafe-null-value (eql 0)) - (define-unsafe %unsafe-structure-value store-index) - (define-unsafe %unsafe-reference-value store-index) - (define-unsafe %unsafe-functor-value fname) - (define-unsafe %unsafe-constant-value fname) - (define-unsafe %unsafe-list-value store-index) - (define-unsafe %unsafe-lisp-object-value t) - (define-unsafe %unsafe-stack-value stack-word)) - - -(defun %type-designator-constant (designator) - (ecase designator - (:null +cell-type-null+) - (:structure +cell-type-structure+) - (:reference +cell-type-reference+) - (:functor +cell-type-functor+) - (:constant +cell-type-constant+) - (:list +cell-type-list+) - (:lisp-object +cell-type-lisp-object+) - ((t) t))) - -(defun %type-designator-accessor (designator) - (ecase designator - (:null '%unsafe-null-value) - (:structure '%unsafe-structure-value) - (:reference '%unsafe-reference-value) - (:functor '%unsafe-functor-value) - (:constant '%unsafe-constant-value) - (:list '%unsafe-list-value) - (:lisp-object '%unsafe-lisp-object-value))) - -(defun parse-cell-typecase-clause (wam address clause) - "Parse a `cell-typecase` clause into the appropriate `ecase` clause." - (destructuring-bind (binding . body) clause - (destructuring-bind - (type-designator &optional value-symbol secondary-value-symbol) - (if (symbolp binding) (list binding) binding) ; normalize binding - (let ((primary-let-binding - (when value-symbol - `((,value-symbol (,(%type-designator-accessor type-designator) - ,wam ,address))))) - (secondary-let-binding - (when secondary-value-symbol - `((,secondary-value-symbol - ,(ecase type-designator - (:functor - `(the arity (%unsafe-lisp-object-value ; yolo - ,wam - (1+ ,address)))))))))) - ; build the ecase clause (const ...body...) - (list - (%type-designator-constant type-designator) - `(let (,@primary-let-binding - ,@secondary-let-binding) - ,@body)))))) - -(defmacro cell-typecase ((wam address &optional address-symbol) &rest clauses) - "Dispatch on the type of the cell at `address` in the WAM store. - - If `address-symbol` is given it will be bound to the result of evaluating - `address` in the remainder of the form. - - The type of the cell will be matched against `clauses` much like `typecase`. - - Each clause should be of the form `(binding forms)`. - - Each binding can be either a simple cell type designator like `:reference`, or - a list of this designator and a symbol to bind the cell's value to. The - symbol is bound with `let` around the `forms` and type-hinted appropriately - (at least on SBCL). - - Example: - - (cell-typecase (wam (deref wam address) final-address) - (:reference (bind final-address foo) - 'it-is-a-reference) - ((:constant c) (list 'it-is-the-constant c)) - (t 'unknown)) - - " - (once-only (wam address) - `(progn - (policy-cond:policy-if (or (= safety 3) (= debug 3)) - (wam-sanity-check-store-read ,wam ,address) - nil) - (let (,@(when address-symbol - (list `(,address-symbol ,address)))) - (case (wam-store-type ,wam ,address) - ,@(mapcar (curry #'parse-cell-typecase-clause wam address) - clauses)))))) - - -(defmacro cell-type= (type type-designator) - `(= ,type ,(%type-designator-constant type-designator))) - -(defmacro cell-type-p ((wam address) type-designator) - `(cell-type= - (wam-store-type ,wam ,address) - ,type-designator)) - - -;;;; Heap -;;; The WAM heap is all the memory left in the store after the local registers -;;; and stack have been accounted for. Because the store is adjustable and the -;;; heap lives at the end of it, the heap can grow if necessary. -;;; -;;; We reserve the first address in the heap as a sentinel, as an "unset" value -;;; for various pointers into the heap. - -(declaim (inline wam-heap-pointer-unset-p wam-heap-push!)) - - -(defun wam-heap-pointer-unset-p (wam address) - (declare (ignore wam)) - (= address +heap-start+)) - -(defun wam-heap-push! (wam type value) - "Push the cell onto the WAM heap and increment the heap pointer. - - Returns the address it was pushed to. - - " - (let ((heap-pointer (wam-heap-pointer wam))) - (if (>= heap-pointer +store-limit+) ; todo: respect actual size... - (error "WAM heap exhausted.") - (progn - (wam-set-store-cell! wam heap-pointer type value) - (incf (wam-heap-pointer wam)) - heap-pointer)))) - - -;;;; Trail -(declaim (inline wam-trail-pointer - (setf wam-trail-pointer) - wam-trail-value - (setf wam-trail-value))) - - -(defun wam-trail-pointer (wam) - "Return the current trail pointer of the WAM." - (fill-pointer (wam-trail wam))) - -(defun (setf wam-trail-pointer) (new-value wam) - (setf (fill-pointer (wam-trail wam)) new-value)) - - -(defun wam-trail-push! (wam address) - "Push `address` onto the trail. - - Returns the address and the trail address it was pushed to. - - " - (let ((trail (wam-trail wam))) - (if (= +trail-limit+ (fill-pointer trail)) - (error "WAM trail exhausted.") - (values address (vector-push-extend address trail))))) - -(defun wam-trail-pop! (wam) - "Pop the top address off the trail and return it." - (vector-pop (wam-trail wam))) - -(defun wam-trail-value (wam address) - ;; TODO: can we really not just pop, or is something else gonna do something - ;; fucky with the trail? - "Return the element (a heap index) in the WAM trail at `address`." - (aref (wam-trail wam) address)) - -(defun (setf wam-trail-value) (new-value wam address) - (setf (aref (wam-trail wam) address) new-value)) - - -;;;; Stack -;;; The stack is stored as a fixed-length hunk of the main WAM store array, -;;; between the local register and the heap, with small glitch: we reserve the -;;; first word of the stack (address `+stack-start`) to mean "uninitialized", so -;;; we have a nice sentinel value for the various pointers into the stack. - -(declaim (inline assert-inside-stack - wam-stack-ensure-size - wam-stack-word - (setf wam-stack-word) - wam-backtrack-pointer-unset-p - wam-environment-pointer-unset-p)) - - -(defun assert-inside-stack (wam address) - (declare (ignorable wam address)) - (policy-cond:policy-cond - ((>= debug 2) - (progn - (assert (<= +stack-start+ address (1- +stack-end+)) () - "Cannot access stack cell at address ~X (outside the stack range ~X to ~X)" - address +stack-start+ +stack-end+) - (assert (not (= +stack-start+ address)) () - "Cannot access stack address zero."))) - ((>= safety 1) - (when (not (< +stack-start+ address +stack-end+)) - (error "Stack bounds crossed. Game over."))) - (t nil)) ; wew lads - nil) - -(defun wam-stack-ensure-size (wam address) - "Ensure the WAM stack is large enough to be able to write to `address`." - (assert-inside-stack wam address)) - - -(defun wam-stack-word (wam address) - "Return the stack word at the given address." - (assert-inside-stack wam address) - (%unsafe-stack-value wam address)) - -(defun (setf wam-stack-word) (new-value wam address) - (assert-inside-stack wam address) - (wam-set-store-cell! wam address +cell-type-stack+ new-value)) - - -(defun wam-backtrack-pointer-unset-p - (wam &optional (backtrack-pointer (wam-backtrack-pointer wam))) - (= backtrack-pointer +stack-start+)) - -(defun wam-environment-pointer-unset-p - (wam &optional (environment-pointer (wam-environment-pointer wam))) - (= environment-pointer +stack-start+)) - - -;;; Stack frames are laid out like so: -;;; -;;; |PREV| -;;; | CE | <-- environment-pointer -;;; | CP | -;;; | B0 | -;;; | N | -;;; | Y0 | -;;; | .. | -;;; | Yn | -;;; |NEXT| <-- fill-pointer - -(declaim (inline wam-stack-frame-ce - wam-stack-frame-cp - wam-stack-frame-cut - wam-stack-frame-n - wam-stack-frame-size - wam-stack-frame-argument-address - wam-set-stack-frame-argument!)) - - -(defun wam-stack-frame-ce (wam &optional (e (wam-environment-pointer wam))) - (wam-stack-word wam e)) - -(defun wam-stack-frame-cp (wam &optional (e (wam-environment-pointer wam))) - (wam-stack-word wam (1+ e))) - -(defun wam-stack-frame-cut (wam &optional (e (wam-environment-pointer wam))) - (wam-stack-word wam (+ 2 e))) - -(defun wam-stack-frame-n (wam &optional (e (wam-environment-pointer wam))) - (wam-stack-word wam (+ 3 e))) - - -(defun wam-stack-frame-argument-address - (wam n &optional (e (wam-environment-pointer wam))) - (+ 4 n e)) - -(defun wam-set-stack-frame-argument! (wam n type value - &optional (e (wam-environment-pointer wam))) - (wam-set-store-cell! wam (wam-stack-frame-argument-address wam n e) - type value)) - -(defun wam-copy-to-stack-frame-argument! (wam n source - &optional (e (wam-environment-pointer wam))) - (wam-copy-store-cell! wam (wam-stack-frame-argument-address wam n e) - source)) - - -(defun wam-stack-frame-size (wam &optional (e (wam-environment-pointer wam))) - "Return the size of the stack frame starting at environment pointer `e`." - (+ (wam-stack-frame-n wam e) 4)) - - -;;; Choice point frames are laid out like so: -;;; -;;; |PREV| -;;; 0 | N | number of arguments <-- backtrack-pointer -;;; 1 | CE | continuation environment -;;; 2 | CP | continuation pointer -;;; 3 | CB | previous choice point -;;; 4 | BP | next clause -;;; 5 | TR | trail pointer -;;; 6 | H | heap pointer -;;; 7 | CC | saved cut pointer -;;; 8 | A0 | -;;; | .. | -;;; 8+n | An | -;;; |NEXT| <-- environment-pointer -;;; -;;; This is a bit different than the book. We stick the args at the end of the -;;; frame instead of the beginning so it's easier to retrieve the other values. - -(declaim (inline wam-stack-choice-n - wam-stack-choice-ce - wam-stack-choice-cp - wam-stack-choice-cb - wam-stack-choice-cc - wam-stack-choice-bp - wam-stack-choice-tr - wam-stack-choice-h - wam-stack-choice-size - wam-stack-choice-argument-address - wam-set-stack-choice-argument! - wam-copy-to-stack-choice-argument!)) - - -(defun wam-stack-choice-n (wam &optional (b (wam-backtrack-pointer wam))) - (wam-stack-word wam b)) - -(defun wam-stack-choice-ce (wam &optional (b (wam-backtrack-pointer wam))) - (wam-stack-word wam (+ b 1))) - -(defun wam-stack-choice-cp (wam &optional (b (wam-backtrack-pointer wam))) - (wam-stack-word wam (+ b 2))) - -(defun wam-stack-choice-cb (wam &optional (b (wam-backtrack-pointer wam))) - (wam-stack-word wam (+ b 3))) - -(defun wam-stack-choice-bp (wam &optional (b (wam-backtrack-pointer wam))) - (wam-stack-word wam (+ b 4))) - -(defun wam-stack-choice-tr (wam &optional (b (wam-backtrack-pointer wam))) - (wam-stack-word wam (+ b 5))) - -(defun wam-stack-choice-h (wam &optional (b (wam-backtrack-pointer wam))) - (wam-stack-word wam (+ b 6))) - -(defun wam-stack-choice-cc (wam &optional (b (wam-backtrack-pointer wam))) - (wam-stack-word wam (+ b 7))) - - -(defun wam-stack-choice-argument-address - (wam n &optional (b (wam-backtrack-pointer wam))) - (+ 8 n b)) - -(defun wam-set-stack-choice-argument! (wam n type value - &optional (b (wam-backtrack-pointer wam))) - (wam-set-store-cell! wam (wam-stack-choice-argument-address wam n b) - type value)) - -(defun wam-copy-to-stack-choice-argument! (wam n source - &optional (b (wam-backtrack-pointer wam))) - (wam-copy-store-cell! wam (wam-stack-choice-argument-address wam n b) - source)) - - -(defun wam-stack-choice-size (wam &optional (b (wam-backtrack-pointer wam))) - "Return the size of the choice frame starting at backtrack pointer `b`." - (+ (wam-stack-choice-n wam b) 8)) - - -(defun wam-stack-top (wam) - "Return the top of the stack. - - This is the first place it's safe to overwrite in the stack. - - " - ;; The book is wrong here -- it looks up the "current frame size" to - ;; determine where the next frame should start, but on the first allocation - ;; there IS no current frame so it looks at garbage. Fuckin' great. - (let ((e (wam-environment-pointer wam)) - (b (wam-backtrack-pointer wam))) - (cond - ((and (wam-backtrack-pointer-unset-p wam b) - (wam-environment-pointer-unset-p wam e)) ; first allocation - (1+ +stack-start+)) - ((> e b) ; the last thing on the stack is a frame - (+ e (wam-stack-frame-size wam e))) - (t ; the last thing on the stack is a choice point - (+ b (wam-stack-choice-size wam b)))))) - - -;;;; Resetting -(defun wam-truncate-heap! (wam) - ;; todo: null out the heap once we're storing live objects - (setf (wam-heap-pointer wam) (1+ +heap-start+))) - -(defun wam-truncate-trail! (wam) - (setf (fill-pointer (wam-trail wam)) 0)) - -(defun wam-truncate-unification-stack! (wam) - (setf (fill-pointer (wam-unification-stack wam)) 0)) - -(defun wam-reset-local-registers! (wam) - (fill (wam-type-store wam) +cell-type-null+ :start 0 :end +register-count+) - (fill (wam-value-store wam) 0 :start 0 :end +register-count+)) - -(defun wam-reset! (wam) - (wam-truncate-heap! wam) - (wam-truncate-trail! wam) - (wam-truncate-unification-stack! wam) - (policy-cond:policy-if (>= debug 2) - ;; todo we can't elide this once we start storing live objects... :( - (wam-reset-local-registers! wam) - nil) ; fuck it - (fill (wam-code wam) 0 :start 0 :end +maximum-query-size+) - (setf (wam-program-counter wam) 0 - (wam-continuation-pointer wam) 0 - (wam-environment-pointer wam) +stack-start+ - (wam-backtrack-pointer wam) +stack-start+ - (wam-cut-pointer wam) +stack-start+ - (wam-heap-backtrack-pointer wam) +heap-start+ - (wam-backtracked wam) nil - (wam-fail wam) nil - (wam-subterm wam) +heap-start+ - (wam-mode wam) nil)) - - -;;;; Code -;;; The WAM needs to be able to look up predicates at runtime. To do this we -;;; keep a data structure that maps a functor and arity to a location in the -;;; code store. -;;; -;;; This data structure is an array, with the arity we're looking up being the -;;; position. At that position will be a hash tables of the functor symbols to -;;; the locations. -;;; -;;; Each arity's table will be created on-the-fly when it's first needed. - -(defun retrieve-instruction (code-store address) - "Return the full instruction at the given address in the code store." - (make-array (instruction-size (aref code-store address)) - :displaced-to code-store - :displaced-index-offset address - :adjustable nil - :element-type 'code-word)) - - -(defun wam-code-label (wam functor arity) - (let ((atable (aref (wam-code-labels wam) arity))) - (when atable - (values (gethash functor atable))))) - -(defun (setf wam-code-label) (new-value wam functor arity) - (setf (gethash functor (aref-or-init (wam-code-labels wam) arity - (make-hash-table :test 'eq))) - new-value)) - -(defun wam-code-label-remove! (wam functor arity) - (let ((atable (aref (wam-code-labels wam) arity))) - (when atable - ;; todo: remove the table entirely when empty? - (remhash functor atable)))) - - -(declaim (ftype (function (wam query-code-holder query-size) - (values null &optional)) - wam-load-query-code!)) -(defun wam-load-query-code! (wam query-code query-size) - (setf (subseq (wam-code wam) 0 query-size) query-code) - nil) - - -;;;; Logic Stack -;;; The logic stack is stored as a simple list in the WAM. `logic-frame` -;;; structs are pushed and popped from this list as requested. -;;; -;;; There's one small problem: logic frames need to keep track of which -;;; predicates are awaiting compilation, and the best data structure for that is -;;; a hash table. But hash tables are quite expensive to allocate when you're -;;; pushing and popping tons of frames per second. So the WAM also keeps a pool -;;; of logic frames to reuse, which lets us simply `clrhash` in between instead -;;; of having to allocate a brand new hash table. - -(declaim (inline assert-logic-frame-poppable)) - - -(defstruct logic-frame - (start 0 :type code-index) - (final nil :type boolean) - (predicates (make-hash-table :test 'equal) :type hash-table)) - - -(defun wam-logic-pool-release (wam frame) - (with-slots (start final predicates) frame - (clrhash predicates) - (setf start 0 final nil)) - (push frame (wam-logic-pool wam)) - nil) - -(defun wam-logic-pool-request (wam) - (or (pop (wam-logic-pool wam)) - (make-logic-frame))) - - -(defun wam-current-logic-frame (wam) - (first (wam-logic-stack wam))) - -(defun wam-logic-stack-empty-p (wam) - (not (wam-current-logic-frame wam))) - - -(defun wam-logic-open-p (wam) - (let ((frame (wam-current-logic-frame wam))) - (and frame (not (logic-frame-final frame))))) - -(defun wam-logic-closed-p (wam) - (not (wam-logic-open-p wam))) - - -(defun wam-push-logic-frame! (wam) - (assert (wam-logic-closed-p wam) () - "Cannot push logic frame unless the logic stack is closed.") - (let ((frame (wam-logic-pool-request wam))) - (setf (logic-frame-start frame) - (wam-code-pointer wam)) - (push frame (wam-logic-stack wam))) - nil) - -(defun assert-logic-frame-poppable (wam) - (let ((logic-stack (wam-logic-stack wam))) - (policy-cond:policy-if (or (> safety 1) (> debug 0) (< speed 3)) - ;; Slow - (progn - (assert logic-stack () - "Cannot pop logic frame from an empty logic stack.") - (assert (logic-frame-final (first logic-stack)) () - "Cannot pop unfinalized logic frame.")) - ;; Fast - (when (or (not logic-stack) - (not (logic-frame-final (first logic-stack)))) - (error "Cannot pop logic frame."))))) - -(defun wam-pop-logic-frame! (wam) - (with-slots (logic-stack) wam - (assert-logic-frame-poppable wam) - (let ((frame (pop logic-stack))) - (setf (wam-code-pointer wam) - (logic-frame-start frame)) - (loop :for (functor . arity) - :being :the hash-keys :of (logic-frame-predicates frame) - :do (wam-code-label-remove! wam functor arity)) - (wam-logic-pool-release wam frame))) - nil) - - -(defun assert-label-not-already-compiled (wam clause functor arity) - (assert (not (wam-code-label wam functor arity)) - () - "Cannot add clause ~S because its predicate has preexisting compiled code." - clause)) - -(defun wam-logic-frame-add-clause! (wam clause) - (assert (wam-logic-open-p wam) () - "Cannot add clause ~S without an open logic stack frame." - clause) - - (multiple-value-bind (functor arity) (find-predicate clause) - (assert-label-not-already-compiled wam clause functor arity) - (enqueue clause (gethash-or-init - (cons functor arity) - (logic-frame-predicates (wam-current-logic-frame wam)) - (make-queue)))) - nil) - - -(defun wam-finalize-logic-frame! (wam) - (assert (wam-logic-open-p wam) () - "There is no logic frame waiting to be finalized.") - (with-slots (predicates final) - (wam-current-logic-frame wam) - (loop :for clauses :being :the hash-values :of predicates - ;; circular dep on the compiler here, ugh. - :do (compile-rules wam (queue-contents clauses))) - (setf final t)) - nil) - - -;;;; Registers -;;; The WAM has two types of registers: -;;; -;;; * Local/temporary/arguments registers live at the beginning of the WAM -;;; memory store. -;;; -;;; * Stack/permanent registers live on the stack, and need some extra math to -;;; find their location. -;;; -;;; Registers are typically denoted by their "register index", which is just -;;; their number. Hoever, the bytecode needs to be able to distinguish between -;;; local and stack registers. To do this we just make separate opcodes for -;;; each kind. This is ugly, but it lets us figure things out at compile time -;;; instead of runtime, and register references happen A LOT at runtime. -;;; -;;; As for the CONTENTS of registers: a register (regardless of type) always -;;; contains a cell. The book is maddeningly unclear on this in a bunch of -;;; ways. I will list them here so maybe you can feel a bit of my suffering -;;; through these bytes of text. -;;; -;;; The first thing the book says about registers is "registers have the same -;;; format as heap cells". Okay, fine. The *very next diagram* shows "register -;;; assignments" that appear to put things that are very much *not* heap cells -;;; into registers! -;;; -;;; After a bit of puttering you realize that the diagram is referring only to -;;; the compilation, not what's *actually* stored in these registers at runtime. -;;; You move on and see some pseudocode that contains `X_i <- HEAP[H]` which -;;; confirms that his original claim was accurate, and registers are actually -;;; (copies of) heap cells. Cool. -;;; -;;; Then you move on and see the definition of `deref(a : address)` and note -;;; that it takes an *address* as an argument. On the next page you see -;;; `deref(X_i)` and wait what the fuck, a register is an *address* now? You -;;; scan down the page and see `HEAP[H] <- X_i` which means no wait it's a cell -;;; again. -;;; -;;; After considering depositing your laptop into the nearest toilet and -;;; becoming a sheep farmer, you conclude a few things: -;;; -;;; 1. The book's code won't typecheck. -;;; 2. The author is playing fast and loose with `X_i` -- sometimes it seems to -;;; be used as an address, sometimes as a cell. -;;; 3. The author never bothers to nail down exactly what is inside the fucking -;;; things, which is a problem because of #2. -;;; -;;; If you're like me (painfully unlucky), you took a wild guess and decided to -;;; implement registers as containing *addresses*, i.e., indexes into the -;;; heap, figuring that if you were wrong it would soon become apparent. -;;; -;;; WELL it turns out that you can get all the way to CHAPTER FIVE with -;;; registers implemented as addresses, at which point you hit a wall and need -;;; to spend a few hours refactoring a giant chunk of your code and writing -;;; angry comments in your source code. -;;; -;;; Hopefully I can save someone else this misery by leaving you with this: -;;; ____ _____________________________________ _____ ___ ____ ______ ______________ __ _____ -;;; / __ \/ ____/ ____/ _/ ___/_ __/ ____/ __ \/ ___/ / | / __ \/ ____/ / ____/ ____/ / / / / ___/ -;;; / /_/ / __/ / / __ / / \__ \ / / / __/ / /_/ /\__ \ / /| | / /_/ / __/ / / / __/ / / / / \__ \ -;;; / _, _/ /___/ /_/ // / ___/ // / / /___/ _, _/___/ / / ___ |/ _, _/ /___ / /___/ /___/ /___/ /______/ / -;;; /_/ |_/_____/\____/___//____//_/ /_____/_/ |_|/____/ /_/ |_/_/ |_/_____/ \____/_____/_____/_____/____/ - -(declaim (inline wam-set-local-register! - wam-set-stack-register! - wam-local-register-address - wam-stack-register-address - wam-local-register-type - wam-stack-register-type - wam-local-register-value - wam-stack-register-value - wam-copy-to-local-register! - wam-copy-to-stack-register! - wam-local-register-address - wam-stack-register-address)) - - -(defun wam-local-register-address (wam register) - (declare (ignore wam)) - register) - -(defun wam-stack-register-address (wam register) - (wam-stack-frame-argument-address wam register)) - - -(defun wam-local-register-type (wam register) - (wam-store-type wam (wam-local-register-address wam register))) - -(defun wam-stack-register-type (wam register) - (wam-store-type wam (wam-stack-register-address wam register))) - - -(defun wam-local-register-value (wam register) - (wam-store-value wam (wam-local-register-address wam register))) - -(defun wam-stack-register-value (wam register) - (wam-store-value wam (wam-stack-register-address wam register))) - - -(defun wam-set-local-register! (wam address type value) - (wam-set-store-cell! wam (wam-local-register-address wam address) - type value)) - -(defun wam-set-stack-register! (wam address type value) - (wam-set-stack-frame-argument! wam address type value)) - - -(defun wam-copy-to-local-register! (wam destination source) - (wam-copy-store-cell! wam (wam-local-register-address wam destination) source)) - -(defun wam-copy-to-stack-register! (wam destination source) - (wam-copy-store-cell! wam (wam-stack-register-address wam destination) source)) - - -;;;; Unification Stack -(declaim (inline wam-unification-stack-push! - wam-unification-stack-pop! - wam-unification-stack-empty-p)) - - -(defun wam-unification-stack-push! (wam address1 address2) - (vector-push-extend address1 (wam-unification-stack wam)) - (vector-push-extend address2 (wam-unification-stack wam))) - -(defun wam-unification-stack-pop! (wam) - (vector-pop (wam-unification-stack wam))) - -(defun wam-unification-stack-empty-p (wam) - (zerop (length (wam-unification-stack wam))))