src/wam/compiler.lisp @ ceecc846dd67

Add a benchrunner
author Steve Losh <steve@stevelosh.com>
date Mon, 02 May 2016 17:55:07 +0000
parents 1ab41e0128dc
children c77968cd3c51
(in-package #:bones.wam)
(named-readtables:in-readtable :fare-quasiquote)

;;;; Registers
(deftype register-type ()
  '(member :argument :local :permanent))

(deftype register-number ()
  '(integer 0))


(defclass register ()
  ((type
     :initarg :type
     :reader register-type
     :type register-type)
   (number
     :initarg :number
     :reader register-number
     :type register-number)))


(defun* make-register ((type register-type) (number register-number))
  (:returns register)
  (make-instance 'register :type type :number number))

(defun* make-temporary-register ((number register-number) (arity arity))
  (:returns register)
  (make-register (if (< number arity) :argument :local)
                 number))

(defun* make-permanent-register ((number register-number) (arity arity))
  (:returns register)
  (declare (ignore arity))
  (make-register :permanent number))


(defun* register-to-string ((register register))
  (format nil "~A~D"
          (ecase (register-type register)
            (:argument #\A)
            (:local #\X)
            (:permanent #\Y))
          (+ (register-number register)
             (if *off-by-one* 1 0))))

(defmethod print-object ((object register) stream)
  (print-unreadable-object (object stream :identity nil :type nil)
    (format stream (register-to-string object))))


(defun* register-temporary-p ((register register))
  (member (register-type register) '(:argument :local)))

(defun* register-permanent-p ((register register))
  (eql (register-type register) :permanent))


(defun* register= ((r1 register) (r2 register))
  (:returns boolean)
  (ensure-boolean
    (and (eql (register-type r1)
              (register-type r2))
         (= (register-number r1)
            (register-number r2)))))

(defun* register≈ ((r1 register) (r2 register))
  (:returns boolean)
  (ensure-boolean
    (and (or (eql (register-type r1)
                  (register-type r2))
             ;; local and argument registers are actually the same register,
             ;; just named differently
             (and (register-temporary-p r1)
                  (register-temporary-p r2)))
         (= (register-number r1)
            (register-number r2)))))


;;;; Register Assignments
(deftype register-assignment ()
  ;; A register assignment represented as a cons of (register . contents).
  '(cons register t))

(deftype register-assignment-list ()
  '(trivial-types:association-list register t))


(defun* pprint-assignments ((assignments register-assignment-list))
  (format t "~{~A~%~}"
          (loop :for (register . contents) :in assignments :collect
                (format nil "~A <- ~S" (register-to-string register) contents))))

(defun* find-assignment ((register register)
                         (assignments register-assignment-list))
  (:returns register-assignment)
  "Find the assignment for the given register number in the assignment list."
  (assoc register assignments))


(defun* variable-p (term)
  (:returns boolean)
  (ensure-boolean (keywordp term)))


(defun* variable-assignment-p ((assignment register-assignment))
  "Return whether the register assigment is a simple variable assignment.

  E.g. `X1 = Foo` is simple, but `X2 = f(...)` is not.

  Note that register assignments actually look like `(1 . contents)`, so
  a simple variable assignment would be `(1 . :foo)`.

  "
  (:returns boolean)
  (variable-p (cdr assignment)))

(defun* variable-register-p ((register register)
                             (assignments register-assignment-list))
  (:returns boolean)
  "Return whether the given register contains a variable assignment."
  (variable-assignment-p (find-assignment register assignments)))


(defun* register-assignment-p ((assignment register-assignment))
  (:returns boolean)
  "Return whether the register assigment is a register-to-register assignment.

  E.g. `A1 = X2`.

  Note that this should only ever happen for argument registers.

  "
  (typep (cdr assignment) 'register))


(defun* structure-assignment-p ((assignment register-assignment))
  (:returns boolean)
  "Return whether the given assignment pair is a structure assignment."
  (listp (cdr assignment)))

(defun* structure-register-p ((register register)
                              (assignments register-assignment-list))
  (:returns boolean)
  "Return whether the given register contains a structure assignment."
  (structure-assignment-p (find-assignment register assignments)))


;;;; Parsing
;;; You might want to grab a coffee for this one.
;;;
;;; Consider this simple Prolog example: `p(A, q(A, r(B)))`.  We're going to get
;;; this as a Lisp list: `(p :a (q :a (r b)))`.
;;;
;;; The goal is to turn this list into a set of register assignments.  The book
;;; handwaves around how to do this, and it turns out to be pretty complicated.
;;; This example will (maybe, read on) be turned into:
;;;
;;;     A0 <- X2
;;;     A1 <- (q X2 X3)
;;;     X2 <- :a
;;;     X3 <- (r X4)
;;;     X4 <- :b
;;;
;;; There are a few things to note here.  First: like the book says, the
;;; outermost predicate is stripped off and returned separately (later it'll be
;;; used to label the code for a program, or to figure out the procedure to call
;;; for a query).
;;;
;;; The first N registers are designated as argument registers.  Structure
;;; assignments can live directly in the argument registers, but variables
;;; cannot.  In the example above we can see that A1 contains a structure
;;; assignment.  However, the variable `:a` doesn't live in A0 -- it lives in
;;; X2, which A0 points at.  The books neglects to explain this little fact.
;;;
;;; The next edge case is permanent variables, which the book does talk about.
;;; Permanent variables are allocated to stack registers, so if `:b` was
;;; permanent in our example we'd get:
;;;
;;;     A0 <- X2
;;;     A1 <- (q X2 X3)
;;;     X2 <- :a
;;;     X3 <- (r Y0)
;;;     Y0 <- :b
;;;
;;; Note that the mapping of permanent variables to stack register numbers has
;;; to be consistent as we compile all the terms in a clause, so we cheat a bit
;;; here and just always add them all, in order, to the register assignment
;;; produced when parsing.  They'll get flattened away later anyway -- it's the
;;; USES that we actually care about.  In our example, the `Y0 <- :b` will get
;;; flattened away, but the USE of Y0 in X3 will remain).
;;;
;;; We're almost done, I promise, but there's one more edge case to deal with.
;;;
;;; When we've got a clause with a head and at least one body term, we need the
;;; head term and the first body term to share argument/local registers.  For
;;; example, if we have the clause `p(Cats) :- q(A, B, C, Cats)` then when
;;; compiling the head `(p :cats)` we want to get:
;;;
;;;     A0 <- X4
;;;     A1 <- ???
;;;     A2 <- ???
;;;     A3 <- ???
;;;     X4 <- :cats
;;;
;;; And when compiling `(q :a :b :c :cats)` we need:
;;;
;;;     A0 <- X5
;;;     A1 <- X6
;;;     A2 <- X7
;;;     A3 <- X4
;;;     X4 <- :cats
;;;     X5 <- :a
;;;     X6 <- :b
;;;     X7 <- :c
;;;
;;; What the hell are those empty argument registers in p?  And why did we order
;;; the X registers of q like that?
;;;
;;; The book does not bother to mention this important fact at all, so to find
;;; out that you have to handle this you need to do the following:
;;;
;;; 1. Implement it without this behavior.
;;; 2. Notice your results are wrong.
;;; 3. Figure out the right bytecode on a whiteboard.
;;; 4. Try to puzzle out why that bytecode isn't generated when you do exactly
;;;    what the book says.
;;; 5. Scour IRC and the web for scraps of information on what the hell you need
;;;    to do here.
;;; 6. Find the answer in a comment squirreled away in a source file somewhere
;;;    in a language you don't know.
;;; 7. Drink.
;;;
;;; Perhaps you're reading this comment as part of step 6 right now.  If so:
;;; welcome aboard.  Email me and we can swap horror stories about this process
;;; over drinks some time.
;;;
;;; Okay, so the clause head and first body term need to share argument/local
;;; registers.  Why?  To understand this, we need to go back to what Prolog
;;; clauses are supposed to do.
;;;
;;; Imagine we have:
;;;
;;;     p(f(X)) :- q(X), ...other goals.
;;;
;;; When we want to check if `p(SOMETHING)` is true, we need to first unify
;;; SOMETHING with `f(X)`.  Then we search all of the goals in the body, AFTER
;;; substituting in any X's in those goals with the X from the result of the
;;; unification.
;;;
;;; This substitution is why we need the head and the first term in the body to
;;; share the same argument/local registers.  By sharing the registers, when the
;;; body term builds a representation of itself on the stack before calling its
;;; predicate any references to X will be point at the (unified) results instead
;;; of fresh ones (because they'll be compiled as `put_value` instead of
;;; `put_variable`).
;;;
;;; But wait: don't we need to substitute into ALL the body terms, not just the
;;; first one?  Yes we do, but the trick is that any variables in the REST of
;;; the body that would need to be substituted must, by definition, be permanent
;;; variables!  So the substitution process for the rest of the body is handled
;;; automatically with the stack machinery.
;;;
;;; In theory, you could eliminate this edge case by NOT treating the head and
;;; first goal as a single term when searching for permanent variables.  Then
;;; all substitution would happen elegantly through the stack.  But this
;;; allocates more variables on the stack than you really need (especially for
;;; rules with just a single term in the body (which is many of them)), so we
;;; have this extra corner case to optimize it away.
;;;
;;; We now return you to your regularly scheduled Lisp code.

(defun parse-term (term permanent-variables
                   ;; JESUS TAKE THE WHEEL
                   &optional reserved-variables reserved-arity)
  "Parse a term into a series of register assignments.

  Returns:

    * The assignment list
    * The root functor
    * The root functor's arity

  "
  (let* ((predicate (first term))
         (arguments (rest term))
         (arity (length arguments))
         ;; Preallocate enough registers for all of the arguments.  We'll fill
         ;; them in later.  Note that things are more complicated in the head
         ;; and first body term of a clause (see above).
         (local-registers (make-array 64
                            :fill-pointer (or reserved-arity arity)
                            :adjustable t
                            :initial-element nil))
         ;; We essentially "preallocate" all the permanent variables up front
         ;; because we need them to always be in the same stack registers across
         ;; all the terms of our clause.
         ;;
         ;; The ones that won't get used in this term will end up getting
         ;; flattened away anyway.
         (stack-registers (make-array (length permanent-variables)
                            :initial-contents permanent-variables)))
    (loop :for variable :in reserved-variables :do
          (vector-push-extend variable local-registers))
    (labels
        ((find-variable (var)
           (let ((r (position var local-registers))
                 (s (position var stack-registers)))
             (cond
               (r (make-temporary-register r arity))
               (s (make-permanent-register s arity))
               (t nil))))
         (store-variable (var)
           (make-temporary-register
             (vector-push-extend var local-registers)
             arity))
         (parse-variable (var)
           ;; If we've already seen this variable just return the register it's
           ;; in, otherwise allocate a register for it and return that.
           (or (find-variable var)
               (store-variable var)))
         (parse-structure (structure reg)
           (destructuring-bind (functor . arguments) structure
             ;; If we've been given a register to hold this structure (i.e.
             ;; we're parsing a top-level argument) use it.  Otherwise allocate
             ;; a fresh one.  Note that structures always live in local
             ;; registers, never permanent ones.
             (let ((reg (or reg (vector-push-extend nil local-registers))))
               (setf (aref local-registers reg)
                     (cons functor (mapcar #'parse arguments)))
               (make-temporary-register reg arity))))
         (parse (term &optional register)
           (cond
             ((variable-p term) (parse-variable term))
             ((symbolp term) (parse (list term) register)) ; f -> f/0
             ((listp term) (parse-structure term register))
             (t (error "Cannot parse term ~S." term))))
         (make-assignment-list (registers register-maker)
           (loop :for i :from 0
                 :for contents :across registers
                 :when contents :collect ; don't include unused reserved regs
                 (cons (funcall register-maker i arity)
                       contents))))
      ;; Arguments are handled specially.  We parse the children as normal,
      ;; and then fill in the argument registers after each child.
      (loop :for argument :in arguments
            :for i :from 0
            :for parsed = (parse argument i)
            ;; If the argument didn't fill itself in (structure), do it.
            :when (not (aref local-registers i))
            :do (setf (aref local-registers i) parsed))
      (values (append
                (make-assignment-list local-registers #'make-temporary-register)
                (make-assignment-list stack-registers #'make-permanent-register))
              predicate
              arity))))


;;;; Flattening
;;; "Flattening" is the process of turning a series of register assignments into
;;; a sorted sequence appropriate for turning into a series of instructions.
;;;
;;; The order depends on whether we're compiling a query term or a program term.
;;;
;;; It's a stupid name because the assignments are already flattened as much as
;;; they ever will be.  "Sorting" would be a better name.  Maybe I'll change it
;;; once I'm done with the book.
;;;
;;; Turns:
;;;
;;;   X0 <- p(X1, X2)
;;;   X1 <- A
;;;   X2 <- q(X1, X3)
;;;   X3 <- B
;;;
;;; into something like:
;;;
;;;   X2 <- q(X1, X3), X0 <- p(X1, X2)

(defun find-dependencies (assignments)
  "Return a list of dependencies amongst the given registers.

  Each entry will be a cons of `(a . b)` if register `a` depends on `b`.

  "
  (mapcan
    (lambda (assignment)
      (cond
        ; Variable assignments (X1 <- Foo) don't depend on anything else.
        ((variable-assignment-p assignment)
         ())
        ; Register assignments (A0 <- X5) have one obvious dependency.
        ((register-assignment-p assignment)
         (destructuring-bind (argument . contents) assignment
           (list `(,contents . ,argument))))
        ; Structure assignments depend on all the functor's arguments.
        ((structure-assignment-p assignment)
         (destructuring-bind (target . (functor . reqs))
             assignment
           (declare (ignore functor))
           (loop :for req :in reqs
                 :collect (cons req target))))
        (t (error "Cannot find dependencies for assignment ~S." assignment))))
    assignments))


(defun flatten (assignments)
  "Flatten the set of register assignments into a minimal set.

  We remove the plain old variable assignments (in non-argument registers)
  because they're not actually needed in the end.

  "
  (-<> assignments
    (topological-sort <> (find-dependencies assignments)
                      :key #'car
                      :key-test #'register=
                      :test #'eql)
    (remove-if #'variable-assignment-p <>)))

(defun flatten-query (assignments)
  (flatten assignments))

(defun flatten-program (assignments)
  (reverse (flatten assignments)))


;;;; Tokenization
;;; Tokenizing takes a flattened set of assignments and turns it into a stream
;;; of structure assignments and bare registers.
;;;
;;; It turns:
;;;
;;;   X2 <- q(X1, X3)
;;;   X0 <- p(X1, X2)
;;;   A3 <- X4
;;;
;;; into something like:
;;;
;;;   (X2 = q/2), X1, X3, (X0 = p/2), X1, X2, (A3 = X4)

(defun tokenize-assignments (assignments)
  "Tokenize a flattened set of register assignments into a stream."
  (mapcan
    (lambda (ass)
      ;; Take a single assignment like:
      ;;   X1 = f(X4, Y1)         (X1 . (f X4 Y1))
      ;;   A0 = X5                (A0 . X5)
      ;;
      ;; And turn it into a stream of tokens:
      ;;   (X1 = f/2), X4, Y1      ((:structure X1 f 2) X4 Y1
      ;;   (A0 = X5)                (:argument A0 X5))
      (if (register-assignment-p ass)
        ;; It might be a register assignment for an argument register.
        (destructuring-bind (argument-register . target-register) ass
          (list (list :argument argument-register target-register)))
        ;; Otherwise it's a structure assignment.  We know the others have
        ;; gotten flattened away by now.
        (destructuring-bind (register . (functor . arguments)) ass
          (cons (list :structure register functor (length arguments))
                arguments))))
    assignments))


(defun tokenize-term
    (term permanent-variables reserved-variables reserved-arity flattener)
  (multiple-value-bind (assignments functor arity)
      (parse-term term permanent-variables reserved-variables reserved-arity)
    (values (->> assignments
              (funcall flattener)
              tokenize-assignments)
            functor
            arity)))

(defun tokenize-program-term
    (term permanent-variables reserved-variables reserved-arity)
  "Tokenize `term` as a program term, returning its tokens."
  (values (tokenize-term term
                         permanent-variables
                         reserved-variables
                         reserved-arity
                         #'flatten-program)))

(defun tokenize-query-term
    (term permanent-variables &optional reserved-variables reserved-arity)
  "Tokenize `term` as a query term, returning its stream of tokens."
  (multiple-value-bind (tokens functor arity)
      (tokenize-term term
                     permanent-variables
                     reserved-variables
                     reserved-arity
                     #'flatten-query)
    ;; We need to shove a CALL token onto the end.
    (append tokens `((:call ,functor ,arity)))))


;;;; Bytecode
;;; Once we have a tokenized stream we can generate the machine instructions
;;; from it.
;;;
;;; We turn:
;;;
;;;   (X2 = q/2), X1, X3, (X0 = p/2), X1, X2
;;;
;;; into something like:
;;;
;;;   (#'%put-structure 2 q 2)
;;;   (#'%set-variable 1)
;;;   (#'%set-variable 3)
;;;   (#'%put-structure 0 p 2)
;;;   (#'%set-value 1)
;;;   (#'%set-value 2)

(defun find-opcode (opcode newp mode &optional register)
  (flet ((find-variant (register)
           (when register
             (if (register-temporary-p register)
               :local
               :stack))))
    (eswitch ((list opcode newp mode (find-variant register)) :test #'equal)
      ('(:argument t   :program :local) +opcode-get-variable-local+)
      ('(:argument t   :program :stack) +opcode-get-variable-stack+)
      ('(:argument t   :query   :local) +opcode-put-variable-local+)
      ('(:argument t   :query   :stack) +opcode-put-variable-stack+)
      ('(:argument nil :program :local) +opcode-get-value-local+)
      ('(:argument nil :program :stack) +opcode-get-value-stack+)
      ('(:argument nil :query   :local) +opcode-put-value-local+)
      ('(:argument nil :query   :stack) +opcode-put-value-stack+)
      ;; Structures can only live locally, they never go on the stack
      ('(:structure nil :program :local) +opcode-get-structure-local+)
      ('(:structure nil :query   :local) +opcode-put-structure-local+)
      ('(:register t   :program :local) +opcode-unify-variable-local+)
      ('(:register t   :program :stack) +opcode-unify-variable-stack+)
      ('(:register t   :query   :local) +opcode-set-variable-local+)
      ('(:register t   :query   :stack) +opcode-set-variable-stack+)
      ('(:register nil :program :local) +opcode-unify-value-local+)
      ('(:register nil :program :stack) +opcode-unify-value-stack+)
      ('(:register nil :query   :local) +opcode-set-value-local+)
      ('(:register nil :query   :stack) +opcode-set-value-stack+))))

(defun compile-tokens (wam head-tokens body-tokens store)
  "Generate a series of machine instructions from a stream of head and body
  tokens.

  The `head-tokens` should be program-style tokens, and are compiled in program
  mode.  The `body-tokens` should be query-style tokens, and are compiled in
  query mode.

  Actual queries are a special case where the `head-tokens` stream is `nil`

  The compiled instructions will be appended to `store` using
  `code-push-instructions!`.

  "
  (let ((seen (list))
        (mode nil))
    (labels
        ((handle-argument (argument-register source-register)
           ;; OP X_n A_i
           (let ((newp (push-if-new source-register seen :test #'register=)))
             (code-push-instruction! store
                 (find-opcode :argument newp mode source-register)
               (register-number source-register)
               (register-number argument-register))))
         (handle-structure (destination-register functor arity)
           ;; OP functor reg
           (push destination-register seen)
           (code-push-instruction! store
               (find-opcode :structure nil mode destination-register)
             (wam-ensure-functor-index wam (cons functor arity))
             (register-number destination-register)))
         (handle-call (functor arity)
           ;; CALL functor
           (code-push-instruction! store
               +opcode-call+
             (wam-ensure-functor-index wam (cons functor arity))))
         (handle-register (register)
           ;; OP reg
           (let ((newp (push-if-new register seen :test #'register=)))
             (code-push-instruction! store
                 (find-opcode :register newp mode register)
               (register-number register))))
         (handle-stream (tokens)
           (loop :for token :in tokens :collect
                 (ematch token
                   ((guard `(:argument ,argument-register ,source-register)
                           (and (eql (register-type argument-register) :argument)
                                (member (register-type source-register)
                                        '(:local :permanent))))
                    (handle-argument argument-register source-register))
                   ((guard `(:structure ,destination-register ,functor ,arity)
                           (member (register-type destination-register)
                                   '(:local :argument)))
                    (handle-structure destination-register functor arity))
                   (`(:call ,functor ,arity)
                    (handle-call functor arity))
                   ((guard register
                           (typep register 'register))
                    (handle-register register))))))
      (when head-tokens
        (setf mode :program)
        (handle-stream head-tokens))
      (setf mode :query)
      (handle-stream body-tokens))))


;;;; Compilation
(defun find-variables (terms)
  "Return the set of variables in `terms`."
  (remove-duplicates (tree-collect #'variable-p terms)))

(defun find-shared-variables (terms)
  "Return the set of all variables shared by two or more terms."
  (labels
      ((count-uses (variable)
         (count-if (curry #'tree-member-p variable) terms))
       (shared-p (variable)
         (> (count-uses variable) 1)))
    (remove-if-not #'shared-p (find-variables terms))))

(defun find-permanent-variables (clause)
  "Return a list of all the permanent variables in `clause`.

  Permanent variables are those that appear in more than one goal of the clause,
  where the head of the clause is considered to be a part of the first goal.

  "
  (if (<= (length clause) 2)
    (list) ; facts and chain rules have no permanent variables at all
    (destructuring-bind (head body-first . body-rest) clause
      ;; the head is treated as part of the first goal for the purposes of
      ;; finding permanent variables
      (find-shared-variables (cons (cons head body-first) body-rest)))))

(defun find-head-variables (clause)
  (if (<= (length clause) 1)
    (list)
    (destructuring-bind (head body-first . body-rest) clause
      (declare (ignore body-rest))
      (find-shared-variables (list head body-first)))))


(defun compile-clause (wam store head body)
  "Compile the clause directly into `store` and return the permanent variables.

  `head` should be the head of the clause for program clauses, or `nil` for
  query clauses.

  `body` is the body of the clause, or `nil` for facts.

  "
  (let* ((permanent-variables
           (if (null head)
             ;; For query clauses we cheat a bit and make ALL variables
             ;; permanent, so we can extract their bindings as results later.
             (find-variables body)
             (find-permanent-variables (cons head body))))
         (head-variables
           (set-difference (find-head-variables (cons head body))
                           permanent-variables))
         (head-arity
           (max (1- (length head))
                (1- (length (car body)))))
         (head-tokens
           (when head
             (tokenize-program-term head
                                    permanent-variables
                                    head-variables
                                    head-arity)))
         (body-tokens
           (when body
             (append
               (tokenize-query-term (first body)
                                    permanent-variables
                                    head-variables
                                    head-arity)
               (loop :for term :in (rest body) :append
                     (tokenize-query-term term
                                          permanent-variables))))))
    (flet ((compile% () (compile-tokens wam head-tokens body-tokens store)))
      ;; We need to compile facts and rules differently.  Facts end with
      ;; a PROCEED and rules are wrapped in ALOC/DEAL.
      (cond
        ((and head body) ; a full-ass rule
         (code-push-instruction! store +opcode-allocate+ (length permanent-variables))
         (compile%)
         (code-push-instruction! store +opcode-deallocate+))
        ((and head (null body)) ; a bare fact
         (compile%)
         (code-push-instruction! store +opcode-proceed+))
        (t ; a query
         ;; The book doesn't have this ALOC here, but we do it to aid in result
         ;; extraction.  Basically, to make extracting th results of a query
         ;; easier we allocate all of its variables on the stack, so we need
         ;; push a stack frame for them before we get started.  We don't DEAL
         ;; because we want the frame to be left on the stack at the end so we
         ;; can poke at it.
         (code-push-instruction! store +opcode-allocate+ (length permanent-variables))
         (compile%)
         (code-push-instruction! store +opcode-done+))))
    permanent-variables))


;;; Queries
(defun make-query-code-store ()
  (make-array 64
    :fill-pointer 0
    :adjustable t
    :element-type 'code-word))

(defun compile-query (wam query)
  "Compile `query` into a fresh array of bytecode.

  `query` should be a list of goal terms.

  Returns the fresh code array and the permanent variables.

  "
  (let* ((store (make-query-code-store))
         (permanent-variables (compile-clause wam store nil query)))
    (values store permanent-variables)))


;;; Rules
(defun mark-label (wam functor arity address)
  "Set the code label `functor`/`arity` to point at `address`."
  (setf (wam-code-label wam functor arity) address))

(defun find-arity (rule)
  (let ((head (first rule)))
    (cond
      ((null head) (error "Rule ~S has a NIL head." rule))
      ((atom head) 0) ; constants are 0-arity
      (t (1- (length head))))))

(defun check-rules (rules)
  (let* ((predicates (mapcar #'caar rules))
         (arities (mapcar #'find-arity rules))
         (functors (zip predicates arities)))
    (assert (= 1 (length (remove-duplicates functors :test #'equal))) ()
      "Must add exactly 1 predicate at a time (got: ~S)."
      functors)
    (values (first predicates) (first arities))))

(defun compile-rules (wam rules)
  "Compile `rules` into the WAM's code store.

  Each rule in `rules` should be a clause consisting of a head term and zero or
  more body terms.  A rule with no body is called a fact.

  "
  (assert rules () "Cannot compile an empty program.")
  (*let ((code (wam-code wam))
         (previous-jump nil)
         ((:values functor arity) (check-rules rules)))
    (labels
        ((fill-jump (address)
           (when previous-jump
             (setf (aref code (1+ previous-jump)) address))
           (setf previous-jump address))
         (push-branch-instruction (first-p last-p)
           (cond
             (first-p
              (fill-jump (code-push-instruction! code +opcode-try+ 999)))
             (last-p
              (fill-jump (code-push-instruction! code +opcode-trust+)))
             (t
              (fill-jump (code-push-instruction! code +opcode-retry+ 999))))))
      ;; Mark the label to point at where we're about to stick the code.
      ;; TODO: this is ugly
      (mark-label wam functor arity (fill-pointer code))
      (if (= 1 (length rules))
        ;; Single-clause rules don't need to bother setting up a choice point.
        (destructuring-bind ((head . body)) rules
          (compile-clause wam code head body))
        ;; Otherwise we need to loop through each of the clauses, pushing their
        ;; choice point instruction first, then their actual code.
        (loop :for ((head . body) . remaining) :on rules
              :for first-p = t :then nil
              :do
              (push-branch-instruction first-p (null remaining))
              (compile-clause wam code head body)))))
  (values))