author |
Steve Losh <steve@stevelosh.com> |
date |
Wed, 13 Apr 2016 17:38:57 +0000 |
parents |
c97b74976098 |
children |
7627f8976a3e |
(in-package #:bones.wam)
(named-readtables:in-readtable :fare-quasiquote)
;;;; Parsing
;;; Turns p(A, q(A, B)) into something like:
;;;
;;; X0 -> p(X1, X2)
;;; X1 -> A
;;; X2 -> q(X1, X3)
;;; X3 -> B
;;;
;;; And then processes the argument register assignments into:
;;;
;;; p/2:
;;; A0 -> A
;;; A1 -> q(A1, X3)
;;; X2 -> B
(defun find-assignment (register assignments)
"Find the assignment for the given register number in the assignment list."
(assoc register assignments))
(defun variable-p (term)
(keywordp term))
(defun find-permanent-variables (clause)
"Return a list of all the 'permanent' variables in `clause`.
Permanent variables are those that appear in more than one goal of the clause,
where the head of the clause is considered to be a part of the first goal.
"
(if (< (length clause) 2)
(list) ; facts and chain rules have no permanent variables at all
(destructuring-bind (head body-first . body-rest) clause
;; the head is treated as part of the first goal for the purposes of
;; finding permanent variables
(let* ((goals (cons (cons head body-first) body-rest))
(variables (remove-duplicates (tree-collect #'variable-p goals))))
(flet ((permanent-p (variable)
"Permanent variables are those contained in more than 1 goal."
(> (count-if (curry #'tree-member-p variable)
goals)
1)))
(remove-if-not #'permanent-p variables))))))
(defun variable-assignment-p (ass)
"Return whether the register assigment is a simple variable assignment.
E.g. `X1 = Foo` is simple, but `X2 = f(...)` is not.
Note that register assignments actually look like `(1 . contents)`, so
a simple variable assignment would be `(1 . :foo)`.
"
(keywordp (cdr ass)))
(defun variable-register-p (register assignments)
"Return whether the given register contains a variable assignment."
(variable-assignment-p (find-assignment register assignments)))
(defun register-assignment-p (ass)
"Return whether the register assigment is a register-to-register assignment.
E.g. `A1 = X2`.
Note that this should only ever happen for argument registers.
"
(numberp (cdr ass)))
(defun structure-assignment-p (ass)
"Return whether the given assignment pair is a structure assignment."
(listp (cdr ass)))
(defun structure-register-p (register assignments)
"Return whether the given register contains a structure assignment."
(structure-assignment-p (find-assignment register assignments)))
(defun relocate-register (assignments from to)
"Relocate a register in the assignment list."
;; Takes an assignment list like:
;;
;; (0 . 2) ; A0 = X2
;; (1 . (f 2 3)) ; A1 = f(X2, X3)
;; (2 . :foo) ; X2 = Foo
;; (3 . :bar) ; X3 = Bar
(assert (< to from) (from to)
"Cannot relocate register ~D to ~D, destination must be before source."
from to)
(assert (not (tree-member-p to assignments)) (to)
"Cannot relocate register ~D to ~D in ~S, destination is already in use."
from to assignments)
(when assignments
(map-tree (lambda (r)
(if (numberp r)
(cond ((= r from) to) ; relocate the actual register
((> r from) (1- r)) ; decrement higher registers
((< r from) r)) ; pass through lower registers
r))
assignments)))
(defun parse-term (term)
"Parse a term into a series of register assignments.
Return:
* The assignment list
* The register types
* The root functor
* The root functor's arity
"
;; A term is a Lispy representation of the raw Prolog. A register assignment
;; is a cons of (register . assigned-to), e.g.:
;;
;; (p :foo (f :foo :bar))
;; ->
;; (0 . 2) ; A0 = X2
;; (1 . 4) ; A1 = X3
;; (2 . :foo) ; X2 = Foo
;; (3 . (f 2 4)) ; X3 = f(X2, X4)
;; (4 . :bar) ; X4 = Bar
(let* ((predicate (first term))
(arguments (rest term))
(arity (length arguments))
;; Preallocate enough registers for all of the arguments.
;; We'll fill them in later.
(registers (make-array 64 :fill-pointer arity :adjustable t)))
(labels
((parse-variable (var)
;; If we've already seen this variable, just return its position,
;; otherwise allocate a register for it.
(or (position var registers)
(vector-push-extend var registers)))
(parse-structure (structure)
(let* ((functor (first structure))
(arguments (rest structure))
(contents (list functor)))
(prog1
(vector-push-extend contents registers)
;; Parse the arguments and splice the results into this cell
;; once we're finished. The children should handle extending
;; the registers as needed.
(nconc contents (mapcar #'parse arguments)))))
(parse (term)
(cond
((variable-p term) (parse-variable term))
((symbolp term) (parse (list term))) ; f -> f/0
((listp term) (parse-structure term))
(t (error "Cannot parse term ~S." term)))))
;; Arguments are handled specially. We parse the children as normal,
;; and then fill in the argument registers after each child.
(loop :for argument :in arguments
:for i :from 0
:do (setf (aref registers i)
(parse argument)))
(values (loop :for i :from 0 ; turn the register array into an assignment list
:for reg :across registers
:collect (cons i reg))
predicate
arity))))
(defun inline-structure-argument-assignments (assignments arity)
"Inline structure register assignments directly into the argument registers."
;; After parsing the term we end up with something like:
;;
;; (0 . 2) ; A0 = X2
;; (1 . 4) ; A1 = X3 <---------+
;; (2 . :foo) ; X2 = Foo | inline this
;; (3 . (f 2 4)) ; X3 = f(X2, X4) ------+
;; (4 . :bar) ; X4 = Bar
;;
;; We want to "inline" any structure arguments into the argument registers.
(labels
((recur (remaining assignments)
(if (zerop remaining)
assignments
(let* ((argument-register (car assignments))
(argument-number (car argument-register))
(argument-target (cdr argument-register)))
(if (structure-register-p argument-target assignments)
(recur (1- remaining)
(relocate-register (cdr assignments)
argument-target
argument-number))
(cons argument-register
(recur (1- remaining)
(cdr assignments))))))))
(sort (recur arity assignments) #'< :key #'car)))
(defun register-types (assignments arity permanent-variables)
"Return the alist of register types for the given register assignments.
`assignments` must be sorted, and not flattened yet.
"
(loop :for i :from 0
:for (register . contents) :in assignments :collect
(cons i (cond
((< i arity) :argument)
((member contents permanent-variables) :permanent)
(t :local)))))
;;;; Flattening
;;; "Flattening" is the process of turning a series of register assignments into
;;; a sorted sequence appropriate for turning into a series of instructions.
;;;
;;; The order depends on whether we're compiling a query term or a program term.
;;;
;;; It's a stupid name because the assignments are already flattened as much as
;;; they ever will be. "Sorting" would be a better name. Maybe I'll change it
;;; once I'm done with the book.
;;;
;;; Turns:
;;;
;;; X0 -> p(X1, X2)
;;; X1 -> A
;;; X2 -> q(X1, X3)
;;; X3 -> B
;;;
;;; into something like:
;;;
;;; X2 -> q(X1, X3), X0 -> p(X1, X2)
(defun find-dependencies (assignments)
"Return a list of dependencies amongst the given registers.
Each entry will be a cons of `(a . b)` if register `a` depends on `b`.
"
(mapcan
(lambda (assignment)
(cond
; Variable assignments (X1 <- Foo) don't depend on anything else.
((variable-assignment-p assignment)
())
; Register assignments (A0 <- X5) have one obvious dependency.
((register-assignment-p assignment)
(list (cons (cdr assignment) (car assignment))))
; Structure assignments depend on all the functor's arguments.
((structure-assignment-p assignment)
(destructuring-bind (target . (functor . reqs))
assignment
(declare (ignore functor))
(loop :for req :in reqs
:collect (cons req target))))
(t (error "Cannot find dependencies for assignment ~S." assignment))))
assignments))
(defun flatten (assignments)
"Flatten the set of register assignments into a minimal set.
We remove the plain old variable assignments (in non-argument registers)
because they're not actually needed in the end.
"
(-<> assignments
(topological-sort <> (find-dependencies assignments) :key #'car)
(remove-if #'variable-assignment-p <>)))
(defun flatten-query (assignments)
(flatten assignments))
(defun flatten-program (assignments)
(reverse (flatten assignments)))
;;;; Tokenization
;;; Tokenizing takes a flattened set of assignments and turns it into a stream
;;; of structure assignments and bare registers.
;;;
;;; It turns:
;;;
;;; X2 -> q(X1, X3), X0 -> p(X1, X2)
;;;
;;; into something like:
;;;
;;; (X2 = q/2), X1, X3, (X0 = p/2), X1, X2
(defun tokenize-assignments (assignments arity)
"Tokenize a flattened set of register assignments into a stream."
(mapcan
(lambda (ass)
;; Take a single assignment like:
;; X1 = f(a, b, c) (1 . (f a b c))
;; A0 = X5 (0 . 5)
;;
;; And turn it into a stream of tokens:
;; (X1 = f/3), a, b, c ((:structure 1 f 3) a b c)
;; (A0 = X5) ((:argument 0 5))
(if (register-assignment-p ass)
;; It might be a register assignment for an argument register.
(destructuring-bind (argument-register . target-register) ass
(assert (< argument-register arity) ()
"Cannot tokenize register assignment to non-argument register ~D in ???/~D:~%~S."
argument-register arity assignments)
(list (list :argument argument-register target-register)))
;; Otherwise it's a structure assignment. We know the others have
;; gotten flattened away by now.
(destructuring-bind (register . (functor . arguments)) ass
(cons (list :structure register functor (length arguments))
arguments))))
assignments))
(defun zip-register-types (tokens register-types)
(labels
((get-type (register)
(cdr (assoc register register-types)))
(update-leaf (leaf)
(if (numberp leaf)
(cons (get-type leaf) leaf)
leaf))
(fix-token (token)
(match token
(`(:structure ,register ,functor ,arity)
`(:structure (,(get-type register) . ,register)
,functor
,arity))
((guard n (numberp n))
(update-leaf n))
(other (map-tree #'update-leaf other)))))
(mapcar #'fix-token tokens)))
(defun tokenize-term (term permanent-variables flattener)
(multiple-value-bind (assignments functor arity)
(parse-term term)
(let* ((assignments (inline-structure-argument-assignments assignments
arity))
(register-types (register-types assignments
arity
permanent-variables))
(assignments (funcall flattener assignments))
(tokens (tokenize-assignments assignments arity)))
(values (zip-register-types tokens register-types)
functor
arity))))
(defun tokenize-program-term (term permanent-variables)
"Tokenize `term` as a program term, returning its tokens, functor, and arity."
(multiple-value-bind (tokens functor arity)
(tokenize-term term permanent-variables #'flatten-program)
;; We need to shove a PROCEED token onto the end.
(values (append tokens `((:proceed)))
functor
arity)))
(defun tokenize-query-term (term permanent-variables)
"Tokenize `term` as a query term, returning its stream of tokens."
(multiple-value-bind (tokens functor arity)
(tokenize-term term permanent-variables #'flatten-query)
;; We need to shove a CALL token onto the end.
(append tokens `((:call ,functor ,arity)))))
;;;; Bytecode
;;; Once we have a tokenized stream we can generate the machine instructions
;;; from it.
;;;
;;; We turn:
;;;
;;; (X2 = q/2), X1, X3, (X0 = p/2), X1, X2
;;;
;;; into something like:
;;;
;;; (#'%put-structure 2 q 2)
;;; (#'%set-variable 1)
;;; (#'%set-variable 3)
;;; (#'%put-structure 0 p 2)
;;; (#'%set-value 1)
;;; (#'%set-value 2)
(defun compile-tokens (wam head-tokens body-tokens store)
"Generate a series of machine instructions from a stream of head and body
tokens.
The `head-tokens` should be program-style tokens, and are compiled in program
mode. The `body-tokens` should be query-style tokens, and are compiled in
query mode.
Actual queries are a special case where the `head-tokens` stream is `nil`
The compiled instructions will be appended to `store` using
`code-push-instructions!`.
"
(let ((seen (list))
(mode nil))
(labels
((handle-argument (argument-type argument source-type source)
(assert (eql argument-type :argument) ()
"Attempted argument assignment to non-argument register.")
(assert (member source-type '(:local :permanent)) ()
"Attempted argument assignment from non-permanent/local register.")
; OP X_n A_i
(code-push-instruction! store
(if (push-if-new source seen)
(ecase mode
(:program +opcode-get-variable+)
(:query +opcode-put-variable+))
(ecase mode
(:program +opcode-get-value+)
(:query +opcode-put-value+)))
source
argument))
(handle-structure (register-type register functor arity)
(assert (member register-type '(:local :argument)) ()
"Attempted structure assignment to non-local/argument register.")
; OP functor reg
(push register seen)
(code-push-instruction! store
(ecase mode
(:program +opcode-get-structure+)
(:query +opcode-put-structure+))
(wam-ensure-functor-index wam (cons functor arity))
register))
(handle-call (functor arity)
(code-push-instruction! store
+opcode-call+
(wam-ensure-functor-index wam (cons functor arity))))
(handle-proceed ()
(code-push-instruction! store
+opcode-proceed+))
(handle-register (register-type register)
(declare (ignore register-type))
; OP reg
(code-push-instruction! store
(if (push-if-new register seen)
(ecase mode
(:program +opcode-unify-variable+)
(:query +opcode-set-variable+))
(ecase mode
(:program +opcode-unify-value+)
(:query +opcode-set-value+)))
register))
(handle-stream (tokens)
(loop :for token :in tokens :collect
(match token
(`(:argument (,argument-type . ,argument) (,source-type . ,source))
(handle-argument argument-type argument source-type source))
(`(:structure (,register-type . ,register) ,functor ,arity)
(handle-structure register-type register functor arity))
(`(:call ,functor ,arity)
(handle-call functor arity))
(`(:proceed)
(handle-proceed))
(`(,register-type . ,register)
(handle-register register-type register))))))
(when head-tokens
(setf mode :program)
(handle-stream head-tokens))
(setf mode :query)
(handle-stream body-tokens))))
(defun mark-label (wam functor arity store)
"Set the code label `(functor . arity)` to point at the next space in `store`."
;; todo make this less ugly
(setf (wam-code-label wam (wam-ensure-functor-index wam (cons functor arity)))
(fill-pointer store)))
;;;; UI
(defun make-query-code-store ()
(make-array 64
:fill-pointer 0
:adjustable t
:element-type 'code-word))
(defun compile-clause (wam store head body)
"Compile the clause into the given store array.
`head` should be the head of the clause for program clauses, or may be `nil`
for query clauses.
"
(let* ((permanent-variables
(find-permanent-variables (cons head body)))
(head-tokens
(when head
(multiple-value-bind (tokens functor arity)
(tokenize-program-term head permanent-variables)
(mark-label wam functor arity store) ; TODO: this is ugly
tokens)))
(body-tokens
(loop :for term :in body :append
(tokenize-query-term term permanent-variables))))
(compile-tokens wam head-tokens body-tokens store))
(values))
(defun compile-query (wam query)
"Compile `query` into a fresh array of bytecode.
`query` should be a list of goal terms.
"
(let ((store (make-query-code-store)))
(compile-clause wam store nil query)
store))
(defun compile-program (wam rule)
"Compile `rule` into the WAM's code store.
`rule` should be a clause consisting of a head term and zero or more body
terms. A rule with no body is also called a \"fact\".
"
(compile-clause wam (wam-code wam) (first rule) (rest rule))
(values))