Add Monte Carlo example and GDL cleaning
author |
Steve Losh <steve@stevelosh.com> |
date |
Thu, 23 Mar 2017 18:55:04 +0000 (2017-03-23) |
parents |
abdfc9d3ab4b
|
children |
4ce62327f4bd
|
branches/tags |
(none) |
files |
docs/04-reference-reasoner.markdown examples/monte-carlo-player.lisp package.reasoner.lisp src/reasoner.lisp |
Changes
--- a/docs/04-reference-reasoner.markdown Tue Mar 14 13:33:08 2017 +0000
+++ b/docs/04-reference-reasoner.markdown Thu Mar 23 18:55:04 2017 +0000
@@ -60,6 +60,12 @@
+### `ROLES` (function)
+
+ (ROLES REASONER)
+
+Return a fresh list of all the roles of `reasoner`.
+
### `TERMINALP` (function)
(TERMINALP REASONER STATE)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/examples/monte-carlo-player.lisp Thu Mar 23 18:55:04 2017 +0000
@@ -0,0 +1,90 @@
+(in-package :cl-user)
+
+(ql:quickload '(:cl-ggp :cl-ggp.reasoner))
+
+;;;; Simulations --------------------------------------------------------------
+(defun random-elt (sequence)
+ (elt sequence (random (length sequence))))
+
+
+;;;; Simulations --------------------------------------------------------------
+(defun random-move (reasoner state)
+ (mapcar (lambda (role)
+ (cons role (random-elt
+ (ggp.reasoner:legal-moves-for reasoner state role))))
+ (ggp.reasoner:roles reasoner)))
+
+(defun random-playout-value (reasoner role state &optional our-move)
+ (if (ggp.reasoner:terminalp reasoner state)
+ (ggp.reasoner:goal-value-for reasoner state role)
+ (let ((move (random-move reasoner state)))
+ (when our-move
+ (setf (cdr (assoc role move)) our-move))
+ (random-playout-value reasoner role
+ (ggp.reasoner:next-state reasoner state move)))))
+
+
+;;;; Player -------------------------------------------------------------------
+(defclass monte-carlo-player (ggp:ggp-player)
+ ((role :accessor p-role)
+ (current-state :accessor p-current-state)
+ (reasoner :accessor p-reasoner)))
+
+(defmethod ggp:player-start-game
+ ((player monte-carlo-player) rules role deadline)
+ (setf (p-role player) role
+ (p-reasoner player) (ggp.reasoner:make-reasoner rules)))
+
+(defmethod ggp:player-update-game
+ ((player monte-carlo-player) moves)
+ (setf (p-current-state player)
+ (if (null moves)
+ (ggp.reasoner:initial-state (p-reasoner player))
+ (ggp.reasoner:next-state (p-reasoner player)
+ (p-current-state player)
+ moves))))
+
+
+(defun conservative-deadline (deadline &optional (seconds-of-breathing-room 1))
+ (- deadline (* seconds-of-breathing-room internal-time-units-per-second)))
+
+(defmethod ggp:player-select-move
+ ((player monte-carlo-player) deadline)
+ (loop
+ :with conservative-deadline = (conservative-deadline deadline)
+ :with reasoner = (p-reasoner player)
+ :with state = (p-current-state player)
+ :with role = (p-role player)
+ :with our-moves = (ggp.reasoner:legal-moves-for reasoner state role)
+ :with scores = (mapcar (lambda (move) (cons move 0))
+ our-moves)
+ :for count :from 1
+ :until (>= (get-internal-real-time) conservative-deadline)
+ :do (dolist (move our-moves)
+ (incf (cdr (assoc move scores))
+ (random-playout-value reasoner role state move)))
+ :finally (progn
+ (format t "~%Ran ~D * ~D = ~D simulations~%"
+ count (length our-moves) (* count (length our-moves)))
+ (format t "~%Results: ~S~%" (mapcar (lambda (score)
+ (cons (car score)
+ (/ (cdr score) count 1.0)))
+ scores))
+ (finish-output)
+ (return (car (first (sort scores #'> :key #'cdr)))))))
+
+(defmethod ggp:player-stop-game
+ ((player monte-carlo-player))
+ (setf (p-current-state player) nil
+ (p-reasoner player) nil
+ (p-role player) nil))
+
+
+;;;; Scratch ------------------------------------------------------------------
+(defvar *monte-carlo-player*
+ (make-instance 'monte-carlo-player
+ :name "MonteCarloPlayer"
+ :port 4000))
+
+;; (ggp:start-player *monte-carlo-player*)
+;; (ggp:kill-player *monte-carlo-player*)
--- a/package.reasoner.lisp Tue Mar 14 13:33:08 2017 +0000
+++ b/package.reasoner.lisp Thu Mar 23 18:55:04 2017 +0000
@@ -6,7 +6,8 @@
:next-state
:terminalp
:legal-moves-for
- :goal-value-for)
+ :goal-value-for
+ :roles)
(:documentation "This package contains a simple GGP reasoner. It can be useful as a starting point for writing general game players."))
--- a/src/reasoner.lisp Tue Mar 14 13:33:08 2017 +0000
+++ b/src/reasoner.lisp Thu Mar 23 18:55:04 2017 +0000
@@ -12,6 +12,40 @@
(dedupe state))
+;;;; GDL Cleaning -------------------------------------------------------------
+;;; Some GDL authors use (or x y) and (and x y) in their game descriptions, even
+;;; though it's not part of the GDL "spec". Worse still, some use n-ary
+;;; versions of those predicates, because fuck you. So we'll do a quick pass
+;;; over the GDL to clean up these bugs.
+
+(defun clean-or (gdl)
+ (destructuring-bind (or . arguments)
+ gdl
+ (case (length arguments)
+ (1 (first arguments))
+ (2 gdl)
+ (t (list or (first arguments)
+ (clean-or (cons or (rest arguments))))))))
+
+(defun clean-and (gdl)
+ (destructuring-bind (and . arguments)
+ gdl
+ (case (length arguments)
+ (1 (first arguments))
+ (2 gdl)
+ (t (list and (first arguments)
+ (clean-and (cons and (rest arguments))))))))
+
+(defun clean-gdl (gdl)
+ (if (consp gdl)
+ (case (car gdl)
+ (ggp-rules::or (clean-or gdl))
+ (ggp-rules::and (clean-and gdl))
+ (t (cons (clean-gdl (car gdl))
+ (clean-gdl (cdr gdl)))))
+ gdl))
+
+
;;;; Reasoner -----------------------------------------------------------------
(defun load-gdl-preamble (db)
(push-logic-frame-with db
@@ -38,11 +72,6 @@
(current-moves :initform nil :accessor reasoner-moves)))
-(defun clean-gdl (rules)
- ;; todo this
- rules)
-
-
(defun load-rule (rule)
(if (gdl-rule-p rule)
(apply #'invoke-rule t (rest rule))
@@ -51,7 +80,7 @@
(defun load-rules-into-reasoner (reasoner rules)
(with-database (reasoner-database reasoner)
(push-logic-frame-with t
- (map nil #'load-rule rules))))
+ (map nil #'load-rule (clean-gdl rules)))))
(defun make-reasoner (rules)
@@ -171,6 +200,13 @@
(car (invoke-query-for t '?value `(ggp-rules::goal ,role ?value)))))
+(defun roles (reasoner)
+ "Return a fresh list of all the roles of `reasoner`."
+ (remove-duplicates
+ (query-for (reasoner-database reasoner) ?who
+ (ggp-rules::role ?who))))
+
+
(defun terminalp (reasoner state)
"Return whether `state` is terminal."
(with-database (reasoner-database reasoner)