fc6ac5c922d6

Add Monte Carlo example and GDL cleaning
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Thu, 23 Mar 2017 18:55:04 +0000 (2017-03-23)
parents abdfc9d3ab4b
children 4ce62327f4bd
branches/tags (none)
files docs/04-reference-reasoner.markdown examples/monte-carlo-player.lisp package.reasoner.lisp src/reasoner.lisp

Changes

--- a/docs/04-reference-reasoner.markdown	Tue Mar 14 13:33:08 2017 +0000
+++ b/docs/04-reference-reasoner.markdown	Thu Mar 23 18:55:04 2017 +0000
@@ -60,6 +60,12 @@
 
   
 
+### `ROLES` (function)
+
+    (ROLES REASONER)
+
+Return a fresh list of all the roles of `reasoner`.
+
 ### `TERMINALP` (function)
 
     (TERMINALP REASONER STATE)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/examples/monte-carlo-player.lisp	Thu Mar 23 18:55:04 2017 +0000
@@ -0,0 +1,90 @@
+(in-package :cl-user)
+
+(ql:quickload '(:cl-ggp :cl-ggp.reasoner))
+
+;;;; Simulations --------------------------------------------------------------
+(defun random-elt (sequence)
+  (elt sequence (random (length sequence))))
+
+
+;;;; Simulations --------------------------------------------------------------
+(defun random-move (reasoner state)
+  (mapcar (lambda (role)
+            (cons role (random-elt
+                         (ggp.reasoner:legal-moves-for reasoner state role))))
+          (ggp.reasoner:roles reasoner)))
+
+(defun random-playout-value (reasoner role state &optional our-move)
+  (if (ggp.reasoner:terminalp reasoner state)
+    (ggp.reasoner:goal-value-for reasoner state role)
+    (let ((move (random-move reasoner state)))
+      (when our-move
+        (setf (cdr (assoc role move)) our-move))
+      (random-playout-value reasoner role
+                            (ggp.reasoner:next-state reasoner state move)))))
+
+
+;;;; Player -------------------------------------------------------------------
+(defclass monte-carlo-player (ggp:ggp-player)
+  ((role          :accessor p-role)
+   (current-state :accessor p-current-state)
+   (reasoner      :accessor p-reasoner)))
+
+(defmethod ggp:player-start-game
+    ((player monte-carlo-player) rules role deadline)
+  (setf (p-role player) role
+        (p-reasoner player) (ggp.reasoner:make-reasoner rules)))
+
+(defmethod ggp:player-update-game
+    ((player monte-carlo-player) moves)
+  (setf (p-current-state player)
+        (if (null moves)
+          (ggp.reasoner:initial-state (p-reasoner player))
+          (ggp.reasoner:next-state (p-reasoner player)
+                                   (p-current-state player)
+                                   moves))))
+
+
+(defun conservative-deadline (deadline &optional (seconds-of-breathing-room 1))
+  (- deadline (* seconds-of-breathing-room internal-time-units-per-second)))
+
+(defmethod ggp:player-select-move
+    ((player monte-carlo-player) deadline)
+  (loop
+    :with conservative-deadline = (conservative-deadline deadline)
+    :with reasoner = (p-reasoner player)
+    :with state = (p-current-state player)
+    :with role = (p-role player)
+    :with our-moves = (ggp.reasoner:legal-moves-for reasoner state role)
+    :with scores = (mapcar (lambda (move) (cons move 0))
+                           our-moves)
+    :for count :from 1
+    :until (>= (get-internal-real-time) conservative-deadline)
+    :do (dolist (move our-moves)
+          (incf (cdr (assoc move scores))
+                (random-playout-value reasoner role state move)))
+    :finally (progn
+               (format t "~%Ran ~D * ~D = ~D simulations~%"
+                       count (length our-moves) (* count (length our-moves)))
+               (format t "~%Results: ~S~%" (mapcar (lambda (score)
+                                                     (cons (car score)
+                                                           (/ (cdr score) count 1.0)))
+                                                   scores))
+               (finish-output)
+               (return (car (first (sort scores #'> :key #'cdr)))))))
+
+(defmethod ggp:player-stop-game
+    ((player monte-carlo-player))
+  (setf (p-current-state player) nil
+        (p-reasoner player) nil
+        (p-role player) nil))
+
+
+;;;; Scratch ------------------------------------------------------------------
+(defvar *monte-carlo-player*
+  (make-instance 'monte-carlo-player
+    :name "MonteCarloPlayer"
+    :port 4000))
+
+;; (ggp:start-player *monte-carlo-player*)
+;; (ggp:kill-player *monte-carlo-player*)
--- a/package.reasoner.lisp	Tue Mar 14 13:33:08 2017 +0000
+++ b/package.reasoner.lisp	Thu Mar 23 18:55:04 2017 +0000
@@ -6,7 +6,8 @@
     :next-state
     :terminalp
     :legal-moves-for
-    :goal-value-for)
+    :goal-value-for
+    :roles)
   (:documentation "This package contains a simple GGP reasoner.  It can be useful as a starting point for writing general game players."))
 
 
--- a/src/reasoner.lisp	Tue Mar 14 13:33:08 2017 +0000
+++ b/src/reasoner.lisp	Thu Mar 23 18:55:04 2017 +0000
@@ -12,6 +12,40 @@
   (dedupe state))
 
 
+;;;; GDL Cleaning -------------------------------------------------------------
+;;; Some GDL authors use (or x y) and (and x y) in their game descriptions, even
+;;; though it's not part of the GDL "spec".  Worse still, some use n-ary
+;;; versions of those predicates, because fuck you.  So we'll do a quick pass
+;;; over the GDL to clean up these bugs.
+
+(defun clean-or (gdl)
+  (destructuring-bind (or . arguments)
+      gdl
+    (case (length arguments)
+      (1 (first arguments))
+      (2 gdl)
+      (t (list or (first arguments)
+               (clean-or (cons or (rest arguments))))))))
+
+(defun clean-and (gdl)
+  (destructuring-bind (and . arguments)
+      gdl
+    (case (length arguments)
+      (1 (first arguments))
+      (2 gdl)
+      (t (list and (first arguments)
+               (clean-and (cons and (rest arguments))))))))
+
+(defun clean-gdl (gdl)
+  (if (consp gdl)
+    (case (car gdl)
+      (ggp-rules::or (clean-or gdl))
+      (ggp-rules::and (clean-and gdl))
+      (t (cons (clean-gdl (car gdl))
+               (clean-gdl (cdr gdl)))))
+    gdl))
+
+
 ;;;; Reasoner -----------------------------------------------------------------
 (defun load-gdl-preamble (db)
   (push-logic-frame-with db
@@ -38,11 +72,6 @@
    (current-moves :initform nil :accessor reasoner-moves)))
 
 
-(defun clean-gdl (rules)
-  ;; todo this
-  rules)
-
-
 (defun load-rule (rule)
   (if (gdl-rule-p rule)
     (apply #'invoke-rule t (rest rule))
@@ -51,7 +80,7 @@
 (defun load-rules-into-reasoner (reasoner rules)
   (with-database (reasoner-database reasoner)
     (push-logic-frame-with t
-      (map nil #'load-rule rules))))
+      (map nil #'load-rule (clean-gdl rules)))))
 
 
 (defun make-reasoner (rules)
@@ -171,6 +200,13 @@
     (car (invoke-query-for t '?value `(ggp-rules::goal ,role ?value)))))
 
 
+(defun roles (reasoner)
+  "Return a fresh list of all the roles of `reasoner`."
+  (remove-duplicates
+    (query-for (reasoner-database reasoner) ?who
+               (ggp-rules::role ?who))))
+
+
 (defun terminalp (reasoner state)
   "Return whether `state` is terminal."
   (with-database (reasoner-database reasoner)