examples/monte-carlo-player.lisp @ 4ce62327f4bd
Add `abort` support, tweak examples for ELS
author |
Steve Losh <steve@stevelosh.com> |
date |
Mon, 03 Apr 2017 23:52:37 +0200 |
parents |
fc6ac5c922d6 |
children |
(none) |
(in-package :cl-user)
(ql:quickload '(:cl-ggp :cl-ggp.reasoner))
;;;; Simulations --------------------------------------------------------------
(defun random-elt (sequence)
(elt sequence (random (length sequence))))
(defun random-joint-move (reasoner state)
(mapcar (lambda (role)
(cons role (random-elt
(ggp.reasoner:legal-moves-for reasoner state role))))
(ggp.reasoner:roles reasoner)))
(defun random-playout-value (reasoner role state &optional our-move)
(if (ggp.reasoner:terminalp reasoner state)
(ggp.reasoner:goal-value-for reasoner state role)
(let ((joint-move (random-joint-move reasoner state)))
(when our-move
(setf (cdr (assoc role joint-move)) our-move))
(random-playout-value
reasoner
role
(ggp.reasoner:next-state reasoner state joint-move)))))
;;;; Player -------------------------------------------------------------------
(defclass monte-carlo-player (ggp:ggp-player)
((role :accessor p-role)
(current-state :accessor p-current-state)
(reasoner :accessor p-reasoner)))
(defmethod ggp:player-start-game
((player monte-carlo-player) rules role deadline)
(declare (ignore deadline))
(setf (p-role player) role
(p-reasoner player) (ggp.reasoner:make-reasoner rules)))
(defmethod ggp:player-update-game
((player monte-carlo-player) moves)
(setf (p-current-state player)
(if (null moves)
(ggp.reasoner:initial-state (p-reasoner player))
(ggp.reasoner:next-state (p-reasoner player)
(p-current-state player)
moves))))
(defun conservative-deadline (deadline seconds-of-breathing-room)
(- deadline (* seconds-of-breathing-room
internal-time-units-per-second)))
(defmethod ggp:player-select-move
((player monte-carlo-player) deadline)
(loop
:with conservative-deadline = (conservative-deadline deadline 2)
:with reasoner = (p-reasoner player)
:with state = (p-current-state player)
:with role = (p-role player)
:with our-moves = (ggp.reasoner:legal-moves-for reasoner state role)
:with scores = (mapcar (lambda (move) (cons move 0))
our-moves)
; '(((mark 1 1) . 0)
; ((mark 1 2) . 0)
; ...)
:until (>= (get-internal-real-time) conservative-deadline)
:do (dolist (move our-moves)
(incf (cdr (assoc move scores))
(random-playout-value reasoner role state move)))
:finally (return (car (first (sort scores #'> :key #'cdr))))))
(defmethod ggp:player-select-move
((player monte-carlo-player) deadline)
(loop
:with conservative-deadline = (conservative-deadline deadline 2)
:with reasoner = (p-reasoner player)
:with state = (p-current-state player)
:with role = (p-role player)
:with our-moves = (ggp.reasoner:legal-moves-for reasoner state role)
:with scores = (mapcar (lambda (move) (cons move 0))
our-moves)
:for count :from 1
:until (>= (get-internal-real-time) conservative-deadline)
:when (= 1 (length our-moves)) :do (return (first our-moves))
:do (dolist (move our-moves)
(incf (cdr (assoc move scores))
(random-playout-value reasoner role state move)))
:finally (progn
(format t "~%Ran ~D * ~D = ~D simulations~%"
count (length our-moves) (* count (length our-moves)))
(format t "~%Results: ~S~%" (mapcar (lambda (score)
(cons (car score)
(/ (cdr score) count 1.0)))
scores))
(finish-output)
(return (car (first (sort scores #'> :key #'cdr)))))))
(defmethod ggp:player-stop-game
((player monte-carlo-player))
(setf (p-current-state player) nil
(p-reasoner player) nil
(p-role player) nil))
;;;; Scratch ------------------------------------------------------------------
(defvar *monte-carlo-player*
(make-instance 'monte-carlo-player
:name "ELSMonteCarloPlayer"
:port 4000))
;; (ggp:start-player *monte-carlo-player*)
;; (ggp:kill-player *monte-carlo-player*)