forked from grschafer/BejeweledBot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrl.py
100 lines (80 loc) · 2.98 KB
/
rl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Greg Schafer
# CSCE 478
# Dec 10, 2012
#
# Bejeweled Bot
#
# Adapted from PyBrain Tutorial:
# http://pybrain.org/docs/tutorial/reinforcement-learning.html
import cPickle
import sys
import argparse
from pybrain.rl.learners import Q
# Command-line options (defined with argparse below):
#   - demo vs. training mode flag
#   - starting weights file and output weights file
#   - board size
#   - number of gem types
#   - animation speed
# Command-line interface: define the options, parse the process arguments
# into `args`, then range-check the values argparse cannot constrain itself.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-d", "--demo",
    action="store_true",
    help="Show the learner playing on a graphical board")
parser.add_argument(
    "-f", "--paramfile",
    help="Provide a file containing existing training weights "
         "formatted as a Python pickled object (e.g. the "
         "params_trained file in this directory)")
parser.add_argument(
    "-o", "--outfile",
    help="Save training weights to this file on program exit")
parser.add_argument(
    "-b", "--boardsize",
    type=int,
    default=8,
    help="Set height and width of Bejeweled board, must be at "
         "least 4 (for a 4x4 board); has no effect if --demo flag "
         "not set (all training is done on 4x4); default=8")
parser.add_argument(
    "-g", "--gemtypes",
    type=int,
    default=7,
    # range() gives argparse the same 5, 6, 7 choices as xrange() did.
    choices=range(5, 8),
    help="Set number of different color gems, can be between "
         "5-7; default=7")
parser.add_argument(
    "-s", "--speed",
    type=int,
    default=25,
    help="Set animation speed, can be between 1-100; has no "
         "effect if --demo flag not set; default=25")
args = parser.parse_args()

# Reject out-of-range values through argparse instead of `assert`: asserts
# are removed when Python runs with -O, which would let bad values through,
# and parser.error() reports the problem as a normal usage error.
if args.boardsize < 4:
    parser.error("--boardsize parameter must be 4 or larger")
if not 1 <= args.speed <= 100:
    parser.error("--speed parameter must be between 1-100")
# Choose which package supplies the task/board/experiment/agent/controller
# classes. Both branches bind the same five names, so the code that follows
# does not need to know which mode was selected.
if not args.demo:
    # Default mode: classes from the `train` package.
    from train.task import BejeweledTask
    from train.environment import BejeweledBoard
    from train.experiment import Experiment
    from train.agent import BejeweledAgent
    from train.controller import BejeweledActionValueTable
else:
    # --demo mode: classes from the `gfx` package.
    from gfx.task import BejeweledTask
    from gfx.environment import BejeweledBoard
    from gfx.experiment import Experiment
    from gfx.agent import BejeweledAgent
    from gfx.controller import BejeweledActionValueTable
# Wire up the learning stack: board, action-value table controller,
# Q learner, agent, task and experiment.
environment = BejeweledBoard(args.boardsize, args.gemtypes, args.speed)

# NOTE(review): the table dimensions (2**16 and 24) are hard-coded;
# presumably they correspond to the 4x4 training board -- confirm against
# the controller module before changing them.
controller = BejeweledActionValueTable(2 ** 16, 24)
controller.initialize(1.0)

if args.paramfile:
    # NOTE(review): unpickling can execute arbitrary code, so --paramfile
    # must only ever point at a trusted weights file.
    with open(args.paramfile, 'r') as param_file:
        saved_params = cPickle.load(param_file)
        controller._setParameters(saved_params)

learner = Q()
agent = BejeweledAgent(controller, learner)
task = BejeweledTask(environment)
experiment = Experiment(task, agent)
# Main loop: one interaction, one learning step, then reset the agent.
# The loop has no exit condition of its own, so it only ends when an
# exception is raised (for example KeyboardInterrupt from Ctrl-C).
try:
    while True:
        experiment.doInteractions(1)
        agent.learn()
        agent.reset()
finally:
    # `finally` replaces the former bare `except:` + three-argument raise:
    # the weights are still saved on every exit path, and the exception that
    # ended the loop keeps propagating with its original traceback.
    # If saving itself fails, that error propagates instead.
    if args.outfile:
        # Text mode matches how --paramfile is opened for reading ('r').
        with open(args.outfile, 'w') as f:
            cPickle.dump(controller.params, f)