-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgym_play.py
202 lines (162 loc) · 6.75 KB
/
gym_play.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
'''
/home/seba/DeepLearning/SuperMario/SuperMario/my_super_mario/python_env/lib/python3.6/site-packages/gym/utils/play.py
'''
import gym
import pygame
import sys
import time
import matplotlib
import numpy as np
try:
matplotlib.use('GTK3Agg')
import matplotlib.pyplot as plt
except Exception:
pass
import pyglet.window as pw
from collections import deque
from pygame.locals import HWSURFACE, DOUBLEBUF, RESIZABLE, VIDEORESIZE
from threading import Thread
def display_arr(screen, arr, video_size, transpose):
arr_min, arr_max = arr.min(), arr.max()
arr = 255.0 * (arr - arr_min) / (arr_max - arr_min)
pyg_img = pygame.surfarray.make_surface(arr.swapaxes(0, 1) if transpose else arr)
pyg_img = pygame.transform.scale(pyg_img, video_size)
screen.blit(pyg_img, (0,0))
def play(env, transpose=True, fps=30, zoom=None, callback=None, keys_to_action=None):
"""Allows one to play the game using keyboard.
To simply play the game use:
play(gym.make("Pong-v3"))
Above code works also if env is wrapped, so it's particularly useful in
verifying that the frame-level preprocessing does not render the game
unplayable.
If you wish to plot real time statistics as you play, you can use
gym.utils.play.PlayPlot. Here's a sample code for plotting the reward
for last 5 second of gameplay.
def callback(obs_t, obs_tp1, rew, done, info):
return [rew,]
env_plotter = EnvPlotter(callback, 30 * 5, ["reward"])
env = gym.make("Pong-v3")
play(env, callback=env_plotter.callback)
Arguments
---------
env: gym.Env
Environment to use for playing.
transpose: bool
If True the output of observation is transposed.
Defaults to true.
fps: int
Maximum number of steps of the environment to execute every second.
Defaults to 30.
zoom: float
Make screen edge this many times bigger
callback: lambda or None
Callback if a callback is provided it will be executed after
every step. It takes the following input:
obs_t: observation before performing action
obs_tp1: observation after performing action
action: action that was executed
rew: reward that was received
done: whether the environemnt is done or not
info: debug info
keys_to_action: dict: tuple(int) -> int or None
Mapping from keys pressed to action performed.
For example if pressed 'w' and space at the same time is supposed
to trigger action number 2 then key_to_action dict would look like this:
{
# ...
sorted(ord('w'), ord(' ')) -> 2
# ...
}
If None, default key_to_action mapping for that env is used, if provided.
"""
obs_s = env.observation_space
assert type(obs_s) == gym.spaces.box.Box
# assert len(obs_s.shape) == 2 or (len(obs_s.shape) == 3 and obs_s.shape[2] in [1,3])
if keys_to_action is None:
if hasattr(env, 'get_keys_to_action'):
keys_to_action = env.get_keys_to_action()
elif hasattr(env.unwrapped, 'get_keys_to_action'):
keys_to_action = env.unwrapped.get_keys_to_action()
else:
assert False, env.spec.id + " does not have explicit key to action mapping, " + \
"please specify one manually"
relevant_keys = set(sum(map(list, keys_to_action.keys()),[]))
if transpose:
video_size = env.observation_space.shape[1], env.observation_space.shape[0]
else:
video_size = env.observation_space.shape[0], env.observation_space.shape[1]
if zoom is not None:
video_size = int(video_size[0] * zoom), int(video_size[1] * zoom)
pressed_keys = []
running = True
env_done = True
screen = pygame.display.set_mode(video_size)
clock = pygame.time.Clock()
while running:
if env_done:
env_done = False
obs = env.reset()
np_obs = np.array(obs)
obs = obs._frames
else:
action = keys_to_action[tuple(sorted(pressed_keys))]
prev_obs = np_obs
obs, rew, env_done, info = env.step(action)
np_obs = np.array(obs)
obs = obs._frames
if callback is not None:
callback(prev_obs, np_obs, action, rew, env_done, info)
if obs is not None:
if len(obs.shape) == 2:
obs = obs[:, :, None]
if obs.shape[2] == 1:
obs = obs.repeat(3, axis=2)
display_arr(screen, obs, transpose=transpose, video_size=video_size)
# process pygame events
for event in pygame.event.get():
# test events, set key states
if event.type == pygame.KEYDOWN:
if event.key in relevant_keys:
pressed_keys.append(event.key)
elif event.key == 27:
running = False
elif event.type == pygame.KEYUP:
if event.key in relevant_keys:
pressed_keys.remove(event.key)
elif event.type == pygame.QUIT:
running = False
elif event.type == VIDEORESIZE:
video_size = event.size
screen = pygame.display.set_mode(video_size)
pygame.display.flip()
clock.tick(fps)
pygame.quit()
class PlayPlot(object):
def __init__(self, callback, horizon_timesteps, plot_names):
self.data_callback = callback
self.horizon_timesteps = horizon_timesteps
self.plot_names = plot_names
num_plots = len(self.plot_names)
self.fig, self.ax = plt.subplots(num_plots)
if num_plots == 1:
self.ax = [self.ax]
for axis, name in zip(self.ax, plot_names):
axis.set_title(name)
self.t = 0
self.cur_plot = [None for _ in range(num_plots)]
self.data = [deque(maxlen=horizon_timesteps) for _ in range(num_plots)]
def callback(self, obs_t, obs_tp1, action, rew, done, info):
points = self.data_callback(obs_t, obs_tp1, action, rew, done, info)
for point, data_series in zip(points, self.data):
data_series.append(point)
self.t += 1
xmin, xmax = max(0, self.t - self.horizon_timesteps), self.t
for i, plot in enumerate(self.cur_plot):
if plot is not None:
plot.remove()
self.cur_plot[i] = self.ax[i].scatter(range(xmin, xmax), list(self.data[i]))
self.ax[i].set_xlim(xmin, xmax)
plt.pause(0.000001)
if __name__ == '__main__':
env = gym.make("MontezumaRevengeNoFrameskip-v4")
play(env, zoom=4, fps=60)