test.py
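Evaluation script: loads pretrained A3C weights and runs a fixed number of rendered episodes on BipedalWalker-v2. It targets TensorFlow 1.x and the classic Gym step API (4-tuple return from env.step).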
# -*- coding: UTF-8 -*-
from Worker import Worker  # Not used in this evaluation script.
from A3CAgentContinuous import A3CAgentContinuous
import tensorflow as tf
import gym

max_episode = 50
env_name = "BipedalWalker-v2"
weight_path = "model/a3c-{}-weight".format(env_name.lower())
gamma = 0.99           # Future reward discount rate.
alpha_actor = 0.00001  # Learning rate for actors.
alpha_critic = 0.0001  # Learning rate for critics.

sess = tf.Session()
env = gym.make(env_name)

obvSpace_dim = env.observation_space.shape
try:
    # Continuous (Box) action spaces expose their dimension via `shape`.
    actSpace_dim = env.action_space.shape[0]
except (AttributeError, IndexError):
    # Discrete action spaces expose the number of actions via `n`.
    actSpace_dim = env.action_space.n
actSpace_low = env.action_space.low
actSpace_high = env.action_space.high

with sess.as_default(), sess.graph.as_default():
    # Create the main agent in evaluation mode (no gradient updates).
    Agent = A3CAgentContinuous(
        "Main",
        sess,
        obvSpace_dim,
        actSpace_dim,
        actSpace_low,
        actSpace_high,
        gamma,
        alpha_actor,
        alpha_critic,
        training=False
    )
    saver = tf.train.Saver()
    try:
        saver.restore(sess, weight_path)
        print("[+] Weights loaded.")
    except Exception:
        print("[x] Weights couldn't be loaded, exiting...")
        exit()

    for episode in range(max_episode):
        state = env.reset()
        episode_reward_sum = 0.0
        done = False
        while not done:
            env.render()
            action, _ = Agent.act(state)
            next_state, reward, done, _ = env.step(action)
            # BipedalWalker returns -100 when the walker falls; clip it
            # so a single fall doesn't dominate the episode statistics.
            if reward == -100:
                reward = -2
            episode_reward_sum += reward
            state = next_state
        print(
            "Episode {}".format(episode),
            "EpRew {0:.2f}".format(episode_reward_sum)
        )
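
A quick way to sanity-check the environment setup before loading the A3C weights is a random-policy rollout. The snippet below is a companion sketch, not part of test.py: it assumes the same classic Gym (<= 0.21) 4-tuple step API with Box2D installed, and substitutes random action sampling for Agent.act.

# -*- coding: UTF-8 -*-
import gym

env = gym.make("BipedalWalker-v2")
state = env.reset()
done = False
episode_reward_sum = 0.0
while not done:
    # Random actions stand in for Agent.act(state) here.
    action = env.action_space.sample()
    state, reward, done, _ = env.step(action)
    episode_reward_sum += reward
print("Random-policy episode reward: {0:.2f}".format(episode_reward_sum))

A random policy on BipedalWalker typically scores well below zero, so if this runs without errors it confirms the environment, rendering, and step API before any weight restoration is attempted.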