-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathmain.py
44 lines (30 loc) · 1.02 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import numpy as np
import os
from environment import Environment
from driver import Driver
def dispatcher(env):
    """Run the main train/evaluate loop for the given environment.

    A ``Driver`` is built from ``env`` and stepped until ``driver.itr``
    reaches ``env.n_train_iters``. On every iteration:

    * if ``env.train_mode`` is set, one ``driver.train_step()`` is performed;
    * whenever ``driver.itr`` is a multiple of ``env.test_interval``,
      ``env.n_episodes_test`` calls to ``driver.collect_experience`` are
      made, the mean and standard deviation of their results are stored on
      the driver as ``reward_mean`` / ``reward_std``, an info line is
      printed, and (when both ``env.train_mode`` and ``env.save_models``
      are set) a snapshot is saved to ``env.config_dir``.

    Args:
        env: Configuration object; the attributes named above are read from
            it and it is passed to ``Driver``.
    """
    driver = Driver(env)

    while driver.itr < env.n_train_iters:

        # Train
        if env.train_mode:
            driver.train_step()

        # Test
        if driver.itr % env.test_interval == 0:

            # measure performance: one collect_experience() call per test
            # episode (presumably each returns that episode's total reward)
            rewards = [
                driver.collect_experience(
                    record=True,
                    vis=env.vis_flag,
                    noise_flag=False,
                    n_steps=1000,
                )
                for _ in range(env.n_episodes_test)
            ]

            # update stats
            if rewards:
                driver.reward_mean = sum(rewards) / len(rewards)
                driver.reward_std = np.std(rewards)
            else:
                # No test episodes were run: store NaN instead of dividing
                # by zero, so the loop keeps going with evaluation disabled.
                driver.reward_mean = float('nan')
                driver.reward_std = float('nan')

            # print info line
            driver.print_info_line('full')

            # save snapshot
            if env.train_mode and env.save_models:
                driver.save_model(dir_name=env.config_dir)

        driver.itr += 1
if __name__ == '__main__':
    # Script entry point: build the 'Hopper-v1' environment, passing the
    # current directory as its first argument.
    env = Environment(os.path.curdir, 'Hopper-v1')

    # Hand the environment to the train/evaluate loop.
    dispatcher(env)