train3d.py
import os

# Silence TensorFlow's C++ logging; this must be set before TF is imported
# (stable_baselines imports TensorFlow on import).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import gym
import gym_auv
import numpy as np
from stable_baselines import PPO2
from stable_baselines.bench import Monitor
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv
from utils import parse_experiment_info
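
# Curriculum of gym-auv scenarios in increasing difficulty; every stage after
# "beginner" is warm-started from the previous stage's saved model.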
scenarios = ["beginner", "intermediate", "proficient", "advanced", "expert"]

# PPO2 hyperparameters (stable-baselines defaults apply otherwise).
hyperparams = {
    'n_steps': 1024,
    'nminibatches': 32,
    'learning_rate': 1e-5,
    'lam': 0.95,
    'gamma': 0.99,
    'noptepochs': 4,
    'cliprange': 0.2,
    'ent_coef': 0.01,
    'verbose': 2,
}
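
# PPO2 also accepts a callable learning rate, called with the remaining
# progress fraction (1.0 -> 0.0). A hedged sketch of a linear decay, in case a
# schedule is preferred over the constant above:
#
#     hyperparams['learning_rate'] = lambda frac: 1e-5 * frac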

# Checkpoint callback for agent.learn(); stable-baselines invokes it once per
# PPO2 update, i.e. every n_steps * num_envs environment steps.
def callback(_locals, _globals):
    global n_steps, best_mean_reward
    # Save a numbered checkpoint every 5th update.
    if (n_steps + 1) % 5 == 0:
        _locals['self'].save(os.path.join(agents_dir, "model_" + str(n_steps + 1) + ".pkl"))
    n_steps += 1
    return True
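
# A hedged sketch of reward-based checkpointing (the classic stable-baselines
# pattern the unused best_mean_reward hints at), reading the Monitor logs that
# accumulate under agents_dir:
#
#     from stable_baselines.results_plotter import load_results, ts2xy
#
#     x, y = ts2xy(load_results(agents_dir), 'timesteps')
#     if len(y) > 0 and np.mean(y[-100:]) > best_mean_reward:
#         best_mean_reward = np.mean(y[-100:])
#         _locals['self'].save(os.path.join(agents_dir, "best_model.pkl"))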

if __name__ == '__main__':
    experiment_dir, _, _ = parse_experiment_info()

    for i, scen in enumerate(scenarios):
        agents_dir = os.path.join(experiment_dir, scen, "agents")
        tensorboard_dir = os.path.join(experiment_dir, scen, "tensorboard")
        os.makedirs(agents_dir, exist_ok=True)
        os.makedirs(tensorboard_dir, exist_ok=True)
        hyperparams["tensorboard_log"] = tensorboard_dir
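
        # Inspect training curves with: tensorboard --logdir <tensorboard_dir>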
        # One Monitor-wrapped env per worker; subprocesses when num_envs > 1,
        # otherwise a single in-process env.
        num_envs = 4
        if num_envs > 1:
            env = SubprocVecEnv([
                lambda: Monitor(gym.make("PathColav3d-v0", scenario=scen),
                                agents_dir, allow_early_resets=True)
                for _ in range(num_envs)
            ])
        else:
            env = DummyVecEnv([
                lambda: Monitor(gym.make("PathColav3d-v0", scenario=scen),
                                agents_dir, allow_early_resets=True)
            ])
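
        # Note: passing a directory makes every worker's Monitor append to the
        # same agents_dir/monitor.csv. A hedged per-worker variant, using a
        # hypothetical make_env(rank) helper:
        #
        #     def make_env(rank):
        #         return lambda: Monitor(gym.make("PathColav3d-v0", scenario=scen),
        #                                os.path.join(agents_dir, str(rank)),
        #                                allow_early_resets=True)
        #     env = SubprocVecEnv([make_env(rank) for rank in range(num_envs)])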
        if scen == "beginner":
            agent = PPO2(MlpPolicy, env, **hyperparams)
        else:
            # Warm-start from the previous scenario's final model; PPO2.load
            # rebuilds the graph and restores the saved parameters itself.
            continual_model = os.path.join(experiment_dir, scenarios[i - 1], "agents", "last_model.pkl")
            agent = PPO2.load(continual_model, env=env, **hyperparams)
        # Progressively longer training budgets for harder scenarios.
        best_mean_reward, n_steps, timesteps = -np.inf, 0, int(300e3 + i * 150e3)
        agent.learn(total_timesteps=timesteps, tb_log_name="PPO2", callback=callback)

        save_path = os.path.join(agents_dir, "last_model.pkl")
        agent.save(save_path)
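
# A hedged sketch of replaying a trained stage afterwards (separate from the
# training loop above; assumes the same env id and a deterministic rollout):
#
#     agent = PPO2.load(os.path.join(experiment_dir, "expert", "agents", "last_model.pkl"))
#     env = DummyVecEnv([lambda: gym.make("PathColav3d-v0", scenario="expert")])
#     obs = env.reset()
#     for _ in range(1000):
#         action, _ = agent.predict(obs, deterministic=True)
#         obs, rewards, dones, infos = env.step(action)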