Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/115 sac #150

Merged
merged 15 commits into from
Mar 29, 2022
56 changes: 56 additions & 0 deletions jorldy/config/sac/atari.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
### SAC Atari Config ###

# Environment settings (standard Atari preprocessing pipeline).
env = {
    # "name": it should be defined in the command. ex) python main.py --config config.AGENT.atari --env.name breakout
    "render": False,
    "gray_img": True,  # grayscale observations
    "img_width": 84,
    "img_height": 84,
    "stack_frame": 4,  # number of consecutive frames stacked into one state
    "no_op": True,  # random no-op steps at episode start — presumably for start-state diversity
    "skip_frame": 4,  # action repeat / frame skip
    "reward_clip": True,
    "episodic_life": True,
}

# SAC agent hyperparameters (discrete-action variant with a CNN head).
agent = {
    "name": "sac",
    "actor": "discrete_policy",
    "critic": "discrete_q_network",
    "head": "cnn",  # convolutional encoder for image observations
    "use_dynamic_alpha": True,  # learn the entropy temperature alpha instead of fixing it
    "gamma": 0.99,  # discount factor
    "tau": 5e-3,  # soft target-update coefficient (NOTE(review): verify against agent code)
    "buffer_size": 1000000,  # replay buffer capacity
    "batch_size": 32,
    "start_train_step": 100000,  # environment steps collected before updates begin
    "static_log_alpha": -6.0,  # fixed log(alpha), presumably used only when use_dynamic_alpha is False
    "target_update_period": 10000,
}

# Optimizer settings. Learning rates match the other SAC configs in this
# directory (actor 1.5e-4, critic 3e-4, alpha 1e-5).
optim = {
    "actor": "adam",
    "critic": "adam",
    "alpha": "adam",
    "actor_lr": 1.5e-4,
    "critic_lr": 3e-4,
    "alpha_lr": 1e-5,
}

# Training-loop settings.
train = {
    "training": True,
    "load_path": None,  # checkpoint path to resume from; None starts fresh
    "run_step": 10000000,  # total environment steps
    "print_period": 10000,
    "save_period": 100000,
    "eval_iteration": 5,
    "record": True,
    "record_period": 500000,
    # distributed setting
    "update_period": 32,
    "num_workers": 16,
}
9 changes: 5 additions & 4 deletions jorldy/config/sac/cartpole.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,24 @@
agent = {
"name": "sac",
"actor": "continuous_policy",
"critic": "sac_critic",
"critic": "continuous_q_network",
"use_dynamic_alpha": True,
"gamma": 0.99,
"tau": 5e-3,
"buffer_size": 50000,
"batch_size": 64,
"start_train_step": 5000,
"static_log_alpha": -2.0,
"target_update_period": 500,
}

optim = {
"actor": "adam",
"critic": "adam",
"alpha": "adam",
"actor_lr": 5e-4,
"critic_lr": 1e-3,
"alpha_lr": 3e-4,
"actor_lr": 1.5e-4,
"critic_lr": 3e-4,
"alpha_lr": 1e-5,
}

train = {
Expand Down
42 changes: 42 additions & 0 deletions jorldy/config/sac/cartpole_discrete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
### SAC CartPole Config ###

# Environment settings: classic CartPole with a discrete action space.
env = {
    "name": "cartpole",
    "action_type": "discrete",  # select the discrete-action variant of CartPole
    "render": False,
}

# SAC agent hyperparameters (discrete-action variant).
agent = {
    "name": "sac",
    "actor": "discrete_policy",
    "critic": "discrete_q_network",
    "use_dynamic_alpha": True,  # learn the entropy temperature alpha instead of fixing it
    "gamma": 0.99,  # discount factor
    "tau": 5e-3,  # soft target-update coefficient (NOTE(review): verify against agent code)
    "buffer_size": 50000,  # replay buffer capacity
    "batch_size": 64,
    "start_train_step": 5000,  # environment steps collected before updates begin
    "static_log_alpha": -2.0,  # fixed log(alpha), presumably used only when use_dynamic_alpha is False
    "target_update_period": 500,
}

# Optimizer choices and learning rates for each trainable component.
optim = {
    "actor": "adam",
    "critic": "adam",
    "alpha": "adam",  # optimizer for the entropy temperature
    "actor_lr": 1.5e-4,
    "critic_lr": 3e-4,
    "alpha_lr": 1e-5,
}

# Training-loop settings.
train = {
    "training": True,
    "load_path": None,  # checkpoint path to resume from; None starts fresh
    "run_step": 100000,  # total environment steps
    "print_period": 1000,
    "save_period": 10000,
    "eval_iteration": 10,
    # distributed setting
    "update_period": 32,
    "num_workers": 8,
}
2 changes: 1 addition & 1 deletion jorldy/config/sac/drone_delivery_mlagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
agent = {
"name": "sac",
"actor": "continuous_policy",
"critic": "sac_critic",
"critic": "continuous_q_network",
"head": "multi",
"use_dynamic_alpha": True,
"gamma": 0.99,
Expand Down
2 changes: 1 addition & 1 deletion jorldy/config/sac/hopper_mlagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
agent = {
"name": "sac",
"actor": "continuous_policy",
"critic": "sac_critic",
"critic": "continuous_q_network",
"use_dynamic_alpha": True,
"gamma": 0.99,
"tau": 5e-3,
Expand Down
3 changes: 1 addition & 2 deletions jorldy/config/sac/mujoco.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
agent = {
"name": "sac",
"actor": "continuous_policy",
"critic": "sac_critic",
"head": "multi",
"critic": "continuous_q_network",
"use_dynamic_alpha": True,
"gamma": 0.99,
"tau": 5e-3,
Expand Down
2 changes: 1 addition & 1 deletion jorldy/config/sac/pendulum.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
agent = {
"name": "sac",
"actor": "continuous_policy",
"critic": "sac_critic",
"critic": "continuous_q_network",
"use_dynamic_alpha": True,
"gamma": 0.99,
"tau": 5e-3,
Expand Down
38 changes: 38 additions & 0 deletions jorldy/config/sac/pong_mlagent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
### SAC Pong_ML-Agents Config ###

# Unity ML-Agents Pong environment, run at an accelerated time scale.
env = dict(name="pong_mlagent", time_scale=12.0)

# SAC agent hyperparameters (discrete-action variant).
agent = dict(
    name="sac",
    actor="discrete_policy",
    critic="discrete_q_network",
    use_dynamic_alpha=True,  # learn the entropy temperature alpha
    gamma=0.99,  # discount factor
    tau=5e-3,
    buffer_size=50000,  # replay buffer capacity
    batch_size=32,
    start_train_step=25000,  # steps collected before updates begin
    static_log_alpha=-3.0,
    target_update_period=1000,
)

# Optimizers for actor, critic, and the entropy temperature (alpha).
optim = dict(
    actor="adam",
    critic="adam",
    alpha="adam",
    actor_lr=1.5e-4,
    critic_lr=3e-4,
    alpha_lr=1e-5,
)

# Training-loop settings.
train = dict(
    training=True,
    load_path=None,  # checkpoint path to resume from; None starts fresh
    run_step=200000,
    print_period=5000,
    save_period=50000,
    eval_iteration=10,
    # distributed setting
    update_period=8,
    num_workers=16,
)
49 changes: 49 additions & 0 deletions jorldy/config/sac/procgen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
### SAC Procgen Config ###

# Environment settings for procedurally generated Procgen tasks.
env = {
    # "name": it should be defined in the command. ex) python main.py --config config.dqn.procgen --env.name coinrun
    "render": False,
    "gray_img": True,  # grayscale observations
    "stack_frame": 4,  # number of consecutive frames stacked into one state
    "no_op": True,  # random no-op steps at episode start
    "skip_frame": 4,  # action repeat / frame skip
    "reward_clip": True,
}

# SAC agent hyperparameters (discrete-action variant with a CNN head).
agent = {
    "name": "sac",
    "actor": "discrete_policy",
    "critic": "discrete_q_network",
    "head": "cnn",  # convolutional encoder for image observations
    "use_dynamic_alpha": True,  # learn the entropy temperature alpha
    "gamma": 0.99,  # discount factor
    "tau": 5e-3,  # soft target-update coefficient (NOTE(review): verify against agent code)
    "buffer_size": 1000000,  # replay buffer capacity
    "batch_size": 32,
    "start_train_step": 100000,  # environment steps collected before updates begin
    "static_log_alpha": -6.0,  # fixed log(alpha), presumably used only when use_dynamic_alpha is False
    "target_update_period": 10000,
}

# Optimizer choices and learning rates for each trainable component.
optim = {
    "actor": "adam",
    "critic": "adam",
    "alpha": "adam",
    "actor_lr": 1.5e-4,
    "critic_lr": 3e-4,
    "alpha_lr": 1e-5,
}

# Training-loop settings.
train = {
    "training": True,
    "load_path": None,  # checkpoint path to resume from; None starts fresh
    "run_step": 30000000,  # total environment steps
    "print_period": 10000,
    "save_period": 100000,
    "eval_iteration": 5,
    "record": True,
    "record_period": 300000,
    # distributed setting
    "update_period": 32,
    "num_workers": 16,
}
47 changes: 47 additions & 0 deletions jorldy/config/sac/super_mario_bros.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
### SAC Super Mario Bros Config ###

# Environment settings: Super Mario Bros with grayscale, resized,
# frame-stacked image observations.
env = dict(
    name="super_mario_bros",
    render=False,
    gray_img=True,
    img_width=84,
    img_height=84,
    stack_frame=4,
)

# SAC agent hyperparameters (discrete-action variant).
agent = dict(
    name="sac",
    actor="discrete_policy",
    critic="discrete_q_network",
    use_dynamic_alpha=True,  # learn the entropy temperature alpha
    gamma=0.99,  # discount factor
    tau=5e-3,
    buffer_size=50000,  # replay buffer capacity
    batch_size=32,
    start_train_step=25000,  # steps collected before updates begin
    static_log_alpha=-3.0,
    target_update_period=1000,
)

# Optimizers for actor, critic, and the entropy temperature (alpha).
optim = dict(
    actor="adam",
    critic="adam",
    alpha="adam",
    actor_lr=1.5e-4,
    critic_lr=3e-4,
    alpha_lr=1e-5,
)

# Training-loop settings.
train = dict(
    training=True,
    load_path=None,  # checkpoint path to resume from; None starts fresh
    run_step=100000000,
    print_period=5000,
    save_period=50000,
    eval_iteration=1,
    record=True,
    record_period=200000,
    # distributed setting
    update_period=32,
    num_workers=16,
)
Loading