From e70f9e3fc41f6809df0ca2c1e07b45b71ce316f8 Mon Sep 17 00:00:00 2001 From: ramanuzan <91711496+ramanuzan@users.noreply.github.com> Date: Mon, 3 Jan 2022 11:47:54 +0900 Subject: [PATCH] Feature/atari wrapper (#92) * implement wrapper * update config and default value * fix minor * Update conftest.py * update wrapper code * :art: Format Python code with psf/black (#95) Co-authored-by: ramanuzan * update skip frame buffer * update log file name %Y%m%d%H%M%S -> %Y%m%d%H%M%S%f * update denominator * update config * update fire reset assertion * apply black Co-authored-by: root Co-authored-by: root Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: ramanuzan Co-authored-by: root --- jorldy/config/ape_x/atari.py | 5 +- jorldy/config/ape_x/pong_mlagent.py | 2 +- jorldy/config/ape_x/procgen.py | 3 +- jorldy/config/c51/atari.py | 5 +- jorldy/config/c51/pong_mlagent.py | 2 +- jorldy/config/c51/procgen.py | 3 +- jorldy/config/ddpg/hopper_mlagent.py | 2 +- jorldy/config/double/atari.py | 5 +- jorldy/config/double/pong_mlagent.py | 2 +- jorldy/config/double/procgen.py | 3 +- jorldy/config/dqn/atari.py | 13 +- jorldy/config/dqn/pong_mlagent.py | 2 +- jorldy/config/dqn/procgen.py | 3 +- jorldy/config/dueling/atari.py | 5 +- jorldy/config/dueling/pong_mlagent.py | 2 +- jorldy/config/dueling/procgen.py | 3 +- jorldy/config/icm_ppo/atari.py | 5 +- .../config/icm_ppo/drone_delivery_mlagent.py | 2 +- jorldy/config/icm_ppo/pong_mlagent.py | 2 +- jorldy/config/icm_ppo/procgen.py | 3 +- jorldy/config/icm_ppo/super_mario_bros.py | 5 +- jorldy/config/iqn/atari.py | 5 +- jorldy/config/iqn/pong_mlagent.py | 2 +- jorldy/config/iqn/procgen.py | 3 +- jorldy/config/m_dqn/atari.py | 5 +- jorldy/config/m_dqn/pong_mlagent.py | 2 +- jorldy/config/m_dqn/procgen.py | 3 +- jorldy/config/m_iqn/atari.py | 5 +- jorldy/config/m_iqn/pong_mlagent.py | 2 +- jorldy/config/m_iqn/procgen.py | 3 +- jorldy/config/mpo/atari.py | 5 +- jorldy/config/mpo/drone_delivery_mlagent.py | 2 +- jorldy/config/mpo/hopper_mlagent.py | 2 +- jorldy/config/mpo/pong_mlagent.py | 2 +- jorldy/config/mpo/procgen.py | 3 +- jorldy/config/multistep/atari.py | 5 +- jorldy/config/multistep/pong_mlagent.py | 2 +- jorldy/config/multistep/procgen.py | 3 +- jorldy/config/noisy/atari.py | 5 +- jorldy/config/noisy/pong_mlagent.py | 2 +- jorldy/config/noisy/procgen.py | 3 +- jorldy/config/per/atari.py | 5 +- jorldy/config/per/pong_mlagent.py | 2 +- jorldy/config/per/procgen.py | 3 +- jorldy/config/ppo/atari.py | 10 +- jorldy/config/ppo/drone_delivery_mlagent.py | 2 +- jorldy/config/ppo/hopper_mlagent.py | 2 +- jorldy/config/ppo/pong_mlagent.py | 2 +- jorldy/config/ppo/procgen.py | 3 +- jorldy/config/ppo/super_mario_bros.py | 4 +- jorldy/config/qrdqn/atari.py | 5 +- jorldy/config/qrdqn/pong_mlagent.py | 2 +- jorldy/config/qrdqn/procgen.py | 5 +- jorldy/config/rainbow/atari.py | 5 +- jorldy/config/rainbow/pong_mlagent.py | 2 +- jorldy/config/rainbow/procgen.py | 3 +- jorldy/config/rainbow_iqn/atari.py | 5 +- jorldy/config/rainbow_iqn/pong_mlagent.py | 2 +- jorldy/config/rainbow_iqn/procgen.py | 3 +- jorldy/config/rnd_ppo/atari.py | 5 +- .../config/rnd_ppo/drone_delivery_mlagent.py | 2 +- jorldy/config/rnd_ppo/pong_mlagent.py | 2 +- jorldy/config/rnd_ppo/procgen.py | 3 +- jorldy/config/rnd_ppo/super_mario_bros.py | 5 +- jorldy/config/sac/drone_delivery_mlagent.py | 2 +- jorldy/config/sac/hopper_mlagent.py | 2 +- jorldy/config/vmpo/atari.py | 5 +- jorldy/config/vmpo/drone_delivery_mlagent.py | 2 +- 
jorldy/config/vmpo/hopper_mlagent.py | 2 +- jorldy/config/vmpo/pong_mlagent.py | 2 +- jorldy/config/vmpo/procgen.py | 3 +- jorldy/core/agent/ppo.py | 2 +- jorldy/core/env/atari.py | 118 ++++++++++++------ jorldy/core/env/mlagent.py | 7 +- jorldy/core/env/nes.py | 9 +- jorldy/core/env/procgen.py | 61 ++++++--- jorldy/manager/eval_manager.py | 2 +- jorldy/manager/log_manager.py | 2 +- jorldy/test/conftest.py | 4 +- 79 files changed, 274 insertions(+), 172 deletions(-) diff --git a/jorldy/config/ape_x/atari.py b/jorldy/config/ape_x/atari.py index f4e15380..8d49e14f 100644 --- a/jorldy/config/ape_x/atari.py +++ b/jorldy/config/ape_x/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/ape_x/pong_mlagent.py b/jorldy/config/ape_x/pong_mlagent.py index c8ef3cfb..761abd46 100644 --- a/jorldy/config/ape_x/pong_mlagent.py +++ b/jorldy/config/ape_x/pong_mlagent.py @@ -1,6 +1,6 @@ ### Ape-X Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "ape_x", diff --git a/jorldy/config/ape_x/procgen.py b/jorldy/config/ape_x/procgen.py index f44408ca..c9c4cff2 100644 --- a/jorldy/config/ape_x/procgen.py +++ b/jorldy/config/ape_x/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/c51/atari.py b/jorldy/config/c51/atari.py index 6bf5f226..86deb870 100644 --- a/jorldy/config/c51/atari.py +++ b/jorldy/config/c51/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/c51/pong_mlagent.py b/jorldy/config/c51/pong_mlagent.py index b2ff35cd..5bd5291b 100644 --- a/jorldy/config/c51/pong_mlagent.py +++ b/jorldy/config/c51/pong_mlagent.py @@ -1,6 +1,6 @@ ### C51 Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "c51", diff --git a/jorldy/config/c51/procgen.py b/jorldy/config/c51/procgen.py index 5d7b778c..1e52735d 100644 --- a/jorldy/config/c51/procgen.py +++ b/jorldy/config/c51/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/ddpg/hopper_mlagent.py b/jorldy/config/ddpg/hopper_mlagent.py index 491f641f..51041fa7 100644 --- a/jorldy/config/ddpg/hopper_mlagent.py +++ b/jorldy/config/ddpg/hopper_mlagent.py @@ -1,6 +1,6 @@ ### DDPG Hopper Config ### -env = {"name": "hopper_mlagent", "train_mode": True} +env = {"name": "hopper_mlagent", "time_scale": 12.0} agent = { "name": "ddpg", diff --git a/jorldy/config/double/atari.py b/jorldy/config/double/atari.py index f74f2aee..31171a7a 100644 --- a/jorldy/config/double/atari.py +++ b/jorldy/config/double/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/double/pong_mlagent.py b/jorldy/config/double/pong_mlagent.py index 8231cd5f..fc8e9bfb 100644 --- 
a/jorldy/config/double/pong_mlagent.py +++ b/jorldy/config/double/pong_mlagent.py @@ -1,6 +1,6 @@ ### Double DQN Pong MLAgent Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "double", diff --git a/jorldy/config/double/procgen.py b/jorldy/config/double/procgen.py index c83319be..3fd56e23 100644 --- a/jorldy/config/double/procgen.py +++ b/jorldy/config/double/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/dqn/atari.py b/jorldy/config/dqn/atari.py index 5216d13f..22bd50c6 100644 --- a/jorldy/config/dqn/atari.py +++ b/jorldy/config/dqn/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { @@ -27,19 +28,19 @@ } optim = { - "name": "rmsprop", - "lr": 2.5e-4, + "name": "adam", + "lr": 1e-4, } train = { "training": True, "load_path": None, - "run_step": 30000000, + "run_step": 10000000, "print_period": 10000, "save_period": 100000, "eval_iteration": 5, "record": True, - "record_period": 300000, + "record_period": 500000, # distributed setting "update_period": 32, "num_workers": 16, diff --git a/jorldy/config/dqn/pong_mlagent.py b/jorldy/config/dqn/pong_mlagent.py index f5c0ff68..f6f86df3 100644 --- a/jorldy/config/dqn/pong_mlagent.py +++ b/jorldy/config/dqn/pong_mlagent.py @@ -1,6 +1,6 @@ ### DQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "dqn", diff --git a/jorldy/config/dqn/procgen.py b/jorldy/config/dqn/procgen.py index 36a2153c..820b926e 100644 --- a/jorldy/config/dqn/procgen.py +++ b/jorldy/config/dqn/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/dueling/atari.py b/jorldy/config/dueling/atari.py index 0979dd01..d860ca5e 100644 --- a/jorldy/config/dueling/atari.py +++ b/jorldy/config/dueling/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/dueling/pong_mlagent.py b/jorldy/config/dueling/pong_mlagent.py index 40077c37..5a0ac23d 100644 --- a/jorldy/config/dueling/pong_mlagent.py +++ b/jorldy/config/dueling/pong_mlagent.py @@ -1,6 +1,6 @@ ### Dueling DQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "dueling", diff --git a/jorldy/config/dueling/procgen.py b/jorldy/config/dueling/procgen.py index 2627df37..fcef9408 100644 --- a/jorldy/config/dueling/procgen.py +++ b/jorldy/config/dueling/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/icm_ppo/atari.py b/jorldy/config/icm_ppo/atari.py index 770c204b..839d88d0 100644 --- a/jorldy/config/icm_ppo/atari.py +++ b/jorldy/config/icm_ppo/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + 
"episodic_life": True, } agent = { diff --git a/jorldy/config/icm_ppo/drone_delivery_mlagent.py b/jorldy/config/icm_ppo/drone_delivery_mlagent.py index e251a250..c63001f7 100644 --- a/jorldy/config/icm_ppo/drone_delivery_mlagent.py +++ b/jorldy/config/icm_ppo/drone_delivery_mlagent.py @@ -1,6 +1,6 @@ ### ICM PPO Drone Delivery Config ### -env = {"name": "drone_delivery_mlagent", "train_mode": True} +env = {"name": "drone_delivery_mlagent", "time_scale": 12.0} agent = { "name": "icm_ppo", diff --git a/jorldy/config/icm_ppo/pong_mlagent.py b/jorldy/config/icm_ppo/pong_mlagent.py index 93e3765a..a44daf3b 100644 --- a/jorldy/config/icm_ppo/pong_mlagent.py +++ b/jorldy/config/icm_ppo/pong_mlagent.py @@ -1,6 +1,6 @@ ### ICM PPO Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "icm_ppo", diff --git a/jorldy/config/icm_ppo/procgen.py b/jorldy/config/icm_ppo/procgen.py index 36774a4d..b16f08ee 100644 --- a/jorldy/config/icm_ppo/procgen.py +++ b/jorldy/config/icm_ppo/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/icm_ppo/super_mario_bros.py b/jorldy/config/icm_ppo/super_mario_bros.py index 3ad0b22c..d48731b6 100644 --- a/jorldy/config/icm_ppo/super_mario_bros.py +++ b/jorldy/config/icm_ppo/super_mario_bros.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/iqn/atari.py b/jorldy/config/iqn/atari.py index 31e48861..85f03ba4 100644 --- a/jorldy/config/iqn/atari.py +++ b/jorldy/config/iqn/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/iqn/pong_mlagent.py b/jorldy/config/iqn/pong_mlagent.py index 0e1a565b..58fb0296 100644 --- a/jorldy/config/iqn/pong_mlagent.py +++ b/jorldy/config/iqn/pong_mlagent.py @@ -1,6 +1,6 @@ ### IQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "iqn", diff --git a/jorldy/config/iqn/procgen.py b/jorldy/config/iqn/procgen.py index a0cd11d2..2c090c01 100644 --- a/jorldy/config/iqn/procgen.py +++ b/jorldy/config/iqn/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/m_dqn/atari.py b/jorldy/config/m_dqn/atari.py index f1eb334d..eb3feab8 100644 --- a/jorldy/config/m_dqn/atari.py +++ b/jorldy/config/m_dqn/atari.py @@ -8,8 +8,9 @@ "img_height": 84, "stack_frame": 4, "no_op": True, - "reward_clip": False, - "dead_penalty": False, + "skip_frame": 4, + "reward_clip": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/m_dqn/pong_mlagent.py b/jorldy/config/m_dqn/pong_mlagent.py index ec406c9b..ca8fd39a 100644 --- a/jorldy/config/m_dqn/pong_mlagent.py +++ b/jorldy/config/m_dqn/pong_mlagent.py @@ -1,6 +1,6 @@ ### Munchausen DQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "m_dqn", diff --git a/jorldy/config/m_dqn/procgen.py 
b/jorldy/config/m_dqn/procgen.py index 25d224e7..1174e2d8 100644 --- a/jorldy/config/m_dqn/procgen.py +++ b/jorldy/config/m_dqn/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/m_iqn/atari.py b/jorldy/config/m_iqn/atari.py index 5d473468..29856660 100644 --- a/jorldy/config/m_iqn/atari.py +++ b/jorldy/config/m_iqn/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/m_iqn/pong_mlagent.py b/jorldy/config/m_iqn/pong_mlagent.py index 743e3910..2b28ffea 100644 --- a/jorldy/config/m_iqn/pong_mlagent.py +++ b/jorldy/config/m_iqn/pong_mlagent.py @@ -1,6 +1,6 @@ ### Munchausen IQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "m_iqn", diff --git a/jorldy/config/m_iqn/procgen.py b/jorldy/config/m_iqn/procgen.py index dc0968d4..d8181d2c 100644 --- a/jorldy/config/m_iqn/procgen.py +++ b/jorldy/config/m_iqn/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/mpo/atari.py b/jorldy/config/mpo/atari.py index aad0eb0b..1db67da0 100644 --- a/jorldy/config/mpo/atari.py +++ b/jorldy/config/mpo/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/mpo/drone_delivery_mlagent.py b/jorldy/config/mpo/drone_delivery_mlagent.py index 72e04914..faa2a18e 100644 --- a/jorldy/config/mpo/drone_delivery_mlagent.py +++ b/jorldy/config/mpo/drone_delivery_mlagent.py @@ -1,6 +1,6 @@ ### MPO Drone Delivery Config ### -env = {"name": "drone_delivery_mlagent", "train_mode": True} +env = {"name": "drone_delivery_mlagent", "time_scale": 12.0} agent = { "name": "mpo", diff --git a/jorldy/config/mpo/hopper_mlagent.py b/jorldy/config/mpo/hopper_mlagent.py index 7c06618e..34f88ff5 100644 --- a/jorldy/config/mpo/hopper_mlagent.py +++ b/jorldy/config/mpo/hopper_mlagent.py @@ -1,6 +1,6 @@ ### MPO Hopper Config ### -env = {"name": "hopper_mlagent", "train_mode": True} +env = {"name": "hopper_mlagent", "time_scale": 12.0} agent = { "name": "mpo", diff --git a/jorldy/config/mpo/pong_mlagent.py b/jorldy/config/mpo/pong_mlagent.py index dbc8e2c1..7ff65585 100644 --- a/jorldy/config/mpo/pong_mlagent.py +++ b/jorldy/config/mpo/pong_mlagent.py @@ -1,6 +1,6 @@ ### MPO Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "mpo", diff --git a/jorldy/config/mpo/procgen.py b/jorldy/config/mpo/procgen.py index cab53be1..524eccc8 100644 --- a/jorldy/config/mpo/procgen.py +++ b/jorldy/config/mpo/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/multistep/atari.py b/jorldy/config/multistep/atari.py index a3741418..58065542 100644 --- a/jorldy/config/multistep/atari.py +++ b/jorldy/config/multistep/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + 
"no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/multistep/pong_mlagent.py b/jorldy/config/multistep/pong_mlagent.py index 403f68ee..a2a841fb 100644 --- a/jorldy/config/multistep/pong_mlagent.py +++ b/jorldy/config/multistep/pong_mlagent.py @@ -1,6 +1,6 @@ ### Multistep DQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "multistep", diff --git a/jorldy/config/multistep/procgen.py b/jorldy/config/multistep/procgen.py index b37dd63a..b2199c90 100644 --- a/jorldy/config/multistep/procgen.py +++ b/jorldy/config/multistep/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/noisy/atari.py b/jorldy/config/noisy/atari.py index 97bd8283..c01d199d 100644 --- a/jorldy/config/noisy/atari.py +++ b/jorldy/config/noisy/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": False, + "episodic_life": True, } agent = { diff --git a/jorldy/config/noisy/pong_mlagent.py b/jorldy/config/noisy/pong_mlagent.py index 8ce5a430..d1fbe4d2 100644 --- a/jorldy/config/noisy/pong_mlagent.py +++ b/jorldy/config/noisy/pong_mlagent.py @@ -1,6 +1,6 @@ ### Noisy DQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "noisy", diff --git a/jorldy/config/noisy/procgen.py b/jorldy/config/noisy/procgen.py index 6d0ef0bf..1efbb428 100644 --- a/jorldy/config/noisy/procgen.py +++ b/jorldy/config/noisy/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/per/atari.py b/jorldy/config/per/atari.py index d0cbfd61..f0acd276 100644 --- a/jorldy/config/per/atari.py +++ b/jorldy/config/per/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/per/pong_mlagent.py b/jorldy/config/per/pong_mlagent.py index 00b57a12..fd0ecea5 100644 --- a/jorldy/config/per/pong_mlagent.py +++ b/jorldy/config/per/pong_mlagent.py @@ -1,6 +1,6 @@ ### PER Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "per", diff --git a/jorldy/config/per/procgen.py b/jorldy/config/per/procgen.py index 857bb0e4..692e7a16 100644 --- a/jorldy/config/per/procgen.py +++ b/jorldy/config/per/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/ppo/atari.py b/jorldy/config/ppo/atari.py index a04727b6..1afb3e72 100644 --- a/jorldy/config/ppo/atari.py +++ b/jorldy/config/ppo/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { @@ -25,6 +26,7 @@ "vf_coef": 1.0, "ent_coef": 0.01, "clip_grad_norm": 1.0, + "use_standardization": True, } optim = { @@ -35,12 +37,12 @@ 
train = { "training": True, "load_path": None, - "run_step": 30000000, + "run_step": 10000000, "print_period": 10000, "save_period": 100000, "eval_iteration": 5, "record": True, - "record_period": 300000, + "record_period": 500000, # distributed setting "distributed_batch_size": 256, "update_period": agent["n_step"], diff --git a/jorldy/config/ppo/drone_delivery_mlagent.py b/jorldy/config/ppo/drone_delivery_mlagent.py index 3a672864..c754d1d8 100644 --- a/jorldy/config/ppo/drone_delivery_mlagent.py +++ b/jorldy/config/ppo/drone_delivery_mlagent.py @@ -1,6 +1,6 @@ ### PPO Drone Delivery Config ### -env = {"name": "drone_delivery_mlagent", "train_mode": True} +env = {"name": "drone_delivery_mlagent", "time_scale": 12.0} agent = { "name": "ppo", diff --git a/jorldy/config/ppo/hopper_mlagent.py b/jorldy/config/ppo/hopper_mlagent.py index 792634e1..277eb0b0 100644 --- a/jorldy/config/ppo/hopper_mlagent.py +++ b/jorldy/config/ppo/hopper_mlagent.py @@ -1,6 +1,6 @@ ### PPO Hopper Config ### -env = {"name": "hopper_mlagent", "train_mode": True} +env = {"name": "hopper_mlagent", "time_scale": 12.0} agent = { "name": "ppo", diff --git a/jorldy/config/ppo/pong_mlagent.py b/jorldy/config/ppo/pong_mlagent.py index 4f7adda9..22b4dc9d 100644 --- a/jorldy/config/ppo/pong_mlagent.py +++ b/jorldy/config/ppo/pong_mlagent.py @@ -1,6 +1,6 @@ ### PPO Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "ppo", diff --git a/jorldy/config/ppo/procgen.py b/jorldy/config/ppo/procgen.py index b6138d93..37cffc61 100644 --- a/jorldy/config/ppo/procgen.py +++ b/jorldy/config/ppo/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/ppo/super_mario_bros.py b/jorldy/config/ppo/super_mario_bros.py index 4252e03f..07603cfe 100644 --- a/jorldy/config/ppo/super_mario_bros.py +++ b/jorldy/config/ppo/super_mario_bros.py @@ -7,9 +7,9 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - # "no_op": True, + "no_op": True, "reward_clip": True, - "dead_penalty": False, + "episodic_life": True, } agent = { diff --git a/jorldy/config/qrdqn/atari.py b/jorldy/config/qrdqn/atari.py index a708a72c..e983b119 100644 --- a/jorldy/config/qrdqn/atari.py +++ b/jorldy/config/qrdqn/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/qrdqn/pong_mlagent.py b/jorldy/config/qrdqn/pong_mlagent.py index 880c82b7..6255452b 100644 --- a/jorldy/config/qrdqn/pong_mlagent.py +++ b/jorldy/config/qrdqn/pong_mlagent.py @@ -1,6 +1,6 @@ ### QRDQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "qrdqn", diff --git a/jorldy/config/qrdqn/procgen.py b/jorldy/config/qrdqn/procgen.py index c691bb93..dc56f7b8 100644 --- a/jorldy/config/qrdqn/procgen.py +++ b/jorldy/config/qrdqn/procgen.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/rainbow/atari.py b/jorldy/config/rainbow/atari.py index d3f44082..d5fb18b5 100644 --- a/jorldy/config/rainbow/atari.py +++ 
b/jorldy/config/rainbow/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": False, + "episodic_life": True, } agent = { diff --git a/jorldy/config/rainbow/pong_mlagent.py b/jorldy/config/rainbow/pong_mlagent.py index 7616a25f..b5f9996f 100644 --- a/jorldy/config/rainbow/pong_mlagent.py +++ b/jorldy/config/rainbow/pong_mlagent.py @@ -1,6 +1,6 @@ ### Rainbow DQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "rainbow", diff --git a/jorldy/config/rainbow/procgen.py b/jorldy/config/rainbow/procgen.py index 9f3e8ed5..3cd7bc03 100644 --- a/jorldy/config/rainbow/procgen.py +++ b/jorldy/config/rainbow/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/rainbow_iqn/atari.py b/jorldy/config/rainbow_iqn/atari.py index 2808ccf3..0a6bf21e 100644 --- a/jorldy/config/rainbow_iqn/atari.py +++ b/jorldy/config/rainbow_iqn/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/rainbow_iqn/pong_mlagent.py b/jorldy/config/rainbow_iqn/pong_mlagent.py index 7bacc04f..cd9da6d4 100644 --- a/jorldy/config/rainbow_iqn/pong_mlagent.py +++ b/jorldy/config/rainbow_iqn/pong_mlagent.py @@ -1,6 +1,6 @@ ### Rainbow IQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "rainbow_iqn", diff --git a/jorldy/config/rainbow_iqn/procgen.py b/jorldy/config/rainbow_iqn/procgen.py index 5a7492bd..ae695784 100644 --- a/jorldy/config/rainbow_iqn/procgen.py +++ b/jorldy/config/rainbow_iqn/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/rnd_ppo/atari.py b/jorldy/config/rnd_ppo/atari.py index 51628430..95c51efc 100644 --- a/jorldy/config/rnd_ppo/atari.py +++ b/jorldy/config/rnd_ppo/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/rnd_ppo/drone_delivery_mlagent.py b/jorldy/config/rnd_ppo/drone_delivery_mlagent.py index d1f4704a..dbce3783 100644 --- a/jorldy/config/rnd_ppo/drone_delivery_mlagent.py +++ b/jorldy/config/rnd_ppo/drone_delivery_mlagent.py @@ -1,6 +1,6 @@ ### RND PPO Drone Delivery Config ### -env = {"name": "drone_delivery_mlagent", "train_mode": True} +env = {"name": "drone_delivery_mlagent", "time_scale": 12.0} agent = { "name": "rnd_ppo", diff --git a/jorldy/config/rnd_ppo/pong_mlagent.py b/jorldy/config/rnd_ppo/pong_mlagent.py index 38b22588..938066b3 100644 --- a/jorldy/config/rnd_ppo/pong_mlagent.py +++ b/jorldy/config/rnd_ppo/pong_mlagent.py @@ -1,6 +1,6 @@ ### RND PPO Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "rnd_ppo", diff --git a/jorldy/config/rnd_ppo/procgen.py b/jorldy/config/rnd_ppo/procgen.py index 1b3e6185..77cae6eb 100644 --- 
a/jorldy/config/rnd_ppo/procgen.py +++ b/jorldy/config/rnd_ppo/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/rnd_ppo/super_mario_bros.py b/jorldy/config/rnd_ppo/super_mario_bros.py index 6723a313..a688e4a2 100644 --- a/jorldy/config/rnd_ppo/super_mario_bros.py +++ b/jorldy/config/rnd_ppo/super_mario_bros.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": False, + "episodic_life": True, } agent = { diff --git a/jorldy/config/sac/drone_delivery_mlagent.py b/jorldy/config/sac/drone_delivery_mlagent.py index 43ec8352..61ae09b7 100644 --- a/jorldy/config/sac/drone_delivery_mlagent.py +++ b/jorldy/config/sac/drone_delivery_mlagent.py @@ -1,6 +1,6 @@ ### SAC Drone Delivery Config ### -env = {"name": "drone_delivery_mlagent", "train_mode": True} +env = {"name": "drone_delivery_mlagent", "time_scale": 12.0} agent = { "name": "sac", diff --git a/jorldy/config/sac/hopper_mlagent.py b/jorldy/config/sac/hopper_mlagent.py index 271e9ac4..106000cc 100644 --- a/jorldy/config/sac/hopper_mlagent.py +++ b/jorldy/config/sac/hopper_mlagent.py @@ -1,6 +1,6 @@ ### SAC Hopper Config ### -env = {"name": "hopper_mlagent", "train_mode": True} +env = {"name": "hopper_mlagent", "time_scale": 12.0} agent = { "name": "sac", diff --git a/jorldy/config/vmpo/atari.py b/jorldy/config/vmpo/atari.py index 7e740db1..134dee22 100644 --- a/jorldy/config/vmpo/atari.py +++ b/jorldy/config/vmpo/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/vmpo/drone_delivery_mlagent.py b/jorldy/config/vmpo/drone_delivery_mlagent.py index 040c9057..8b324548 100644 --- a/jorldy/config/vmpo/drone_delivery_mlagent.py +++ b/jorldy/config/vmpo/drone_delivery_mlagent.py @@ -1,6 +1,6 @@ ### V-MPO Drone Delivery Config ### -env = {"name": "drone_delivery_mlagent", "train_mode": True} +env = {"name": "drone_delivery_mlagent", "time_scale": 12.0} agent = { "name": "vmpo", diff --git a/jorldy/config/vmpo/hopper_mlagent.py b/jorldy/config/vmpo/hopper_mlagent.py index 263ed661..f295717b 100644 --- a/jorldy/config/vmpo/hopper_mlagent.py +++ b/jorldy/config/vmpo/hopper_mlagent.py @@ -1,6 +1,6 @@ ### V-MPO Hopper Config ### -env = {"name": "hopper_mlagent", "train_mode": True} +env = {"name": "hopper_mlagent", "time_scale": 12.0} agent = { "name": "vmpo", diff --git a/jorldy/config/vmpo/pong_mlagent.py b/jorldy/config/vmpo/pong_mlagent.py index 93c52a67..81d54517 100644 --- a/jorldy/config/vmpo/pong_mlagent.py +++ b/jorldy/config/vmpo/pong_mlagent.py @@ -1,6 +1,6 @@ ### V-MPO Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "vmpo", diff --git a/jorldy/config/vmpo/procgen.py b/jorldy/config/vmpo/procgen.py index 9f41d444..00f958f6 100644 --- a/jorldy/config/vmpo/procgen.py +++ b/jorldy/config/vmpo/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/core/agent/ppo.py b/jorldy/core/agent/ppo.py index 79adecd2..8d56c556 100644 --- a/jorldy/core/agent/ppo.py +++ b/jorldy/core/agent/ppo.py @@ -101,7 +101,7 @@ 
def learn(self): ) if self.use_standardization: adv = (adv - adv.mean(dim=1, keepdim=True)) / ( - adv.std(dim=1, keepdim=True) + 1e-7 + adv.std(dim=1, keepdim=True) + 1e-6 ) adv = adv.view(-1, 1) ret = adv + value diff --git a/jorldy/core/env/atari.py b/jorldy/core/env/atari.py index 8aeecab7..a99081e3 100644 --- a/jorldy/core/env/atari.py +++ b/jorldy/core/env/atari.py @@ -4,7 +4,7 @@ from .utils import ImgProcessor from .base import BaseEnv -COMMON_VERSION = "Deterministic-v4" +COMMON_VERSION = "NoFrameskip-v4" class _Atari(BaseEnv): @@ -19,9 +19,12 @@ class _Atari(BaseEnv): stack_frame (int): the number of stacked frame in one single state. life_key (str): key of life query function in emulator. no_op (bool): parameter that determine whether or not to operate during the first 30(no_op_max) steps. + skip_frame (int) : the number of skipped frame. reward_clip (bool): parameter that determine whether to use reward clipping. - reward_scale (float): reward normalization denominator. - dead_penatly (bool): parameter that determine whether to use penalty when the agent dies. + episodic_life (bool): parameter that determine done is True when dead is True. + fire_reset (bool): parameter that determine take action on reset for environments that are fixed until firing. + train_mode (bool): parameter that determine whether train mode or not. + """ def __init__( @@ -33,10 +36,12 @@ def __init__( img_height=84, stack_frame=4, life_key="lives", - no_op=False, - reward_clip=False, - reward_scale=None, - dead_penalty=False, + no_op=True, + skip_frame=4, + reward_clip=True, + episodic_life=True, + fire_reset=True, + train_mode=True, **kwargs, ): self.render = render @@ -61,30 +66,47 @@ def __init__( self.life_key = life_key self.no_op = no_op self.no_op_max = 30 + assert isinstance(skip_frame, int) and skip_frame > 0 + self.skip_frame = skip_frame + self.skip_frame_buffer = np.zeros( + (2,) + self.env.observation_space.shape, dtype=np.uint8 + ) self.reward_clip = reward_clip - self.reward_scale = reward_scale - self.dead_penalty = dead_penalty + self.episodic_life = episodic_life + self.was_real_done = True + self.fire_reset = fire_reset and ( + self.env.unwrapped.get_action_meanings()[1] == "FIRE" + ) + self.train_mode = train_mode print(f"{name} Start!") print(f"state size: {self.state_size}") print(f"action size: {self.action_size}") def reset(self): - self.env.reset() - state, reward, _, info = self.env.step(1) - - self.score = reward - self.life = info[self.life_key] - - if self.no_op: - for _ in range(np.random.randint(0, self.no_op_max)): + total_reward = 0 + if self.was_real_done: + state = self.env.reset() + self.was_real_done = False + if self.no_op: + num_no_op = np.random.randint(1, self.no_op_max) + for i in range(num_no_op): + state, reward, done, info = self.env.step(0) + total_reward += reward + if done: + self.env.reset() + if self.fire_reset: + state, reward, done, info = self.env.step(1) + self.life = info[self.life_key] + total_reward += reward + else: + if self.fire_reset: + state, reward, _, info = self.env.step(1) + else: state, reward, _, info = self.env.step(0) - self.score += reward - if self.life != info[self.life_key]: - if self.life > info[self.life_key]: - state, reward, _, _ = self.env.step(1) - self.score += reward - self.life = info[self.life_key] + self.life = info[self.life_key] + total_reward += reward + self.score = total_reward state = self.img_processor.convert_img(state) self.stacked_state = np.tile(state, (self.stack_frame, 1, 1)) @@ -95,33 +117,47 @@ def 
step(self, action): if self.render: self.env.render() - next_state, reward, done, info = self.env.step(action.item()) - self.score += reward - - dead = False - if self.life != info[self.life_key] and not done: - if self.life > info[self.life_key]: - state, _reward, _, _ = self.env.step(1) - self.score += _reward - dead = True - self.life = info[self.life_key] + dead, total_reward = False, 0 + for i in range(self.skip_frame): + next_state, reward, done, info = self.env.step(action.item()) + total_reward += reward + _dead = False + if self.life != info[self.life_key] and not done: + if self.life > info[self.life_key]: + if self.fire_reset: + next_state, reward, _, _ = self.env.step(1) + total_reward += reward + _dead = True + self.life = info[self.life_key] + + dead = dead or _dead + if i == self.skip_frame - 2: + self.skip_frame_buffer[0] = next_state + if i == self.skip_frame - 1: + self.skip_frame_buffer[1] = next_state + + if done: + self.was_real_done = True + break + + self.score += total_reward + + next_state = self.skip_frame_buffer.max(axis=0) next_state = self.img_processor.convert_img(next_state) self.stacked_state = np.concatenate( (self.stacked_state[self.num_channel :], next_state), axis=0 ) if self.reward_clip: - reward = ( - reward / self.reward_scale if self.reward_scale else np.tanh(reward) - ) + total_reward = np.sign(total_reward) - if dead and self.dead_penalty: - reward = -1 + if self.episodic_life and self.train_mode: + done = dead or done - next_state, reward, done = map( - lambda x: np.expand_dims(x, 0), [self.stacked_state, [reward], [done]] + next_state, total_reward, done = map( + lambda x: np.expand_dims(x, 0), [self.stacked_state, [total_reward], [done]] ) - return (next_state, reward, done) + return (next_state, total_reward, done) def close(self): self.env.close() diff --git a/jorldy/core/env/mlagent.py b/jorldy/core/env/mlagent.py index 5772b26a..18d14867 100644 --- a/jorldy/core/env/mlagent.py +++ b/jorldy/core/env/mlagent.py @@ -18,11 +18,11 @@ class _MLAgent(BaseEnv): Args: env_name (str): name of environment in ML-Agents. - train_mode (bool): parameter that determine whether to use low-resource training rendering mode. render (bool): parameter that determine whether to render. + time_scale (bool): parameter that determine frame time_scale. 
""" - def __init__(self, env_name, train_mode=True, render=False, id=None, **kwargs): + def __init__(self, env_name, render=False, time_scale=12.0, id=None, **kwargs): env_path = f"./core/env/mlagents/{env_name}/{match_build()}/{env_name}" id = ( np.random.randint(65534 - UnityEnvironment.BASE_ENVIRONMENT_PORT) @@ -43,7 +43,6 @@ def __init__(self, env_name, train_mode=True, render=False, id=None, **kwargs): self.env.reset() - self.train_mode = train_mode self.score = 0 self.behavior_name = list(self.env.behavior_specs.keys())[0] @@ -51,7 +50,7 @@ def __init__(self, env_name, train_mode=True, render=False, id=None, **kwargs): self.is_continuous_action = self.spec.action_spec.is_continuous() - engine_configuration_channel.set_configuration_parameters(time_scale=12.0) + engine_configuration_channel.set_configuration_parameters(time_scale=time_scale) dec, term = self.env.get_steps(self.behavior_name) def reset(self): diff --git a/jorldy/core/env/nes.py b/jorldy/core/env/nes.py index 71088575..88d4fc38 100644 --- a/jorldy/core/env/nes.py +++ b/jorldy/core/env/nes.py @@ -17,7 +17,9 @@ class _Nes(_Atari): """ def __init__(self, name, **kwargs): - super(_Nes, self).__init__(name=name, life_key="life", **kwargs) + super(_Nes, self).__init__( + name=name, life_key="life", fire_reset=False, **kwargs + ) self.env = JoypadSpace(self.env, RIGHT_ONLY) print(f"action size changed: {self.action_size} -> {self.env.action_space.n}") self.action_size = self.env.action_space.n @@ -29,7 +31,4 @@ def get_frame(self): class SuperMarioBros(_Nes): def __init__(self, **kwargs): - reward_scale = 15.0 - super(SuperMarioBros, self).__init__( - "SuperMarioBros-v0", reward_scale=reward_scale, **kwargs - ) + super(SuperMarioBros, self).__init__("SuperMarioBros-v0", **kwargs) diff --git a/jorldy/core/env/procgen.py b/jorldy/core/env/procgen.py index 87751366..991a8e94 100644 --- a/jorldy/core/env/procgen.py +++ b/jorldy/core/env/procgen.py @@ -17,6 +17,7 @@ class _Procgen(BaseEnv): img_height (int): height of image input. stack_frame (int): the number of stacked frame in one single state. no_op (bool): parameter that determine whether or not to operate during the first 30(no_op_max) steps. + skip_frame (int) : the number of skipped frame. reward_clip (bool): parameter that determine whether to use reward clipping. 
""" @@ -28,8 +29,9 @@ def __init__( img_width=64, img_height=64, stack_frame=4, - no_op=False, - reward_clip=False, + no_op=True, + skip_frame=4, + reward_clip=True, **kwargs, ): self.render = render @@ -46,12 +48,18 @@ def __init__( ) self.env = ProcgenEnv(1, name, render_mode="rgb_array") + self.state_size = [stack_frame, img_height, img_width] self.action_size = self.env.action_space.n self.action_type = "discrete" self.score = 0 self.no_op = no_op self.no_op_max = 30 + assert isinstance(skip_frame, int) and skip_frame > 0 + self.skip_frame = skip_frame + self.skip_frame_buffer = np.zeros( + (2,) + self.env.observation_space["rgb"].shape, dtype=np.uint8 + ) self.reward_clip = reward_clip print(f"{name} Start!") @@ -59,16 +67,22 @@ def __init__( print(f"action size: {self.action_size}") def reset(self): - state = self.env.reset()["rgb"][0] - - obs, reward, _, info = self.env.step(np.ones(1)) - self.score = reward[0] + self.env.reset() + total_reward = 0 + obs = self.env.reset() if self.no_op: - for _ in range(np.random.randint(0, self.no_op_max)): - obs, reward, _, info = self.env.step(np.zeros(1)) - self.score += reward - state = self.img_processor.convert_img(obs["rgb"][0]) + num_no_op = np.random.randint(1, self.no_op_max) + for i in range(num_no_op): + obs, reward, done, info = self.env.step(np.zeros(1)) + total_reward += reward[0] + if done: + obs = self.env.reset() + + state = obs["rgb"][0] + self.score = total_reward + + state = self.img_processor.convert_img(state) self.stacked_state = np.tile(state, (self.stack_frame, 1, 1)) state = np.expand_dims(self.stacked_state, 0) return state @@ -76,21 +90,34 @@ def reset(self): def step(self, action): if self.render: self.env.render() - next_obs, reward, done, info = self.env.step(action.reshape((1,))) - self.score += reward[0] - next_state = self.img_processor.convert_img(next_obs["rgb"][0]) + total_reward = 0 + for i in range(self.skip_frame): + next_obs, reward, done, info = self.env.step(action.reshape((1,))) + next_state = next_obs["rgb"][0] + total_reward += reward + + if i == self.skip_frame - 2: + self.skip_frame_buffer[0] = next_state + if i == self.skip_frame - 1: + self.skip_frame_buffer[1] = next_state + + if done: + break + + next_state = self.skip_frame_buffer.max(axis=0) + next_state = self.img_processor.convert_img(next_state) self.stacked_state = np.concatenate( (self.stacked_state[self.num_channel :], next_state), axis=0 ) if self.reward_clip: - reward = np.tanh(reward) + total_reward = np.sign(total_reward) - next_state, reward, done = map( - lambda x: np.expand_dims(x, 0), [self.stacked_state, reward, done] + next_state, total_reward, done = map( + lambda x: np.expand_dims(x, 0), [self.stacked_state, total_reward, done] ) - return (next_state, reward, done) + return (next_state, total_reward, done) def close(self): self.env.close() diff --git a/jorldy/manager/eval_manager.py b/jorldy/manager/eval_manager.py index b95bacf4..779bb923 100644 --- a/jorldy/manager/eval_manager.py +++ b/jorldy/manager/eval_manager.py @@ -3,7 +3,7 @@ class EvalManager: def __init__(self, Env, env_config, iteration=10, record=None, record_period=None): - self.env = Env(**env_config) + self.env = Env(**env_config, train_mode=False) self.iteration = iteration if iteration else 10 assert iteration > 0 self.record = record and self.env.recordable() diff --git a/jorldy/manager/log_manager.py b/jorldy/manager/log_manager.py index cedb9ce5..6b269f5b 100644 --- a/jorldy/manager/log_manager.py +++ b/jorldy/manager/log_manager.py @@ -9,7 +9,7 @@ class 
LogManager: def __init__(self, env, id, experiment=None): self.id = id - now = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + now = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f") self.path = ( f"./logs/{experiment}/{env}/{id}/{now}/" if experiment diff --git a/jorldy/test/conftest.py b/jorldy/test/conftest.py index b5909cb3..7a045381 100644 --- a/jorldy/test/conftest.py +++ b/jorldy/test/conftest.py @@ -7,7 +7,7 @@ class _MockEnv: - def __init__(self, state_size, action_size, action_type, episode_len): + def __init__(self, state_size, action_size, action_type, episode_len, **kwargs): self.state_size = state_size self.action_size = action_size self.action_type = action_type @@ -46,7 +46,7 @@ def recordable(self): class _MockAgent: - def __init__(self, state_size, action_size, action_type): + def __init__(self, state_size, action_size, action_type, **kwargs): self.state_size = state_size self.action_size = action_size self.action_type = action_type
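
Taken together, the config hunks above converge on a single Atari env block. The keys touched by this patch, as shown in the jorldy/config/dqn/atari.py hunk, now read:

env = {
    # ... keys above this point are outside the hunk and unchanged ...
    "img_width": 84,
    "img_height": 84,
    "stack_frame": 4,
    "no_op": True,          # random no-op actions at the start of each episode
    "skip_frame": 4,        # new: repeat each action over 4 emulator frames
    "reward_clip": True,    # now clips to sign instead of scaling / tanh
    "episodic_life": True,  # replaces the old dead_penalty flag
}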
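
In jorldy/core/agent/ppo.py only the standardization epsilon changes (1e-7 to 1e-6); the advantage normalization itself is unchanged. The tensor shapes below are illustrative:

import torch

adv = torch.randn(4, 128)  # illustrative (num_workers, n_step) advantage tensor
adv = (adv - adv.mean(dim=1, keepdim=True)) / (adv.std(dim=1, keepdim=True) + 1e-6)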
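
The central change in jorldy/core/env/atari.py is the switch from the Deterministic-v4 ROMs to NoFrameskip-v4 plus an explicit skip-frame loop: the chosen action is repeated skip_frame times, the last two raw frames are kept in a two-slot buffer, and their pixel-wise maximum becomes the observation, which suppresses Atari sprite flicker. Below is a minimal standalone sketch of the same technique over a plain gym env; MaxSkipEnv is an illustrative name, not a class from the JORLDY codebase, and it assumes the pre-0.26 gym step API used throughout the diff.

import numpy as np
import gym


class MaxSkipEnv(gym.Wrapper):
    """Repeat an action `skip` times and max-pool the last two raw frames."""

    def __init__(self, env, skip=4):
        super().__init__(env)
        assert isinstance(skip, int) and skip > 0
        self._skip = skip
        # Two-slot buffer for the last raw frames; maxing them removes flicker.
        self._buffer = np.zeros((2,) + env.observation_space.shape, dtype=np.uint8)

    def step(self, action):
        total_reward, done, info = 0.0, False, {}
        for i in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            total_reward += reward
            if i == self._skip - 2:
                self._buffer[0] = obs
            if i == self._skip - 1:
                self._buffer[1] = obs
            if done:
                break
        # Pixel-wise max over the last two frames seen in this skip window.
        return self._buffer.max(axis=0), total_reward, done, info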
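
The episodic_life, fire_reset, and no_op handling follows the usual DeepMind-style Atari conventions: a lost life ends the learning episode while was_real_done tracks true game-over, games whose second action is FIRE get that action pressed after every reset, and a random number of no-ops decorrelates initial states. Here is a compressed sketch of that bookkeeping as a separate wrapper (hypothetical class name; the patch folds the same logic directly into _Atari):

import numpy as np
import gym


class EpisodicLifeFireNoopReset(gym.Wrapper):
    """Sketch of the episodic-life / fire-reset / no-op-reset bookkeeping."""

    def __init__(self, env, noop_max=30):
        super().__init__(env)
        self.lives = 0
        self.was_real_done = True
        self.noop_max = noop_max
        # Only press FIRE on reset when the game actually defines a FIRE action.
        self.fire_reset = env.unwrapped.get_action_meanings()[1] == "FIRE"

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.was_real_done = done
        lives = self.env.unwrapped.ale.lives()
        if 0 < lives < self.lives:
            done = True  # end the learning episode on a lost life
        self.lives = lives
        return obs, reward, done, info

    def reset(self, **kwargs):
        if self.was_real_done:
            obs = self.env.reset(**kwargs)
            # Random number of initial no-ops to decorrelate start states.
            for _ in range(np.random.randint(1, self.noop_max)):
                obs, _, done, _ = self.env.step(0)
                if done:
                    obs = self.env.reset(**kwargs)
        else:
            # A life was lost but the emulator did not reset; advance one frame.
            obs, _, _, _ = self.env.step(0)
        if self.fire_reset:
            obs, _, _, _ = self.env.step(1)
        self.lives = self.env.unwrapped.ale.lives()
        self.was_real_done = False
        return obs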
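
Reward handling also changes: the old reward_scale / dead_penalty options are gone, and the reward summed over one skip window is clipped to its sign, the standard {-1, 0, +1} Atari convention, instead of being divided by a scale or squashed with tanh. For example:

import numpy as np

window_rewards = [0.0, 4.0, -2.5]   # rewards collected over one 4-frame skip window
total = sum(window_rewards)          # 1.5
old_clip = np.tanh(total)            # previous behaviour, roughly 0.905
new_clip = np.sign(total)            # new behaviour: 1.0 (always -1, 0, or +1)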
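
On the ML-Agents side, the per-config train_mode flag is replaced by an explicit time_scale value (12.0 in the updated configs) that is forwarded to the engine configuration side channel instead of being hard-coded. The relevant mlagents-envs calls, with a hypothetical build path:

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import (
    EngineConfigurationChannel,
)

channel = EngineConfigurationChannel()
# The build path is hypothetical; JORLDY resolves it from the env name and platform.
env = UnityEnvironment(
    file_name="./core/env/mlagents/pong_mlagent/build", side_channels=[channel]
)
env.reset()
# time_scale now comes from the config dict (e.g. 12.0) rather than a constant.
channel.set_configuration_parameters(time_scale=12.0)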
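
Finally, log directory timestamps gain microseconds (%f), so two runs launched within the same second no longer write to the same folder:

import datetime

now = datetime.datetime.now()
old_stamp = now.strftime("%Y%m%d%H%M%S")    # e.g. 20220103114754
new_stamp = now.strftime("%Y%m%d%H%M%S%f")  # e.g. 20220103114754123456 (adds microseconds)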