From e70f9e3fc41f6809df0ca2c1e07b45b71ce316f8 Mon Sep 17 00:00:00 2001 From: ramanuzan <91711496+ramanuzan@users.noreply.github.com> Date: Mon, 3 Jan 2022 11:47:54 +0900 Subject: [PATCH] Feature/atari wrapper (#92) * implement wrapper * update config and default value * fix minor * Update conftest.py * update wrapper code * :art: Format Python code with psf/black (#95) Co-authored-by: ramanuzan * update skip frame buffer * update log file name %Y%m%d%H%M%S -> %Y%m%d%H%M%S%f * update denominator * update config * update fire reset assertion * apply black Co-authored-by: root Co-authored-by: root Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: ramanuzan Co-authored-by: root --- jorldy/config/ape_x/atari.py | 5 +- jorldy/config/ape_x/pong_mlagent.py | 2 +- jorldy/config/ape_x/procgen.py | 3 +- jorldy/config/c51/atari.py | 5 +- jorldy/config/c51/pong_mlagent.py | 2 +- jorldy/config/c51/procgen.py | 3 +- jorldy/config/ddpg/hopper_mlagent.py | 2 +- jorldy/config/double/atari.py | 5 +- jorldy/config/double/pong_mlagent.py | 2 +- jorldy/config/double/procgen.py | 3 +- jorldy/config/dqn/atari.py | 13 +- jorldy/config/dqn/pong_mlagent.py | 2 +- jorldy/config/dqn/procgen.py | 3 +- jorldy/config/dueling/atari.py | 5 +- jorldy/config/dueling/pong_mlagent.py | 2 +- jorldy/config/dueling/procgen.py | 3 +- jorldy/config/icm_ppo/atari.py | 5 +- .../config/icm_ppo/drone_delivery_mlagent.py | 2 +- jorldy/config/icm_ppo/pong_mlagent.py | 2 +- jorldy/config/icm_ppo/procgen.py | 3 +- jorldy/config/icm_ppo/super_mario_bros.py | 5 +- jorldy/config/iqn/atari.py | 5 +- jorldy/config/iqn/pong_mlagent.py | 2 +- jorldy/config/iqn/procgen.py | 3 +- jorldy/config/m_dqn/atari.py | 5 +- jorldy/config/m_dqn/pong_mlagent.py | 2 +- jorldy/config/m_dqn/procgen.py | 3 +- jorldy/config/m_iqn/atari.py | 5 +- jorldy/config/m_iqn/pong_mlagent.py | 2 +- jorldy/config/m_iqn/procgen.py | 3 +- jorldy/config/mpo/atari.py | 5 +- jorldy/config/mpo/drone_delivery_mlagent.py | 2 +- jorldy/config/mpo/hopper_mlagent.py | 2 +- jorldy/config/mpo/pong_mlagent.py | 2 +- jorldy/config/mpo/procgen.py | 3 +- jorldy/config/multistep/atari.py | 5 +- jorldy/config/multistep/pong_mlagent.py | 2 +- jorldy/config/multistep/procgen.py | 3 +- jorldy/config/noisy/atari.py | 5 +- jorldy/config/noisy/pong_mlagent.py | 2 +- jorldy/config/noisy/procgen.py | 3 +- jorldy/config/per/atari.py | 5 +- jorldy/config/per/pong_mlagent.py | 2 +- jorldy/config/per/procgen.py | 3 +- jorldy/config/ppo/atari.py | 10 +- jorldy/config/ppo/drone_delivery_mlagent.py | 2 +- jorldy/config/ppo/hopper_mlagent.py | 2 +- jorldy/config/ppo/pong_mlagent.py | 2 +- jorldy/config/ppo/procgen.py | 3 +- jorldy/config/ppo/super_mario_bros.py | 4 +- jorldy/config/qrdqn/atari.py | 5 +- jorldy/config/qrdqn/pong_mlagent.py | 2 +- jorldy/config/qrdqn/procgen.py | 5 +- jorldy/config/rainbow/atari.py | 5 +- jorldy/config/rainbow/pong_mlagent.py | 2 +- jorldy/config/rainbow/procgen.py | 3 +- jorldy/config/rainbow_iqn/atari.py | 5 +- jorldy/config/rainbow_iqn/pong_mlagent.py | 2 +- jorldy/config/rainbow_iqn/procgen.py | 3 +- jorldy/config/rnd_ppo/atari.py | 5 +- .../config/rnd_ppo/drone_delivery_mlagent.py | 2 +- jorldy/config/rnd_ppo/pong_mlagent.py | 2 +- jorldy/config/rnd_ppo/procgen.py | 3 +- jorldy/config/rnd_ppo/super_mario_bros.py | 5 +- jorldy/config/sac/drone_delivery_mlagent.py | 2 +- jorldy/config/sac/hopper_mlagent.py | 2 +- jorldy/config/vmpo/atari.py | 5 +- jorldy/config/vmpo/drone_delivery_mlagent.py | 2 +- 
jorldy/config/vmpo/hopper_mlagent.py | 2 +- jorldy/config/vmpo/pong_mlagent.py | 2 +- jorldy/config/vmpo/procgen.py | 3 +- jorldy/core/agent/ppo.py | 2 +- jorldy/core/env/atari.py | 118 ++++++++++++------ jorldy/core/env/mlagent.py | 7 +- jorldy/core/env/nes.py | 9 +- jorldy/core/env/procgen.py | 61 ++++++--- jorldy/manager/eval_manager.py | 2 +- jorldy/manager/log_manager.py | 2 +- jorldy/test/conftest.py | 4 +- 79 files changed, 274 insertions(+), 172 deletions(-) diff --git a/jorldy/config/ape_x/atari.py b/jorldy/config/ape_x/atari.py index f4e15380..8d49e14f 100644 --- a/jorldy/config/ape_x/atari.py +++ b/jorldy/config/ape_x/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/ape_x/pong_mlagent.py b/jorldy/config/ape_x/pong_mlagent.py index c8ef3cfb..761abd46 100644 --- a/jorldy/config/ape_x/pong_mlagent.py +++ b/jorldy/config/ape_x/pong_mlagent.py @@ -1,6 +1,6 @@ ### Ape-X Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "ape_x", diff --git a/jorldy/config/ape_x/procgen.py b/jorldy/config/ape_x/procgen.py index f44408ca..c9c4cff2 100644 --- a/jorldy/config/ape_x/procgen.py +++ b/jorldy/config/ape_x/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/c51/atari.py b/jorldy/config/c51/atari.py index 6bf5f226..86deb870 100644 --- a/jorldy/config/c51/atari.py +++ b/jorldy/config/c51/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/c51/pong_mlagent.py b/jorldy/config/c51/pong_mlagent.py index b2ff35cd..5bd5291b 100644 --- a/jorldy/config/c51/pong_mlagent.py +++ b/jorldy/config/c51/pong_mlagent.py @@ -1,6 +1,6 @@ ### C51 Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "c51", diff --git a/jorldy/config/c51/procgen.py b/jorldy/config/c51/procgen.py index 5d7b778c..1e52735d 100644 --- a/jorldy/config/c51/procgen.py +++ b/jorldy/config/c51/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/ddpg/hopper_mlagent.py b/jorldy/config/ddpg/hopper_mlagent.py index 491f641f..51041fa7 100644 --- a/jorldy/config/ddpg/hopper_mlagent.py +++ b/jorldy/config/ddpg/hopper_mlagent.py @@ -1,6 +1,6 @@ ### DDPG Hopper Config ### -env = {"name": "hopper_mlagent", "train_mode": True} +env = {"name": "hopper_mlagent", "time_scale": 12.0} agent = { "name": "ddpg", diff --git a/jorldy/config/double/atari.py b/jorldy/config/double/atari.py index f74f2aee..31171a7a 100644 --- a/jorldy/config/double/atari.py +++ b/jorldy/config/double/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/double/pong_mlagent.py b/jorldy/config/double/pong_mlagent.py index 8231cd5f..fc8e9bfb 100644 --- 
a/jorldy/config/double/pong_mlagent.py +++ b/jorldy/config/double/pong_mlagent.py @@ -1,6 +1,6 @@ ### Double DQN Pong MLAgent Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "double", diff --git a/jorldy/config/double/procgen.py b/jorldy/config/double/procgen.py index c83319be..3fd56e23 100644 --- a/jorldy/config/double/procgen.py +++ b/jorldy/config/double/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/dqn/atari.py b/jorldy/config/dqn/atari.py index 5216d13f..22bd50c6 100644 --- a/jorldy/config/dqn/atari.py +++ b/jorldy/config/dqn/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { @@ -27,19 +28,19 @@ } optim = { - "name": "rmsprop", - "lr": 2.5e-4, + "name": "adam", + "lr": 1e-4, } train = { "training": True, "load_path": None, - "run_step": 30000000, + "run_step": 10000000, "print_period": 10000, "save_period": 100000, "eval_iteration": 5, "record": True, - "record_period": 300000, + "record_period": 500000, # distributed setting "update_period": 32, "num_workers": 16, diff --git a/jorldy/config/dqn/pong_mlagent.py b/jorldy/config/dqn/pong_mlagent.py index f5c0ff68..f6f86df3 100644 --- a/jorldy/config/dqn/pong_mlagent.py +++ b/jorldy/config/dqn/pong_mlagent.py @@ -1,6 +1,6 @@ ### DQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "dqn", diff --git a/jorldy/config/dqn/procgen.py b/jorldy/config/dqn/procgen.py index 36a2153c..820b926e 100644 --- a/jorldy/config/dqn/procgen.py +++ b/jorldy/config/dqn/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/dueling/atari.py b/jorldy/config/dueling/atari.py index 0979dd01..d860ca5e 100644 --- a/jorldy/config/dueling/atari.py +++ b/jorldy/config/dueling/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/dueling/pong_mlagent.py b/jorldy/config/dueling/pong_mlagent.py index 40077c37..5a0ac23d 100644 --- a/jorldy/config/dueling/pong_mlagent.py +++ b/jorldy/config/dueling/pong_mlagent.py @@ -1,6 +1,6 @@ ### Dueling DQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "dueling", diff --git a/jorldy/config/dueling/procgen.py b/jorldy/config/dueling/procgen.py index 2627df37..fcef9408 100644 --- a/jorldy/config/dueling/procgen.py +++ b/jorldy/config/dueling/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/icm_ppo/atari.py b/jorldy/config/icm_ppo/atari.py index 770c204b..839d88d0 100644 --- a/jorldy/config/icm_ppo/atari.py +++ b/jorldy/config/icm_ppo/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + 
"episodic_life": True, } agent = { diff --git a/jorldy/config/icm_ppo/drone_delivery_mlagent.py b/jorldy/config/icm_ppo/drone_delivery_mlagent.py index e251a250..c63001f7 100644 --- a/jorldy/config/icm_ppo/drone_delivery_mlagent.py +++ b/jorldy/config/icm_ppo/drone_delivery_mlagent.py @@ -1,6 +1,6 @@ ### ICM PPO Drone Delivery Config ### -env = {"name": "drone_delivery_mlagent", "train_mode": True} +env = {"name": "drone_delivery_mlagent", "time_scale": 12.0} agent = { "name": "icm_ppo", diff --git a/jorldy/config/icm_ppo/pong_mlagent.py b/jorldy/config/icm_ppo/pong_mlagent.py index 93e3765a..a44daf3b 100644 --- a/jorldy/config/icm_ppo/pong_mlagent.py +++ b/jorldy/config/icm_ppo/pong_mlagent.py @@ -1,6 +1,6 @@ ### ICM PPO Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "icm_ppo", diff --git a/jorldy/config/icm_ppo/procgen.py b/jorldy/config/icm_ppo/procgen.py index 36774a4d..b16f08ee 100644 --- a/jorldy/config/icm_ppo/procgen.py +++ b/jorldy/config/icm_ppo/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/icm_ppo/super_mario_bros.py b/jorldy/config/icm_ppo/super_mario_bros.py index 3ad0b22c..d48731b6 100644 --- a/jorldy/config/icm_ppo/super_mario_bros.py +++ b/jorldy/config/icm_ppo/super_mario_bros.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/iqn/atari.py b/jorldy/config/iqn/atari.py index 31e48861..85f03ba4 100644 --- a/jorldy/config/iqn/atari.py +++ b/jorldy/config/iqn/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/iqn/pong_mlagent.py b/jorldy/config/iqn/pong_mlagent.py index 0e1a565b..58fb0296 100644 --- a/jorldy/config/iqn/pong_mlagent.py +++ b/jorldy/config/iqn/pong_mlagent.py @@ -1,6 +1,6 @@ ### IQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "iqn", diff --git a/jorldy/config/iqn/procgen.py b/jorldy/config/iqn/procgen.py index a0cd11d2..2c090c01 100644 --- a/jorldy/config/iqn/procgen.py +++ b/jorldy/config/iqn/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/m_dqn/atari.py b/jorldy/config/m_dqn/atari.py index f1eb334d..eb3feab8 100644 --- a/jorldy/config/m_dqn/atari.py +++ b/jorldy/config/m_dqn/atari.py @@ -8,8 +8,9 @@ "img_height": 84, "stack_frame": 4, "no_op": True, - "reward_clip": False, - "dead_penalty": False, + "skip_frame": 4, + "reward_clip": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/m_dqn/pong_mlagent.py b/jorldy/config/m_dqn/pong_mlagent.py index ec406c9b..ca8fd39a 100644 --- a/jorldy/config/m_dqn/pong_mlagent.py +++ b/jorldy/config/m_dqn/pong_mlagent.py @@ -1,6 +1,6 @@ ### Munchausen DQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "m_dqn", diff --git a/jorldy/config/m_dqn/procgen.py 
b/jorldy/config/m_dqn/procgen.py index 25d224e7..1174e2d8 100644 --- a/jorldy/config/m_dqn/procgen.py +++ b/jorldy/config/m_dqn/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/m_iqn/atari.py b/jorldy/config/m_iqn/atari.py index 5d473468..29856660 100644 --- a/jorldy/config/m_iqn/atari.py +++ b/jorldy/config/m_iqn/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/m_iqn/pong_mlagent.py b/jorldy/config/m_iqn/pong_mlagent.py index 743e3910..2b28ffea 100644 --- a/jorldy/config/m_iqn/pong_mlagent.py +++ b/jorldy/config/m_iqn/pong_mlagent.py @@ -1,6 +1,6 @@ ### Munchausen IQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "m_iqn", diff --git a/jorldy/config/m_iqn/procgen.py b/jorldy/config/m_iqn/procgen.py index dc0968d4..d8181d2c 100644 --- a/jorldy/config/m_iqn/procgen.py +++ b/jorldy/config/m_iqn/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/mpo/atari.py b/jorldy/config/mpo/atari.py index aad0eb0b..1db67da0 100644 --- a/jorldy/config/mpo/atari.py +++ b/jorldy/config/mpo/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/mpo/drone_delivery_mlagent.py b/jorldy/config/mpo/drone_delivery_mlagent.py index 72e04914..faa2a18e 100644 --- a/jorldy/config/mpo/drone_delivery_mlagent.py +++ b/jorldy/config/mpo/drone_delivery_mlagent.py @@ -1,6 +1,6 @@ ### MPO Drone Delivery Config ### -env = {"name": "drone_delivery_mlagent", "train_mode": True} +env = {"name": "drone_delivery_mlagent", "time_scale": 12.0} agent = { "name": "mpo", diff --git a/jorldy/config/mpo/hopper_mlagent.py b/jorldy/config/mpo/hopper_mlagent.py index 7c06618e..34f88ff5 100644 --- a/jorldy/config/mpo/hopper_mlagent.py +++ b/jorldy/config/mpo/hopper_mlagent.py @@ -1,6 +1,6 @@ ### MPO Hopper Config ### -env = {"name": "hopper_mlagent", "train_mode": True} +env = {"name": "hopper_mlagent", "time_scale": 12.0} agent = { "name": "mpo", diff --git a/jorldy/config/mpo/pong_mlagent.py b/jorldy/config/mpo/pong_mlagent.py index dbc8e2c1..7ff65585 100644 --- a/jorldy/config/mpo/pong_mlagent.py +++ b/jorldy/config/mpo/pong_mlagent.py @@ -1,6 +1,6 @@ ### MPO Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "mpo", diff --git a/jorldy/config/mpo/procgen.py b/jorldy/config/mpo/procgen.py index cab53be1..524eccc8 100644 --- a/jorldy/config/mpo/procgen.py +++ b/jorldy/config/mpo/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/multistep/atari.py b/jorldy/config/multistep/atari.py index a3741418..58065542 100644 --- a/jorldy/config/multistep/atari.py +++ b/jorldy/config/multistep/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + 
"no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/multistep/pong_mlagent.py b/jorldy/config/multistep/pong_mlagent.py index 403f68ee..a2a841fb 100644 --- a/jorldy/config/multistep/pong_mlagent.py +++ b/jorldy/config/multistep/pong_mlagent.py @@ -1,6 +1,6 @@ ### Multistep DQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "multistep", diff --git a/jorldy/config/multistep/procgen.py b/jorldy/config/multistep/procgen.py index b37dd63a..b2199c90 100644 --- a/jorldy/config/multistep/procgen.py +++ b/jorldy/config/multistep/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/noisy/atari.py b/jorldy/config/noisy/atari.py index 97bd8283..c01d199d 100644 --- a/jorldy/config/noisy/atari.py +++ b/jorldy/config/noisy/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": False, + "episodic_life": True, } agent = { diff --git a/jorldy/config/noisy/pong_mlagent.py b/jorldy/config/noisy/pong_mlagent.py index 8ce5a430..d1fbe4d2 100644 --- a/jorldy/config/noisy/pong_mlagent.py +++ b/jorldy/config/noisy/pong_mlagent.py @@ -1,6 +1,6 @@ ### Noisy DQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "noisy", diff --git a/jorldy/config/noisy/procgen.py b/jorldy/config/noisy/procgen.py index 6d0ef0bf..1efbb428 100644 --- a/jorldy/config/noisy/procgen.py +++ b/jorldy/config/noisy/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/per/atari.py b/jorldy/config/per/atari.py index d0cbfd61..f0acd276 100644 --- a/jorldy/config/per/atari.py +++ b/jorldy/config/per/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/per/pong_mlagent.py b/jorldy/config/per/pong_mlagent.py index 00b57a12..fd0ecea5 100644 --- a/jorldy/config/per/pong_mlagent.py +++ b/jorldy/config/per/pong_mlagent.py @@ -1,6 +1,6 @@ ### PER Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "per", diff --git a/jorldy/config/per/procgen.py b/jorldy/config/per/procgen.py index 857bb0e4..692e7a16 100644 --- a/jorldy/config/per/procgen.py +++ b/jorldy/config/per/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/ppo/atari.py b/jorldy/config/ppo/atari.py index a04727b6..1afb3e72 100644 --- a/jorldy/config/ppo/atari.py +++ b/jorldy/config/ppo/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { @@ -25,6 +26,7 @@ "vf_coef": 1.0, "ent_coef": 0.01, "clip_grad_norm": 1.0, + "use_standardization": True, } optim = { @@ -35,12 +37,12 @@ 
train = { "training": True, "load_path": None, - "run_step": 30000000, + "run_step": 10000000, "print_period": 10000, "save_period": 100000, "eval_iteration": 5, "record": True, - "record_period": 300000, + "record_period": 500000, # distributed setting "distributed_batch_size": 256, "update_period": agent["n_step"], diff --git a/jorldy/config/ppo/drone_delivery_mlagent.py b/jorldy/config/ppo/drone_delivery_mlagent.py index 3a672864..c754d1d8 100644 --- a/jorldy/config/ppo/drone_delivery_mlagent.py +++ b/jorldy/config/ppo/drone_delivery_mlagent.py @@ -1,6 +1,6 @@ ### PPO Drone Delivery Config ### -env = {"name": "drone_delivery_mlagent", "train_mode": True} +env = {"name": "drone_delivery_mlagent", "time_scale": 12.0} agent = { "name": "ppo", diff --git a/jorldy/config/ppo/hopper_mlagent.py b/jorldy/config/ppo/hopper_mlagent.py index 792634e1..277eb0b0 100644 --- a/jorldy/config/ppo/hopper_mlagent.py +++ b/jorldy/config/ppo/hopper_mlagent.py @@ -1,6 +1,6 @@ ### PPO Hopper Config ### -env = {"name": "hopper_mlagent", "train_mode": True} +env = {"name": "hopper_mlagent", "time_scale": 12.0} agent = { "name": "ppo", diff --git a/jorldy/config/ppo/pong_mlagent.py b/jorldy/config/ppo/pong_mlagent.py index 4f7adda9..22b4dc9d 100644 --- a/jorldy/config/ppo/pong_mlagent.py +++ b/jorldy/config/ppo/pong_mlagent.py @@ -1,6 +1,6 @@ ### PPO Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "ppo", diff --git a/jorldy/config/ppo/procgen.py b/jorldy/config/ppo/procgen.py index b6138d93..37cffc61 100644 --- a/jorldy/config/ppo/procgen.py +++ b/jorldy/config/ppo/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/ppo/super_mario_bros.py b/jorldy/config/ppo/super_mario_bros.py index 4252e03f..07603cfe 100644 --- a/jorldy/config/ppo/super_mario_bros.py +++ b/jorldy/config/ppo/super_mario_bros.py @@ -7,9 +7,9 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - # "no_op": True, + "no_op": True, "reward_clip": True, - "dead_penalty": False, + "episodic_life": True, } agent = { diff --git a/jorldy/config/qrdqn/atari.py b/jorldy/config/qrdqn/atari.py index a708a72c..e983b119 100644 --- a/jorldy/config/qrdqn/atari.py +++ b/jorldy/config/qrdqn/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/qrdqn/pong_mlagent.py b/jorldy/config/qrdqn/pong_mlagent.py index 880c82b7..6255452b 100644 --- a/jorldy/config/qrdqn/pong_mlagent.py +++ b/jorldy/config/qrdqn/pong_mlagent.py @@ -1,6 +1,6 @@ ### QRDQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "qrdqn", diff --git a/jorldy/config/qrdqn/procgen.py b/jorldy/config/qrdqn/procgen.py index c691bb93..dc56f7b8 100644 --- a/jorldy/config/qrdqn/procgen.py +++ b/jorldy/config/qrdqn/procgen.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/rainbow/atari.py b/jorldy/config/rainbow/atari.py index d3f44082..d5fb18b5 100644 --- a/jorldy/config/rainbow/atari.py +++ 
b/jorldy/config/rainbow/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": False, + "episodic_life": True, } agent = { diff --git a/jorldy/config/rainbow/pong_mlagent.py b/jorldy/config/rainbow/pong_mlagent.py index 7616a25f..b5f9996f 100644 --- a/jorldy/config/rainbow/pong_mlagent.py +++ b/jorldy/config/rainbow/pong_mlagent.py @@ -1,6 +1,6 @@ ### Rainbow DQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "rainbow", diff --git a/jorldy/config/rainbow/procgen.py b/jorldy/config/rainbow/procgen.py index 9f3e8ed5..3cd7bc03 100644 --- a/jorldy/config/rainbow/procgen.py +++ b/jorldy/config/rainbow/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/rainbow_iqn/atari.py b/jorldy/config/rainbow_iqn/atari.py index 2808ccf3..0a6bf21e 100644 --- a/jorldy/config/rainbow_iqn/atari.py +++ b/jorldy/config/rainbow_iqn/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/rainbow_iqn/pong_mlagent.py b/jorldy/config/rainbow_iqn/pong_mlagent.py index 7bacc04f..cd9da6d4 100644 --- a/jorldy/config/rainbow_iqn/pong_mlagent.py +++ b/jorldy/config/rainbow_iqn/pong_mlagent.py @@ -1,6 +1,6 @@ ### Rainbow IQN Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "rainbow_iqn", diff --git a/jorldy/config/rainbow_iqn/procgen.py b/jorldy/config/rainbow_iqn/procgen.py index 5a7492bd..ae695784 100644 --- a/jorldy/config/rainbow_iqn/procgen.py +++ b/jorldy/config/rainbow_iqn/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/rnd_ppo/atari.py b/jorldy/config/rnd_ppo/atari.py index 51628430..95c51efc 100644 --- a/jorldy/config/rnd_ppo/atari.py +++ b/jorldy/config/rnd_ppo/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/rnd_ppo/drone_delivery_mlagent.py b/jorldy/config/rnd_ppo/drone_delivery_mlagent.py index d1f4704a..dbce3783 100644 --- a/jorldy/config/rnd_ppo/drone_delivery_mlagent.py +++ b/jorldy/config/rnd_ppo/drone_delivery_mlagent.py @@ -1,6 +1,6 @@ ### RND PPO Drone Delivery Config ### -env = {"name": "drone_delivery_mlagent", "train_mode": True} +env = {"name": "drone_delivery_mlagent", "time_scale": 12.0} agent = { "name": "rnd_ppo", diff --git a/jorldy/config/rnd_ppo/pong_mlagent.py b/jorldy/config/rnd_ppo/pong_mlagent.py index 38b22588..938066b3 100644 --- a/jorldy/config/rnd_ppo/pong_mlagent.py +++ b/jorldy/config/rnd_ppo/pong_mlagent.py @@ -1,6 +1,6 @@ ### RND PPO Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "rnd_ppo", diff --git a/jorldy/config/rnd_ppo/procgen.py b/jorldy/config/rnd_ppo/procgen.py index 1b3e6185..77cae6eb 100644 --- 
a/jorldy/config/rnd_ppo/procgen.py +++ b/jorldy/config/rnd_ppo/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/config/rnd_ppo/super_mario_bros.py b/jorldy/config/rnd_ppo/super_mario_bros.py index 6723a313..a688e4a2 100644 --- a/jorldy/config/rnd_ppo/super_mario_bros.py +++ b/jorldy/config/rnd_ppo/super_mario_bros.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": False, + "episodic_life": True, } agent = { diff --git a/jorldy/config/sac/drone_delivery_mlagent.py b/jorldy/config/sac/drone_delivery_mlagent.py index 43ec8352..61ae09b7 100644 --- a/jorldy/config/sac/drone_delivery_mlagent.py +++ b/jorldy/config/sac/drone_delivery_mlagent.py @@ -1,6 +1,6 @@ ### SAC Drone Delivery Config ### -env = {"name": "drone_delivery_mlagent", "train_mode": True} +env = {"name": "drone_delivery_mlagent", "time_scale": 12.0} agent = { "name": "sac", diff --git a/jorldy/config/sac/hopper_mlagent.py b/jorldy/config/sac/hopper_mlagent.py index 271e9ac4..106000cc 100644 --- a/jorldy/config/sac/hopper_mlagent.py +++ b/jorldy/config/sac/hopper_mlagent.py @@ -1,6 +1,6 @@ ### SAC Hopper Config ### -env = {"name": "hopper_mlagent", "train_mode": True} +env = {"name": "hopper_mlagent", "time_scale": 12.0} agent = { "name": "sac", diff --git a/jorldy/config/vmpo/atari.py b/jorldy/config/vmpo/atari.py index 7e740db1..134dee22 100644 --- a/jorldy/config/vmpo/atari.py +++ b/jorldy/config/vmpo/atari.py @@ -7,9 +7,10 @@ "img_width": 84, "img_height": 84, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, - "dead_penalty": True, + "episodic_life": True, } agent = { diff --git a/jorldy/config/vmpo/drone_delivery_mlagent.py b/jorldy/config/vmpo/drone_delivery_mlagent.py index 040c9057..8b324548 100644 --- a/jorldy/config/vmpo/drone_delivery_mlagent.py +++ b/jorldy/config/vmpo/drone_delivery_mlagent.py @@ -1,6 +1,6 @@ ### V-MPO Drone Delivery Config ### -env = {"name": "drone_delivery_mlagent", "train_mode": True} +env = {"name": "drone_delivery_mlagent", "time_scale": 12.0} agent = { "name": "vmpo", diff --git a/jorldy/config/vmpo/hopper_mlagent.py b/jorldy/config/vmpo/hopper_mlagent.py index 263ed661..f295717b 100644 --- a/jorldy/config/vmpo/hopper_mlagent.py +++ b/jorldy/config/vmpo/hopper_mlagent.py @@ -1,6 +1,6 @@ ### V-MPO Hopper Config ### -env = {"name": "hopper_mlagent", "train_mode": True} +env = {"name": "hopper_mlagent", "time_scale": 12.0} agent = { "name": "vmpo", diff --git a/jorldy/config/vmpo/pong_mlagent.py b/jorldy/config/vmpo/pong_mlagent.py index 93c52a67..81d54517 100644 --- a/jorldy/config/vmpo/pong_mlagent.py +++ b/jorldy/config/vmpo/pong_mlagent.py @@ -1,6 +1,6 @@ ### V-MPO Pong_ML-Agents Config ### -env = {"name": "pong_mlagent", "train_mode": True} +env = {"name": "pong_mlagent", "time_scale": 12.0} agent = { "name": "vmpo", diff --git a/jorldy/config/vmpo/procgen.py b/jorldy/config/vmpo/procgen.py index 9f41d444..00f958f6 100644 --- a/jorldy/config/vmpo/procgen.py +++ b/jorldy/config/vmpo/procgen.py @@ -5,7 +5,8 @@ "render": False, "gray_img": True, "stack_frame": 4, - "no_op": False, + "no_op": True, + "skip_frame": 4, "reward_clip": True, } diff --git a/jorldy/core/agent/ppo.py b/jorldy/core/agent/ppo.py index 79adecd2..8d56c556 100644 --- a/jorldy/core/agent/ppo.py +++ b/jorldy/core/agent/ppo.py @@ -101,7 +101,7 @@ 
def learn(self): ) if self.use_standardization: adv = (adv - adv.mean(dim=1, keepdim=True)) / ( - adv.std(dim=1, keepdim=True) + 1e-7 + adv.std(dim=1, keepdim=True) + 1e-6 ) adv = adv.view(-1, 1) ret = adv + value diff --git a/jorldy/core/env/atari.py b/jorldy/core/env/atari.py index 8aeecab7..a99081e3 100644 --- a/jorldy/core/env/atari.py +++ b/jorldy/core/env/atari.py @@ -4,7 +4,7 @@ from .utils import ImgProcessor from .base import BaseEnv -COMMON_VERSION = "Deterministic-v4" +COMMON_VERSION = "NoFrameskip-v4" class _Atari(BaseEnv): @@ -19,9 +19,12 @@ class _Atari(BaseEnv): stack_frame (int): the number of stacked frame in one single state. life_key (str): key of life query function in emulator. no_op (bool): parameter that determine whether or not to operate during the first 30(no_op_max) steps. + skip_frame (int) : the number of skipped frame. reward_clip (bool): parameter that determine whether to use reward clipping. - reward_scale (float): reward normalization denominator. - dead_penatly (bool): parameter that determine whether to use penalty when the agent dies. + episodic_life (bool): parameter that determine done is True when dead is True. + fire_reset (bool): parameter that determine take action on reset for environments that are fixed until firing. + train_mode (bool): parameter that determine whether train mode or not. + """ def __init__( @@ -33,10 +36,12 @@ def __init__( img_height=84, stack_frame=4, life_key="lives", - no_op=False, - reward_clip=False, - reward_scale=None, - dead_penalty=False, + no_op=True, + skip_frame=4, + reward_clip=True, + episodic_life=True, + fire_reset=True, + train_mode=True, **kwargs, ): self.render = render @@ -61,30 +66,47 @@ def __init__( self.life_key = life_key self.no_op = no_op self.no_op_max = 30 + assert isinstance(skip_frame, int) and skip_frame > 0 + self.skip_frame = skip_frame + self.skip_frame_buffer = np.zeros( + (2,) + self.env.observation_space.shape, dtype=np.uint8 + ) self.reward_clip = reward_clip - self.reward_scale = reward_scale - self.dead_penalty = dead_penalty + self.episodic_life = episodic_life + self.was_real_done = True + self.fire_reset = fire_reset and ( + self.env.unwrapped.get_action_meanings()[1] == "FIRE" + ) + self.train_mode = train_mode print(f"{name} Start!") print(f"state size: {self.state_size}") print(f"action size: {self.action_size}") def reset(self): - self.env.reset() - state, reward, _, info = self.env.step(1) - - self.score = reward - self.life = info[self.life_key] - - if self.no_op: - for _ in range(np.random.randint(0, self.no_op_max)): + total_reward = 0 + if self.was_real_done: + state = self.env.reset() + self.was_real_done = False + if self.no_op: + num_no_op = np.random.randint(1, self.no_op_max) + for i in range(num_no_op): + state, reward, done, info = self.env.step(0) + total_reward += reward + if done: + self.env.reset() + if self.fire_reset: + state, reward, done, info = self.env.step(1) + self.life = info[self.life_key] + total_reward += reward + else: + if self.fire_reset: + state, reward, _, info = self.env.step(1) + else: state, reward, _, info = self.env.step(0) - self.score += reward - if self.life != info[self.life_key]: - if self.life > info[self.life_key]: - state, reward, _, _ = self.env.step(1) - self.score += reward - self.life = info[self.life_key] + self.life = info[self.life_key] + total_reward += reward + self.score = total_reward state = self.img_processor.convert_img(state) self.stacked_state = np.tile(state, (self.stack_frame, 1, 1)) @@ -95,33 +117,47 @@ def 
step(self, action): if self.render: self.env.render() - next_state, reward, done, info = self.env.step(action.item()) - self.score += reward - - dead = False - if self.life != info[self.life_key] and not done: - if self.life > info[self.life_key]: - state, _reward, _, _ = self.env.step(1) - self.score += _reward - dead = True - self.life = info[self.life_key] + dead, total_reward = False, 0 + for i in range(self.skip_frame): + next_state, reward, done, info = self.env.step(action.item()) + total_reward += reward + _dead = False + if self.life != info[self.life_key] and not done: + if self.life > info[self.life_key]: + if self.fire_reset: + next_state, reward, _, _ = self.env.step(1) + total_reward += reward + _dead = True + self.life = info[self.life_key] + + dead = dead or _dead + if i == self.skip_frame - 2: + self.skip_frame_buffer[0] = next_state + if i == self.skip_frame - 1: + self.skip_frame_buffer[1] = next_state + + if done: + self.was_real_done = True + break + + self.score += total_reward + + next_state = self.skip_frame_buffer.max(axis=0) next_state = self.img_processor.convert_img(next_state) self.stacked_state = np.concatenate( (self.stacked_state[self.num_channel :], next_state), axis=0 ) if self.reward_clip: - reward = ( - reward / self.reward_scale if self.reward_scale else np.tanh(reward) - ) + total_reward = np.sign(total_reward) - if dead and self.dead_penalty: - reward = -1 + if self.episodic_life and self.train_mode: + done = dead or done - next_state, reward, done = map( - lambda x: np.expand_dims(x, 0), [self.stacked_state, [reward], [done]] + next_state, total_reward, done = map( + lambda x: np.expand_dims(x, 0), [self.stacked_state, [total_reward], [done]] ) - return (next_state, reward, done) + return (next_state, total_reward, done) def close(self): self.env.close() diff --git a/jorldy/core/env/mlagent.py b/jorldy/core/env/mlagent.py index 5772b26a..18d14867 100644 --- a/jorldy/core/env/mlagent.py +++ b/jorldy/core/env/mlagent.py @@ -18,11 +18,11 @@ class _MLAgent(BaseEnv): Args: env_name (str): name of environment in ML-Agents. - train_mode (bool): parameter that determine whether to use low-resource training rendering mode. render (bool): parameter that determine whether to render. + time_scale (bool): parameter that determine frame time_scale. 
""" - def __init__(self, env_name, train_mode=True, render=False, id=None, **kwargs): + def __init__(self, env_name, render=False, time_scale=12.0, id=None, **kwargs): env_path = f"./core/env/mlagents/{env_name}/{match_build()}/{env_name}" id = ( np.random.randint(65534 - UnityEnvironment.BASE_ENVIRONMENT_PORT) @@ -43,7 +43,6 @@ def __init__(self, env_name, train_mode=True, render=False, id=None, **kwargs): self.env.reset() - self.train_mode = train_mode self.score = 0 self.behavior_name = list(self.env.behavior_specs.keys())[0] @@ -51,7 +50,7 @@ def __init__(self, env_name, train_mode=True, render=False, id=None, **kwargs): self.is_continuous_action = self.spec.action_spec.is_continuous() - engine_configuration_channel.set_configuration_parameters(time_scale=12.0) + engine_configuration_channel.set_configuration_parameters(time_scale=time_scale) dec, term = self.env.get_steps(self.behavior_name) def reset(self): diff --git a/jorldy/core/env/nes.py b/jorldy/core/env/nes.py index 71088575..88d4fc38 100644 --- a/jorldy/core/env/nes.py +++ b/jorldy/core/env/nes.py @@ -17,7 +17,9 @@ class _Nes(_Atari): """ def __init__(self, name, **kwargs): - super(_Nes, self).__init__(name=name, life_key="life", **kwargs) + super(_Nes, self).__init__( + name=name, life_key="life", fire_reset=False, **kwargs + ) self.env = JoypadSpace(self.env, RIGHT_ONLY) print(f"action size changed: {self.action_size} -> {self.env.action_space.n}") self.action_size = self.env.action_space.n @@ -29,7 +31,4 @@ def get_frame(self): class SuperMarioBros(_Nes): def __init__(self, **kwargs): - reward_scale = 15.0 - super(SuperMarioBros, self).__init__( - "SuperMarioBros-v0", reward_scale=reward_scale, **kwargs - ) + super(SuperMarioBros, self).__init__("SuperMarioBros-v0", **kwargs) diff --git a/jorldy/core/env/procgen.py b/jorldy/core/env/procgen.py index 87751366..991a8e94 100644 --- a/jorldy/core/env/procgen.py +++ b/jorldy/core/env/procgen.py @@ -17,6 +17,7 @@ class _Procgen(BaseEnv): img_height (int): height of image input. stack_frame (int): the number of stacked frame in one single state. no_op (bool): parameter that determine whether or not to operate during the first 30(no_op_max) steps. + skip_frame (int) : the number of skipped frame. reward_clip (bool): parameter that determine whether to use reward clipping. 
""" @@ -28,8 +29,9 @@ def __init__( img_width=64, img_height=64, stack_frame=4, - no_op=False, - reward_clip=False, + no_op=True, + skip_frame=4, + reward_clip=True, **kwargs, ): self.render = render @@ -46,12 +48,18 @@ def __init__( ) self.env = ProcgenEnv(1, name, render_mode="rgb_array") + self.state_size = [stack_frame, img_height, img_width] self.action_size = self.env.action_space.n self.action_type = "discrete" self.score = 0 self.no_op = no_op self.no_op_max = 30 + assert isinstance(skip_frame, int) and skip_frame > 0 + self.skip_frame = skip_frame + self.skip_frame_buffer = np.zeros( + (2,) + self.env.observation_space["rgb"].shape, dtype=np.uint8 + ) self.reward_clip = reward_clip print(f"{name} Start!") @@ -59,16 +67,22 @@ def __init__( print(f"action size: {self.action_size}") def reset(self): - state = self.env.reset()["rgb"][0] - - obs, reward, _, info = self.env.step(np.ones(1)) - self.score = reward[0] + self.env.reset() + total_reward = 0 + obs = self.env.reset() if self.no_op: - for _ in range(np.random.randint(0, self.no_op_max)): - obs, reward, _, info = self.env.step(np.zeros(1)) - self.score += reward - state = self.img_processor.convert_img(obs["rgb"][0]) + num_no_op = np.random.randint(1, self.no_op_max) + for i in range(num_no_op): + obs, reward, done, info = self.env.step(np.zeros(1)) + total_reward += reward[0] + if done: + obs = self.env.reset() + + state = obs["rgb"][0] + self.score = total_reward + + state = self.img_processor.convert_img(state) self.stacked_state = np.tile(state, (self.stack_frame, 1, 1)) state = np.expand_dims(self.stacked_state, 0) return state @@ -76,21 +90,34 @@ def reset(self): def step(self, action): if self.render: self.env.render() - next_obs, reward, done, info = self.env.step(action.reshape((1,))) - self.score += reward[0] - next_state = self.img_processor.convert_img(next_obs["rgb"][0]) + total_reward = 0 + for i in range(self.skip_frame): + next_obs, reward, done, info = self.env.step(action.reshape((1,))) + next_state = next_obs["rgb"][0] + total_reward += reward + + if i == self.skip_frame - 2: + self.skip_frame_buffer[0] = next_state + if i == self.skip_frame - 1: + self.skip_frame_buffer[1] = next_state + + if done: + break + + next_state = self.skip_frame_buffer.max(axis=0) + next_state = self.img_processor.convert_img(next_state) self.stacked_state = np.concatenate( (self.stacked_state[self.num_channel :], next_state), axis=0 ) if self.reward_clip: - reward = np.tanh(reward) + total_reward = np.sign(total_reward) - next_state, reward, done = map( - lambda x: np.expand_dims(x, 0), [self.stacked_state, reward, done] + next_state, total_reward, done = map( + lambda x: np.expand_dims(x, 0), [self.stacked_state, total_reward, done] ) - return (next_state, reward, done) + return (next_state, total_reward, done) def close(self): self.env.close() diff --git a/jorldy/manager/eval_manager.py b/jorldy/manager/eval_manager.py index b95bacf4..779bb923 100644 --- a/jorldy/manager/eval_manager.py +++ b/jorldy/manager/eval_manager.py @@ -3,7 +3,7 @@ class EvalManager: def __init__(self, Env, env_config, iteration=10, record=None, record_period=None): - self.env = Env(**env_config) + self.env = Env(**env_config, train_mode=False) self.iteration = iteration if iteration else 10 assert iteration > 0 self.record = record and self.env.recordable() diff --git a/jorldy/manager/log_manager.py b/jorldy/manager/log_manager.py index cedb9ce5..6b269f5b 100644 --- a/jorldy/manager/log_manager.py +++ b/jorldy/manager/log_manager.py @@ -9,7 +9,7 @@ class 
LogManager: def __init__(self, env, id, experiment=None): self.id = id - now = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + now = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f") self.path = ( f"./logs/{experiment}/{env}/{id}/{now}/" if experiment diff --git a/jorldy/test/conftest.py b/jorldy/test/conftest.py index b5909cb3..7a045381 100644 --- a/jorldy/test/conftest.py +++ b/jorldy/test/conftest.py @@ -7,7 +7,7 @@ class _MockEnv: - def __init__(self, state_size, action_size, action_type, episode_len): + def __init__(self, state_size, action_size, action_type, episode_len, **kwargs): self.state_size = state_size self.action_size = action_size self.action_type = action_type @@ -46,7 +46,7 @@ def recordable(self): class _MockAgent: - def __init__(self, state_size, action_size, action_type): + def __init__(self, state_size, action_size, action_type, **kwargs): self.state_size = state_size self.action_size = action_size self.action_type = action_type
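
Taken together, the config hunks above converge on a single Atari env block. The keys touched by this patch, as shown in the jorldy/config/dqn/atari.py hunk, now read:

env = {
    # ... keys above this point are outside the hunk and unchanged ...
    "img_width": 84,
    "img_height": 84,
    "stack_frame": 4,
    "no_op": True,          # random no-op actions at the start of each episode
    "skip_frame": 4,        # new: repeat each action over 4 emulator frames
    "reward_clip": True,    # now clips to sign instead of scaling / tanh
    "episodic_life": True,  # replaces the old dead_penalty flag
}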
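
In jorldy/core/agent/ppo.py only the standardization epsilon changes (1e-7 to 1e-6); the advantage normalization itself is unchanged. The tensor shapes below are illustrative:

import torch

adv = torch.randn(4, 128)  # illustrative (num_workers, n_step) advantage tensor
adv = (adv - adv.mean(dim=1, keepdim=True)) / (adv.std(dim=1, keepdim=True) + 1e-6)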
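
The central change in jorldy/core/env/atari.py is the switch from the Deterministic-v4 ROMs to NoFrameskip-v4 plus an explicit skip-frame loop: the chosen action is repeated skip_frame times, the last two raw frames are kept in a two-slot buffer, and their pixel-wise maximum becomes the observation, which suppresses Atari sprite flicker. Below is a minimal standalone sketch of the same technique over a plain gym env; MaxSkipEnv is an illustrative name, not a class from the JORLDY codebase, and it assumes the pre-0.26 gym step API used throughout the diff.

import numpy as np
import gym


class MaxSkipEnv(gym.Wrapper):
    """Repeat an action `skip` times and max-pool the last two raw frames."""

    def __init__(self, env, skip=4):
        super().__init__(env)
        assert isinstance(skip, int) and skip > 0
        self._skip = skip
        # Two-slot buffer for the last raw frames; maxing them removes flicker.
        self._buffer = np.zeros((2,) + env.observation_space.shape, dtype=np.uint8)

    def step(self, action):
        total_reward, done, info = 0.0, False, {}
        for i in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            total_reward += reward
            if i == self._skip - 2:
                self._buffer[0] = obs
            if i == self._skip - 1:
                self._buffer[1] = obs
            if done:
                break
        # Pixel-wise max over the last two frames seen in this skip window.
        return self._buffer.max(axis=0), total_reward, done, info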
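
The episodic_life, fire_reset, and no_op handling follows the usual DeepMind-style Atari conventions: a lost life ends the learning episode while was_real_done tracks true game-over, games whose second action is FIRE get that action pressed after every reset, and a random number of no-ops decorrelates initial states. Here is a compressed sketch of that bookkeeping as a separate wrapper (hypothetical class name; the patch folds the same logic directly into _Atari):

import numpy as np
import gym


class EpisodicLifeFireNoopReset(gym.Wrapper):
    """Sketch of the episodic-life / fire-reset / no-op-reset bookkeeping."""

    def __init__(self, env, noop_max=30):
        super().__init__(env)
        self.lives = 0
        self.was_real_done = True
        self.noop_max = noop_max
        # Only press FIRE on reset when the game actually defines a FIRE action.
        self.fire_reset = env.unwrapped.get_action_meanings()[1] == "FIRE"

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.was_real_done = done
        lives = self.env.unwrapped.ale.lives()
        if 0 < lives < self.lives:
            done = True  # end the learning episode on a lost life
        self.lives = lives
        return obs, reward, done, info

    def reset(self, **kwargs):
        if self.was_real_done:
            obs = self.env.reset(**kwargs)
            # Random number of initial no-ops to decorrelate start states.
            for _ in range(np.random.randint(1, self.noop_max)):
                obs, _, done, _ = self.env.step(0)
                if done:
                    obs = self.env.reset(**kwargs)
        else:
            # A life was lost but the emulator did not reset; advance one frame.
            obs, _, _, _ = self.env.step(0)
        if self.fire_reset:
            obs, _, _, _ = self.env.step(1)
        self.lives = self.env.unwrapped.ale.lives()
        self.was_real_done = False
        return obs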
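
Reward handling also changes: the old reward_scale / dead_penalty options are gone, and the reward summed over one skip window is clipped to its sign, the standard {-1, 0, +1} Atari convention, instead of being divided by a scale or squashed with tanh. For example:

import numpy as np

window_rewards = [0.0, 4.0, -2.5]   # rewards collected over one 4-frame skip window
total = sum(window_rewards)          # 1.5
old_clip = np.tanh(total)            # previous behaviour, roughly 0.905
new_clip = np.sign(total)            # new behaviour: 1.0 (always -1, 0, or +1)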
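
On the ML-Agents side, the per-config train_mode flag is replaced by an explicit time_scale value (12.0 in the updated configs) that is forwarded to the engine configuration side channel instead of being hard-coded. The relevant mlagents-envs calls, with a hypothetical build path:

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import (
    EngineConfigurationChannel,
)

channel = EngineConfigurationChannel()
# The build path is hypothetical; JORLDY resolves it from the env name and platform.
env = UnityEnvironment(
    file_name="./core/env/mlagents/pong_mlagent/build", side_channels=[channel]
)
env.reset()
# time_scale now comes from the config dict (e.g. 12.0) rather than a constant.
channel.set_configuration_parameters(time_scale=12.0)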
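
Finally, log directory timestamps gain microseconds (%f), so two runs launched within the same second no longer write to the same folder:

import datetime

now = datetime.datetime.now()
old_stamp = now.strftime("%Y%m%d%H%M%S")    # e.g. 20220103114754
new_stamp = now.strftime("%Y%m%d%H%M%S%f")  # e.g. 20220103114754123456 (adds microseconds)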