From 13db41abdfcc99f62312bff557f526ac352ecd49 Mon Sep 17 00:00:00 2001 From: sanggusti Date: Wed, 10 Jan 2024 20:11:29 +0700 Subject: [PATCH] Feat: Update rl examples There are bugs around env.state() because the current version of gym does not work with these examples. This commit fixes them by updating how the return values of env.reset() and env.step() are unpacked. --- reinforcement_learning/actor_critic.py | 4 ++-- reinforcement_learning/reinforce.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/reinforcement_learning/actor_critic.py b/reinforcement_learning/actor_critic.py index c5a3ee6d79..b58437683e 100644 --- a/reinforcement_learning/actor_critic.py +++ b/reinforcement_learning/actor_critic.py @@ -141,7 +141,7 @@ def main(): for i_episode in count(1): # reset environment and episode reward - state, _ = env.reset() + state = env.reset() ep_reward = 0 # for each episode, only run 9999 steps so that we don't @@ -152,7 +152,7 @@ def main(): action = select_action(state) # take the action - state, reward, done, _, _ = env.step(action) + state, reward, done, _ = env.step(action) if args.render: env.render() diff --git a/reinforcement_learning/reinforce.py b/reinforcement_learning/reinforce.py index 961598174c..8c1cda5d8a 100644 --- a/reinforcement_learning/reinforce.py +++ b/reinforcement_learning/reinforce.py @@ -81,11 +81,11 @@ def finish_episode(): def main(): running_reward = 10 for i_episode in count(1): - state, _ = env.reset() + state = env.reset() ep_reward = 0 for t in range(1, 10000): # Don't infinite loop while learning action = select_action(state) - state, reward, done, _, _ = env.step(action) + state, reward, done, _ = env.step(action) if args.render: env.render() policy.rewards.append(reward)