diff --git a/reinforcement_learning/actor_critic.py b/reinforcement_learning/actor_critic.py
index c5a3ee6d79..b58437683e 100644
--- a/reinforcement_learning/actor_critic.py
+++ b/reinforcement_learning/actor_critic.py
@@ -141,7 +141,7 @@ def main():
     for i_episode in count(1):
 
         # reset environment and episode reward
-        state, _ = env.reset()
+        state = env.reset()
         ep_reward = 0
 
         # for each episode, only run 9999 steps so that we don't
@@ -152,7 +152,7 @@ def main():
             action = select_action(state)
 
             # take the action
-            state, reward, done, _, _ = env.step(action)
+            state, reward, done, _ = env.step(action)
 
             if args.render:
                 env.render()
diff --git a/reinforcement_learning/reinforce.py b/reinforcement_learning/reinforce.py
index 961598174c..8c1cda5d8a 100644
--- a/reinforcement_learning/reinforce.py
+++ b/reinforcement_learning/reinforce.py
@@ -81,11 +81,11 @@ def finish_episode():
 def main():
     running_reward = 10
     for i_episode in count(1):
-        state, _ = env.reset()
+        state = env.reset()
         ep_reward = 0
         for t in range(1, 10000):  # Don't infinite loop while learning
             action = select_action(state)
-            state, reward, done, _, _ = env.step(action)
+            state, reward, done, _ = env.step(action)
             if args.render:
                 env.render()
             policy.rewards.append(reward)
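
Note (not part of the patch): the hunks above pin both scripts to the pre-0.26 Gym API, where env.reset() returns only the observation and env.step() returns a 4-tuple. A minimal compatibility sketch that works under either API could look like the following; the helper names reset_env and step_env are hypothetical, not part of the repository.

    import gym

    def reset_env(env):
        # gym >= 0.26 returns (observation, info); older releases return
        # only the observation. Assumes a non-tuple observation space
        # such as CartPole's Box space.
        out = env.reset()
        return out[0] if isinstance(out, tuple) else out

    def step_env(env, action):
        # gym >= 0.26 returns (obs, reward, terminated, truncated, info);
        # older releases return (obs, reward, done, info).
        out = env.step(action)
        if len(out) == 5:
            obs, reward, terminated, truncated, info = out
            return obs, reward, terminated or truncated, info
        return out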