diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index ca6d0c6dc..1873de0b3 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -128,7 +128,7 @@ jobs: run: poetry install --with pytest,procgen - name: Downgrade setuptools run: poetry run pip install setuptools==59.5.0 - - name: Run pybullet tests + - name: Run procgen tests run: poetry run pytest tests/test_procgen.py test-mujoco-envs: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 46cfcdd1c..71477b47e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,7 +11,7 @@ Good luck and have fun! ```bash poetry install poetry install --with atari -poetry install --with pybullet +poetry install --with mujoco ``` Then you can run the scripts under the poetry environment in two ways: `poetry run` or `poetry shell`. diff --git a/README.md b/README.md index 2a6ceb6f0..11a4401f0 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,16 @@ [](https://huggingface.co/cleanrl) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vwxyzjn/cleanrl/blob/master/docs/get-started/CleanRL_Huggingface_Integration_Demo.ipynb) +# ⚠️ NOTE: Gym 0.26.1 Migration + +This branch is an ongoing effort to integrate the latest gym into CleanRL. Checkout [vwxyzjn/cleanrl#277](https://github.com/vwxyzjn/cleanrl/pull/277) for the current progress. + +Things that work: +* `dqn.py` +* `dqn_jax.py` +* `ppo.py` + +---------- CleanRL is a Deep Reinforcement Learning library that provides high-quality single-file implementation with research-friendly features. The implementation is clean and simple, yet we can scale it to run thousands of experiments using AWS Batch. The highlight features of CleanRL are: @@ -93,11 +103,11 @@ python cleanrl/ppo_atari_envpool.py --env-id BreakoutNoFrameskip-v4 # Side effects such as lower sample efficiency might occur poetry run python ppo_atari_envpool.py --clip-coef=0.2 --num-envs=16 --num-minibatches=8 --num-steps=128 --update-epochs=3 -# pybullet -poetry install --with pybullet -python cleanrl/td3_continuous_action.py --env-id MinitaurBulletDuckEnv-v0 -python cleanrl/ddpg_continuous_action.py --env-id MinitaurBulletDuckEnv-v0 -python cleanrl/sac_continuous_action.py --env-id MinitaurBulletDuckEnv-v0 +# mujoco +poetry install --with mujoco +python cleanrl/td3_continuous_action.py --env-id HalfCheetah-v4 +python cleanrl/ddpg_continuous_action.py --env-id HalfCheetah-v4 +python cleanrl/sac_continuous_action.py --env-id HalfCheetah-v4 # procgen poetry install --with procgen diff --git a/cleanrl/atari_wrappers.py b/cleanrl/atari_wrappers.py new file mode 100644 index 000000000..4ffb4a03e --- /dev/null +++ b/cleanrl/atari_wrappers.py @@ -0,0 +1,254 @@ +from typing import Dict, Tuple + +import gym +import numpy as np +from gym import spaces + +try: + import cv2 # pytype:disable=import-error + + cv2.ocl.setUseOpenCL(False) +except ImportError: + cv2 = None + +from stable_baselines3.common.type_aliases import Gym26ResetReturn, Gym26StepReturn + + +class NoopResetEnv(gym.Wrapper): + """ + Sample initial states by taking random number of no-ops on reset. + No-op is assumed to be action 0. 
+ + :param env: the environment to wrap + :param noop_max: the maximum value of no-ops to run + """ + + def __init__(self, env: gym.Env, noop_max: int = 30): + gym.Wrapper.__init__(self, env) + self.noop_max = noop_max + self.override_num_noops = None + self.noop_action = 0 + assert env.unwrapped.get_action_meanings()[0] == "NOOP" + + def reset(self, **kwargs) -> Tuple[np.ndarray, Dict]: + self.env.reset(**kwargs) + if self.override_num_noops is not None: + noops = self.override_num_noops + else: + noops = self.unwrapped.np_random.integers(1, self.noop_max + 1) + assert noops > 0 + obs = np.zeros(0) + info = {} + for _ in range(noops): + obs, _, done, truncated, info = self.env.step(self.noop_action) + if done or truncated: + obs, info = self.env.reset(**kwargs) + return obs, info + + +class FireResetEnv(gym.Wrapper): + """ + Take action on reset for environments that are fixed until firing. + + :param env: the environment to wrap + """ + + def __init__(self, env: gym.Env): + gym.Wrapper.__init__(self, env) + assert env.unwrapped.get_action_meanings()[1] == "FIRE" + assert len(env.unwrapped.get_action_meanings()) >= 3 + + def reset(self, **kwargs) -> Tuple[np.ndarray, Dict]: + self.env.reset(**kwargs) + obs, _, done, truncated, _ = self.env.step(1) + if done or truncated: + self.env.reset(**kwargs) + obs, _, done, truncated, _ = self.env.step(2) + if done or truncated: + self.env.reset(**kwargs) + return obs, {} + + +class EpisodicLifeEnv(gym.Wrapper): + """ + Make end-of-life == end-of-episode, but only reset on true game over. + Done by DeepMind for the DQN and co. since it helps value estimation. + + :param env: the environment to wrap + """ + + def __init__(self, env: gym.Env): + gym.Wrapper.__init__(self, env) + self.lives = 0 + self.was_real_done = True + + def step(self, action: int) -> Gym26StepReturn: + obs, reward, done, truncated, info = self.env.step(action) + self.was_real_done = done + # check current lives, make loss of life terminal, + # then update lives to handle bonus lives + lives = self.env.unwrapped.ale.lives() + if 0 < lives < self.lives: + # for Qbert sometimes we stay in lives == 0 condition for a few frames + # so its important to keep lives > 0, so that we only reset once + # the environment advertises done. + done = True + self.lives = lives + return obs, reward, done, truncated, info + + def reset(self, **kwargs) -> Tuple[np.ndarray, Dict]: + """ + Calls the Gym environment reset, only when lives are exhausted. + This way all states are still reachable even though lives are episodic, + and the learner need not know about any of this behind-the-scenes. 
+ + :param kwargs: Extra keywords passed to env.reset() call + :return: the first observation of the environment + """ + if self.was_real_done: + obs, info = self.env.reset(**kwargs) + else: + # no-op step to advance from terminal/lost life state + obs, _, _, _, info = self.env.step(0) + self.lives = self.env.unwrapped.ale.lives() + return obs, info + + +class MaxAndSkipEnv(gym.Wrapper): + """ + Return only every ``skip``-th frame (frameskipping) + + :param env: the environment + :param skip: number of ``skip``-th frame + """ + + def __init__(self, env: gym.Env, skip: int = 4): + gym.Wrapper.__init__(self, env) + # most recent raw observations (for max pooling across time steps) + self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=env.observation_space.dtype) + self._skip = skip + + def step(self, action: int) -> Gym26StepReturn: + """ + Step the environment with the given action + Repeat action, sum reward, and max over last observations. + + :param action: the action + :return: observation, reward, done, information + """ + total_reward = 0.0 + terminated = truncated = False + for i in range(self._skip): + obs, reward, terminated, truncated, info = self.env.step(action) + done = terminated or truncated + if i == self._skip - 2: + self._obs_buffer[0] = obs + if i == self._skip - 1: + self._obs_buffer[1] = obs + total_reward += reward + if done: + break + # Note that the observation on the done=True frame + # doesn't matter + max_frame = self._obs_buffer.max(axis=0) + + return max_frame, total_reward, terminated, truncated, info + + def reset(self, **kwargs) -> Gym26ResetReturn: + return self.env.reset(**kwargs) + + +class ClipRewardEnv(gym.RewardWrapper): + """ + Clips the reward to {+1, 0, -1} by its sign. + + :param env: the environment + """ + + def __init__(self, env: gym.Env): + gym.RewardWrapper.__init__(self, env) + + def reward(self, reward: float) -> float: + """ + Bin reward to {+1, 0, -1} by its sign. + + :param reward: + :return: + """ + return np.sign(reward) + + +class WarpFrame(gym.ObservationWrapper): + """ + Convert to grayscale and warp frames to 84x84 (default) + as done in the Nature paper and later work. + + :param env: the environment + :param width: + :param height: + """ + + def __init__(self, env: gym.Env, width: int = 84, height: int = 84): + gym.ObservationWrapper.__init__(self, env) + self.width = width + self.height = height + self.observation_space = spaces.Box( + low=0, high=255, shape=(self.height, self.width, 1), dtype=env.observation_space.dtype + ) + + def observation(self, frame: np.ndarray) -> np.ndarray: + """ + returns the current observation from a frame + + :param frame: environment frame + :return: the observation + """ + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) + frame = cv2.resize(frame, (self.width, self.height), interpolation=cv2.INTER_AREA) + return frame[:, :, None] + + +class AtariWrapper(gym.Wrapper): + """ + Atari 2600 preprocessings + + Specifically: + + * NoopReset: obtain initial state by taking random number of no-ops on reset. + * Frame skipping: 4 by default + * Max-pooling: most recent two observations + * Termination signal when a life is lost. + * Resize to a square image: 84x84 by default + * Grayscale observation + * Clip reward to {-1, 0, 1} + + :param env: gym environment + :param noop_max: max number of no-ops + :param frame_skip: the frequency at which the agent experiences the game. 
+ :param screen_size: resize Atari frame + :param terminal_on_life_loss: if True, then step() returns done=True whenever a life is lost. + :param clip_reward: If True (default), the reward is clip to {-1, 0, 1} depending on its sign. + """ + + def __init__( + self, + env: gym.Env, + noop_max: int = 30, + frame_skip: int = 4, + screen_size: int = 84, + terminal_on_life_loss: bool = True, + clip_reward: bool = True, + ): + if noop_max > 0: + env = NoopResetEnv(env, noop_max=noop_max) + if frame_skip > 0: + env = MaxAndSkipEnv(env, skip=frame_skip) + if terminal_on_life_loss: + env = EpisodicLifeEnv(env) + if "FIRE" in env.unwrapped.get_action_meanings(): + env = FireResetEnv(env) + env = WarpFrame(env, width=screen_size, height=screen_size) + if clip_reward: + env = ClipRewardEnv(env) + + super().__init__(env) diff --git a/cleanrl/c51.py b/cleanrl/c51.py index c8eb276d4..842ec5111 100755 --- a/cleanrl/c51.py +++ b/cleanrl/c51.py @@ -83,7 +83,7 @@ def thunk(): if capture_video: if idx == 0: env = gym.wrappers.RecordVideo(env, f"videos/{run_name}") - env.seed(seed) + env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -165,12 +165,12 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): envs.single_observation_space, envs.single_action_space, device, - handle_timeout_termination=True, + handle_timeout_termination=False, ) start_time = time.time() # TRY NOT TO MODIFY: start the game - obs = envs.reset() + obs, _ = envs.reset(seed=args.seed) for global_step in range(args.total_timesteps): # ALGO LOGIC: put action logic here epsilon = linear_schedule(args.start_e, args.end_e, args.exploration_fraction * args.total_timesteps, global_step) @@ -181,23 +181,25 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): actions = actions.cpu().numpy() # TRY NOT TO MODIFY: execute the game and log data. 
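# Editor's sketch (illustrative only, not part of the diff): the pattern the hunks
# below adopt for gym 0.26 vector envs. `step` now returns a 5-tuple, and
# `SyncVectorEnv` aggregates per-env infos into a single dict whose boolean masks
# live under underscore-prefixed keys. Variable names follow the scripts below.
next_obs, rewards, terminateds, truncateds, infos = envs.step(actions)
if "episode" in infos:  # written by gym.wrappers.RecordEpisodeStatistics
    env_idx = infos["_episode"].nonzero()[0][0]       # first finished sub-env
    episodic_return = infos["episode"]["r"][env_idx]
    episodic_length = infos["episode"]["l"][env_idx]
real_next_obs = next_obs.copy()
if "final_observation" in infos:  # true last obs of auto-reset sub-envs
    for env_idx, finished in enumerate(infos["_final_observation"]):
        if finished:
            real_next_obs[env_idx] = infos["final_observation"][env_idx]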
- next_obs, rewards, dones, infos = envs.step(actions) + next_obs, rewards, terminateds, _, infos = envs.step(actions) # TRY NOT TO MODIFY: record rewards for plotting purposes - for info in infos: - if "episode" in info.keys(): - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break - - # TRY NOT TO MODIFY: save data to reply buffer; handle `terminal_observation` - real_next_obs = next_obs.copy() - for idx, d in enumerate(dones): - if d: - real_next_obs[idx] = infos[idx]["terminal_observation"] - rb.add(obs, real_next_obs, actions, rewards, dones, infos) + if "episode" in infos: + first_idx = infos["_episode"].nonzero()[0][0] + r = infos["episode"]["r"][first_idx] + l = infos["episode"]["l"][first_idx] + print(f"global_step={global_step}, episodic_return={r}") + writer.add_scalar("charts/episodic_return", r, global_step) + writer.add_scalar("charts/episodic_length", l, global_step) + + # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` + real_next_obs = next_obs + if "final_observation" in infos: + real_next_obs = next_obs.copy() + for idx, d in enumerate(infos["_final_observation"]): + if d: + real_next_obs[idx] = infos["final_observation"][idx] + rb.add(obs, real_next_obs, actions, rewards, terminateds, infos) # TRY NOT TO MODIFY: CRUCIAL step easy to overlook obs = next_obs diff --git a/cleanrl/c51_atari.py b/cleanrl/c51_atari.py index 84931646c..691262625 100755 --- a/cleanrl/c51_atari.py +++ b/cleanrl/c51_atari.py @@ -99,7 +99,6 @@ def thunk(): env = gym.wrappers.ResizeObservation(env, (84, 84)) env = gym.wrappers.GrayScaleObservation(env) env = gym.wrappers.FrameStack(env, 4) - env.seed(seed) env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -187,12 +186,12 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): envs.single_action_space, device, optimize_memory_usage=True, - handle_timeout_termination=True, + handle_timeout_termination=False, ) start_time = time.time() # TRY NOT TO MODIFY: start the game - obs = envs.reset() + obs, _ = envs.reset(seed=args.seed) for global_step in range(args.total_timesteps): # ALGO LOGIC: put action logic here epsilon = linear_schedule(args.start_e, args.end_e, args.exploration_fraction * args.total_timesteps, global_step) @@ -203,23 +202,24 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): actions = actions.cpu().numpy() # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, rewards, dones, infos = envs.step(actions) + next_obs, rewards, terminateds, _, infos = envs.step(actions) # TRY NOT TO MODIFY: record rewards for plotting purposes - for info in infos: - if "episode" in info.keys(): - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break - - # TRY NOT TO MODIFY: save data to reply buffer; handle `terminal_observation` - real_next_obs = next_obs.copy() - for idx, d in enumerate(dones): - if d: - real_next_obs[idx] = infos[idx]["terminal_observation"] - rb.add(obs, real_next_obs, actions, rewards, dones, infos) + if "episode" in infos: + first_idx = infos["_episode"].nonzero()[0][0] + r = infos["episode"]["r"][first_idx] + l = infos["episode"]["l"][first_idx] + print(f"global_step={global_step}, episodic_return={r}") + writer.add_scalar("charts/episodic_return", r, global_step) + writer.add_scalar("charts/episodic_length", l, global_step) + + # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` + real_next_obs = next_obs + if "final_observation" in infos: + real_next_obs = next_obs.copy() + for idx, d in enumerate(infos["_final_observation"]): + if d: + real_next_obs[idx] = infos["final_observation"][idx] # TRY NOT TO MODIFY: CRUCIAL step easy to overlook obs = next_obs diff --git a/cleanrl/ddpg_continuous_action.py b/cleanrl/ddpg_continuous_action.py index 00a821918..70dab1e06 100644 --- a/cleanrl/ddpg_continuous_action.py +++ b/cleanrl/ddpg_continuous_action.py @@ -7,7 +7,6 @@ import gym import numpy as np -import pybullet_envs # noqa import torch import torch.nn as nn import torch.nn.functional as F @@ -71,7 +70,7 @@ def thunk(): if capture_video: if idx == 0: env = gym.wrappers.RecordVideo(env, f"videos/{run_name}") - env.seed(seed) + env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -164,12 +163,12 @@ def forward(self, x): envs.single_observation_space, envs.single_action_space, device, - handle_timeout_termination=True, + handle_timeout_termination=False, ) start_time = time.time() # TRY NOT TO MODIFY: start the game - obs = envs.reset() + obs, _ = envs.reset(seed=args.seed) for global_step in range(args.total_timesteps): # ALGO LOGIC: put action logic here if global_step < args.learning_starts: @@ -181,7 +180,7 @@ def forward(self, x): actions = actions.cpu().numpy().clip(envs.single_action_space.low, envs.single_action_space.high) # TRY NOT TO MODIFY: execute the game and log data. 
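# Editor's sketch (illustrative only, not part of the diff): the off-policy scripts
# now build the SB3 ReplayBuffer with handle_timeout_termination=False, since gym
# 0.26 already separates `terminated` from `truncated`; only the terminated flag is
# stored, so the TD target r + (1 - terminated) * gamma * Q'(s', a') bootstraps
# through time-limit truncations and stops only at true terminations. Names mirror
# the DDPG hunks below.
rb = ReplayBuffer(
    args.buffer_size,
    envs.single_observation_space,
    envs.single_action_space,
    device,
    handle_timeout_termination=False,
)
rb.add(obs, real_next_obs, actions, rewards, terminateds, infos)  # per rollout step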
- next_obs, rewards, dones, infos = envs.step(actions) + next_obs, rewards, terminateds, _, infos = envs.step(actions) # TRY NOT TO MODIFY: record rewards for plotting purposes for info in infos: @@ -191,12 +190,12 @@ def forward(self, x): writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) break - # TRY NOT TO MODIFY: save data to reply buffer; handle `terminal_observation` + # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` real_next_obs = next_obs.copy() - for idx, d in enumerate(dones): + for idx, d in enumerate(terminateds): if d: - real_next_obs[idx] = infos[idx]["terminal_observation"] - rb.add(obs, real_next_obs, actions, rewards, dones, infos) + real_next_obs[idx] = infos[idx]["final_observation"] + rb.add(obs, real_next_obs, actions, rewards, terminateds, _, infos) # TRY NOT TO MODIFY: CRUCIAL step easy to overlook obs = next_obs @@ -208,7 +207,7 @@ def forward(self, x): next_state_actions = target_actor(data.next_observations) qf1_next_target = qf1_target(data.next_observations, next_state_actions) next_q_value = data.rewards.flatten() + (1 - data.dones.flatten()) * args.gamma * (qf1_next_target).view(-1) - + # TODO: to be updated to data.terminateds once SB3 is updated qf1_a_values = qf1(data.observations, data.actions).view(-1) qf1_loss = F.mse_loss(qf1_a_values, next_q_value) diff --git a/cleanrl/ddpg_continuous_action_jax.py b/cleanrl/ddpg_continuous_action_jax.py index b6291e4dc..92396133d 100644 --- a/cleanrl/ddpg_continuous_action_jax.py +++ b/cleanrl/ddpg_continuous_action_jax.py @@ -13,7 +13,6 @@ import jax.numpy as jnp import numpy as np import optax -import pybullet_envs # noqa from flax.training.train_state import TrainState from stable_baselines3.common.buffers import ReplayBuffer from torch.utils.tensorboard import SummaryWriter @@ -70,7 +69,7 @@ def thunk(): if capture_video: if idx == 0: env = gym.wrappers.RecordVideo(env, f"videos/{run_name}") - env.seed(seed) + env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -150,11 +149,11 @@ class TrainState(TrainState): envs.single_observation_space, envs.single_action_space, device="cpu", - handle_timeout_termination=True, + handle_timeout_termination=False, ) # TRY NOT TO MODIFY: start the game - obs = envs.reset() + obs, _ = envs.reset(seed=args.seed) action_scale = np.array((envs.action_space.high - envs.action_space.low) / 2.0) action_bias = np.array((envs.action_space.high + envs.action_space.low) / 2.0) actor = Actor( @@ -186,11 +185,11 @@ def update_critic( actions: np.ndarray, next_observations: np.ndarray, rewards: np.ndarray, - dones: np.ndarray, + terminateds: np.ndarray, ): next_state_actions = (actor.apply(actor_state.target_params, next_observations)).clip(-1, 1) # TODO: proper clip qf1_next_target = qf1.apply(qf1_state.target_params, next_observations, next_state_actions).reshape(-1) - next_q_value = (rewards + (1 - dones) * args.gamma * (qf1_next_target)).reshape(-1) + next_q_value = (rewards + (1 - terminateds) * args.gamma * (qf1_next_target)).reshape(-1) def mse_loss(params): qf1_a_values = qf1.apply(params, observations, actions).squeeze() @@ -235,7 +234,7 @@ def actor_loss(params): ) # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, rewards, dones, infos = envs.step(actions) + next_obs, rewards, terminateds, _, infos = envs.step(actions) # TRY NOT TO MODIFY: record rewards for plotting purposes for info in infos: @@ -245,12 +244,13 @@ def actor_loss(params): writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) break - # TRY NOT TO MODIFY: save data to reply buffer; handle `terminal_observation` - real_next_obs = next_obs.copy() - for idx, d in enumerate(dones): - if d: - real_next_obs[idx] = infos[idx]["terminal_observation"] - rb.add(obs, real_next_obs, actions, rewards, dones, infos) + # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` + real_next_obs = next_obs + if "final_observation" in infos: + real_next_obs = next_obs.copy() + for idx, d in enumerate(infos["_final_observation"]): + if d: + real_next_obs[idx] = infos["final_observation"][idx] # TRY NOT TO MODIFY: CRUCIAL step easy to overlook obs = next_obs @@ -265,7 +265,7 @@ def actor_loss(params): data.actions.numpy(), data.next_observations.numpy(), data.rewards.flatten().numpy(), - data.dones.flatten().numpy(), + data.dones.flatten().numpy(), # TODO: to be updated to data.terminateds once SB3 is updated ) if global_step % args.policy_frequency == 0: actor_state, qf1_state, actor_loss_value = update_actor( diff --git a/cleanrl/dqn.py b/cleanrl/dqn.py index fcfafa6ea..14864e77a 100644 --- a/cleanrl/dqn.py +++ b/cleanrl/dqn.py @@ -76,11 +76,9 @@ def parse_args(): def make_env(env_id, seed, idx, capture_video, run_name): def thunk(): env = gym.make(env_id) - env = gym.wrappers.RecordEpisodeStatistics(env) if capture_video: if idx == 0: env = gym.wrappers.RecordVideo(env, f"videos/{run_name}") - env.seed(seed) env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -140,6 +138,7 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): # env setup envs = gym.vector.SyncVectorEnv([make_env(args.env_id, args.seed, 0, args.capture_video, run_name)]) + envs = gym.wrappers.RecordEpisodeStatistics(envs) assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" q_network = QNetwork(envs).to(device) @@ -152,12 +151,12 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): envs.single_observation_space, envs.single_action_space, device, - handle_timeout_termination=True, + handle_timeout_termination=False, ) start_time = time.time() # TRY NOT TO MODIFY: start the game - obs = envs.reset() + obs = envs.reset(seed=args.seed) for global_step in range(args.total_timesteps): # ALGO LOGIC: put action logic here epsilon = linear_schedule(args.start_e, args.end_e, args.exploration_fraction * args.total_timesteps, global_step) @@ -168,23 +167,24 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): actions = torch.argmax(q_values, dim=1).cpu().numpy() # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, rewards, dones, infos = envs.step(actions) + next_obs, rewards, terminateds, _, infos = envs.step(actions) # TRY NOT TO MODIFY: record rewards for plotting purposes - for info in infos: - if "episode" in info.keys(): - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break - - # TRY NOT TO MODIFY: save data to reply buffer; handle `terminal_observation` - real_next_obs = next_obs.copy() - for idx, d in enumerate(dones): - if d: - real_next_obs[idx] = infos[idx]["terminal_observation"] - rb.add(obs, real_next_obs, actions, rewards, dones, infos) + if "episode" in infos: + first_idx = infos["_episode"].nonzero()[0][0] + r = infos["episode"]["r"][first_idx] + l = infos["episode"]["l"][first_idx] + print(f"global_step={global_step}, episodic_return={r}") + writer.add_scalar("charts/episodic_return", r, global_step) + writer.add_scalar("charts/episodic_length", l, global_step) + + # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` + real_next_obs = next_obs + if "final_observation" in infos: + real_next_obs = next_obs.copy() + for idx, d in enumerate(infos["_final_observation"]): + if d: + real_next_obs[idx] = infos["final_observation"][idx] # TRY NOT TO MODIFY: CRUCIAL step easy to overlook obs = next_obs diff --git a/cleanrl/dqn_atari.py b/cleanrl/dqn_atari.py index e0e5a2b4d..5c3439841 100644 --- a/cleanrl/dqn_atari.py +++ b/cleanrl/dqn_atari.py @@ -96,7 +96,7 @@ def thunk(): env = gym.wrappers.ResizeObservation(env, (84, 84)) env = gym.wrappers.GrayScaleObservation(env) env = gym.wrappers.FrameStack(env, 4) - env.seed(seed) + env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -174,12 +174,12 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): envs.single_action_space, device, optimize_memory_usage=True, - handle_timeout_termination=True, + handle_timeout_termination=False, ) start_time = time.time() # TRY NOT TO MODIFY: start the game - obs = envs.reset() + obs, _ = envs.reset(seed=args.seed) for global_step in range(args.total_timesteps): # ALGO LOGIC: put action logic here epsilon = linear_schedule(args.start_e, args.end_e, args.exploration_fraction * args.total_timesteps, global_step) @@ -190,23 +190,24 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): actions = torch.argmax(q_values, dim=1).cpu().numpy() # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, rewards, dones, infos = envs.step(actions) + next_obs, rewards, terminateds, _, infos = envs.step(actions) # TRY NOT TO MODIFY: record rewards for plotting purposes - for info in infos: - if "episode" in info.keys(): - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break - - # TRY NOT TO MODIFY: save data to reply buffer; handle `terminal_observation` - real_next_obs = next_obs.copy() - for idx, d in enumerate(dones): - if d: - real_next_obs[idx] = infos[idx]["terminal_observation"] - rb.add(obs, real_next_obs, actions, rewards, dones, infos) + if "episode" in infos: + first_idx = infos["_episode"].nonzero()[0][0] + r = infos["episode"]["r"][first_idx] + l = infos["episode"]["l"][first_idx] + print(f"global_step={global_step}, episodic_return={r}") + writer.add_scalar("charts/episodic_return", r, global_step) + writer.add_scalar("charts/episodic_length", l, global_step) + + # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` + real_next_obs = next_obs + if "final_observation" in infos: + real_next_obs = next_obs.copy() + for idx, d in enumerate(infos["_final_observation"]): + if d: + real_next_obs[idx] = infos["final_observation"][idx] # TRY NOT TO MODIFY: CRUCIAL step easy to overlook obs = next_obs diff --git a/cleanrl/dqn_atari_jax.py b/cleanrl/dqn_atari_jax.py index 12a4e16ae..75a63b6d3 100644 --- a/cleanrl/dqn_atari_jax.py +++ b/cleanrl/dqn_atari_jax.py @@ -98,7 +98,7 @@ def thunk(): env = gym.wrappers.ResizeObservation(env, (84, 84)) env = gym.wrappers.GrayScaleObservation(env) env = gym.wrappers.FrameStack(env, 4) - env.seed(seed) + env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -167,7 +167,7 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): envs = gym.vector.SyncVectorEnv([make_env(args.env_id, args.seed, 0, args.capture_video, run_name)]) assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - obs = envs.reset() + obs, _ = envs.reset(seed=args.seed) q_network = QNetwork(action_dim=envs.single_action_space.n) @@ -188,14 +188,14 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): envs.single_action_space, "cpu", optimize_memory_usage=True, - handle_timeout_termination=True, + handle_timeout_termination=False, ) @jax.jit - def update(q_state, observations, actions, next_observations, rewards, dones): + def update(q_state, observations, actions, next_observations, rewards, terminateds): q_next_target = q_network.apply(q_state.target_params, next_observations) # (batch_size, num_actions) q_next_target = jnp.max(q_next_target, axis=-1) # (batch_size,) - next_q_value = rewards + (1 - dones) * args.gamma * q_next_target + next_q_value = rewards + (1 - terminateds) * args.gamma * q_next_target def mse_loss(params): q_pred = q_network.apply(params, observations) # (batch_size, num_actions) @@ -209,7 +209,7 @@ def mse_loss(params): start_time = time.time() # TRY NOT TO MODIFY: start the game - obs = envs.reset() + obs, _ = envs.reset(seed=args.seed) for global_step in range(args.total_timesteps): # ALGO LOGIC: put action logic here epsilon = linear_schedule(args.start_e, args.end_e, args.exploration_fraction * args.total_timesteps, global_step) @@ -221,23 +221,24 @@ def 
mse_loss(params): actions = jax.device_get(actions) # TRY NOT TO MODIFY: execute the game and log data. - next_obs, rewards, dones, infos = envs.step(actions) + next_obs, rewards, terminateds, _, infos = envs.step(actions) # TRY NOT TO MODIFY: record rewards for plotting purposes - for info in infos: - if "episode" in info.keys(): - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break - - # TRY NOT TO MODIFY: save data to reply buffer; handle `terminal_observation` - real_next_obs = next_obs.copy() - for idx, d in enumerate(dones): - if d: - real_next_obs[idx] = infos[idx]["terminal_observation"] - rb.add(obs, real_next_obs, actions, rewards, dones, infos) + if "episode" in infos: + first_idx = infos["_episode"].nonzero()[0][0] + r = infos["episode"]["r"][first_idx] + l = infos["episode"]["l"][first_idx] + print(f"global_step={global_step}, episodic_return={r}") + writer.add_scalar("charts/episodic_return", r, global_step) + writer.add_scalar("charts/episodic_length", l, global_step) + + # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` + real_next_obs = next_obs + if "final_observation" in infos: + real_next_obs = next_obs.copy() + for idx, d in enumerate(infos["_final_observation"]): + if d: + real_next_obs[idx] = infos["final_observation"][idx] # TRY NOT TO MODIFY: CRUCIAL step easy to overlook obs = next_obs diff --git a/cleanrl/dqn_jax.py b/cleanrl/dqn_jax.py index 82c05499e..866882ea2 100644 --- a/cleanrl/dqn_jax.py +++ b/cleanrl/dqn_jax.py @@ -74,11 +74,9 @@ def parse_args(): def make_env(env_id, seed, idx, capture_video, run_name): def thunk(): env = gym.make(env_id) - env = gym.wrappers.RecordEpisodeStatistics(env) if capture_video: if idx == 0: env = gym.wrappers.RecordVideo(env, f"videos/{run_name}") - env.seed(seed) env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -138,9 +136,10 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): # env setup envs = gym.vector.SyncVectorEnv([make_env(args.env_id, args.seed, 0, args.capture_video, run_name)]) + envs = gym.wrappers.RecordEpisodeStatistics(envs) assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - obs = envs.reset() + obs, _ = envs.reset(seed=args.seed) q_network = QNetwork(action_dim=envs.single_action_space.n) @@ -160,14 +159,14 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): envs.single_observation_space, envs.single_action_space, "cpu", - handle_timeout_termination=True, + handle_timeout_termination=False, ) @jax.jit - def update(q_state, observations, actions, next_observations, rewards, dones): + def update(q_state, observations, actions, next_observations, rewards, terminateds): q_next_target = q_network.apply(q_state.target_params, next_observations) # (batch_size, num_actions) q_next_target = jnp.max(q_next_target, axis=-1) # (batch_size,) - next_q_value = rewards + (1 - dones) * args.gamma * q_next_target + next_q_value = rewards + (1 - terminateds) * args.gamma * q_next_target def mse_loss(params): q_pred = q_network.apply(params, observations) # (batch_size, num_actions) @@ -181,7 +180,7 @@ def mse_loss(params): start_time = time.time() # TRY NOT TO MODIFY: start the game - obs = envs.reset() + obs, _ = 
envs.reset(seed=args.seed) for global_step in range(args.total_timesteps): # ALGO LOGIC: put action logic here epsilon = linear_schedule(args.start_e, args.end_e, args.exploration_fraction * args.total_timesteps, global_step) @@ -193,23 +192,25 @@ def mse_loss(params): actions = jax.device_get(actions) # TRY NOT TO MODIFY: execute the game and log data. - next_obs, rewards, dones, infos = envs.step(actions) + next_obs, rewards, terminateds, _, infos = envs.step(actions) # TRY NOT TO MODIFY: record rewards for plotting purposes - for info in infos: - if "episode" in info.keys(): - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break - - # TRY NOT TO MODIFY: save data to reply buffer; handle `terminal_observation` - real_next_obs = next_obs.copy() - for idx, d in enumerate(dones): - if d: - real_next_obs[idx] = infos[idx]["terminal_observation"] - rb.add(obs, real_next_obs, actions, rewards, dones, infos) + if "episode" in infos: + first_idx = infos["_episode"].nonzero()[0][0] + r = infos["episode"]["r"][first_idx] + l = infos["episode"]["l"][first_idx] + print(f"global_step={global_step}, episodic_return={r}") + writer.add_scalar("charts/episodic_return", r, global_step) + writer.add_scalar("charts/episodic_length", l, global_step) + + # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` + real_next_obs = next_obs + if "final_observation" in infos: + real_next_obs = next_obs.copy() + for idx, d in enumerate(infos["_final_observation"]): + if d: + real_next_obs[idx] = infos["final_observation"][idx] + rb.add(obs, real_next_obs, actions, rewards, terminateds, infos) # TRY NOT TO MODIFY: CRUCIAL step easy to overlook obs = next_obs diff --git a/cleanrl/ppg_procgen.py b/cleanrl/ppg_procgen.py index ea574a814..71589b3ef 100644 --- a/cleanrl/ppg_procgen.py +++ b/cleanrl/ppg_procgen.py @@ -261,7 +261,7 @@ def get_pi(self, x): actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + terminateds = torch.zeros((args.num_steps, args.num_envs)).to(device) values = torch.zeros((args.num_steps, args.num_envs)).to(device) aux_obs = torch.zeros( (args.num_steps, args.aux_batch_rollouts) + envs.single_observation_space.shape, dtype=torch.uint8 @@ -271,8 +271,8 @@ def get_pi(self, x): # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) - next_done = torch.zeros(args.num_envs).to(device) + next_obs = torch.Tensor(envs.reset()[0]).to(device) + next_terminated = torch.zeros(args.num_envs).to(device) num_updates = args.total_timesteps // args.batch_size num_phases = int(num_updates // args.n_iteration) @@ -289,7 +289,7 @@ def get_pi(self, x): for step in range(0, args.num_steps): global_step += 1 * args.num_envs obs[step] = next_obs - dones[step] = next_done + terminateds[step] = next_terminated # ALGO LOGIC: action logic with torch.no_grad(): @@ -299,9 +299,9 @@ def get_pi(self, x): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminated, _, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_terminated = torch.Tensor(next_obs).to(device), torch.Tensor(terminated).to(device) for item in info: if "episode" in item.keys(): @@ -310,7 +310,7 @@ def get_pi(self, x): writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) break - # bootstrap value if not done + # bootstrap value if not terminated with torch.no_grad(): next_value = agent.get_value(next_obs).reshape(1, -1) if args.gae: @@ -318,10 +318,10 @@ def get_pi(self, x): lastgaelam = 0 for t in reversed(range(args.num_steps)): if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done + nextnonterminal = 1.0 - next_terminated nextvalues = next_value else: - nextnonterminal = 1.0 - dones[t + 1] + nextnonterminal = 1.0 - terminateds[t + 1] nextvalues = values[t + 1] delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam @@ -330,10 +330,10 @@ def get_pi(self, x): returns = torch.zeros_like(rewards).to(device) for t in reversed(range(args.num_steps)): if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done + nextnonterminal = 1.0 - next_terminated next_return = next_value else: - nextnonterminal = 1.0 - dones[t + 1] + nextnonterminal = 1.0 - terminateds[t + 1] next_return = returns[t + 1] returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return advantages = returns - values diff --git a/cleanrl/ppo.py b/cleanrl/ppo.py index 4a7c2999d..90789ca47 100644 --- a/cleanrl/ppo.py +++ b/cleanrl/ppo.py @@ -80,11 +80,9 @@ def parse_args(): def make_env(env_id, seed, idx, capture_video, run_name): def thunk(): env = gym.make(env_id) - env = gym.wrappers.RecordEpisodeStatistics(env) if capture_video: if idx == 0: env = gym.wrappers.RecordVideo(env, f"videos/{run_name}") - env.seed(seed) env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -160,6 +158,7 @@ def get_action_and_value(self, x, action=None): envs = gym.vector.SyncVectorEnv( [make_env(args.env_id, args.seed + i, i, args.capture_video, run_name) for i in range(args.num_envs)] ) + envs = gym.wrappers.RecordEpisodeStatistics(envs) assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" agent = Agent(envs).to(device) @@ -170,14 +169,14 @@ def get_action_and_value(self, x, action=None): actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + terminateds = torch.zeros((args.num_steps, args.num_envs)).to(device) values = torch.zeros((args.num_steps, args.num_envs)).to(device) # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) - next_done = torch.zeros(args.num_envs).to(device) + next_obs = torch.Tensor(envs.reset()[0]).to(device) + next_terminated = torch.zeros(args.num_envs).to(device) num_updates = args.total_timesteps // args.batch_size for update in range(1, num_updates + 1): @@ -190,7 +189,7 @@ def get_action_and_value(self, x, action=None): 
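# Editor's sketch (illustrative only, not part of the diff): the on-policy scripts
# move RecordEpisodeStatistics onto the *vector* env, unpack the (obs, info) reset
# tuple, and keep only the terminated flags for GAE (the `_` in the step call below
# discards truncation). A minimal setup, mirroring the ppo.py hunks above:
envs = gym.vector.SyncVectorEnv(
    [make_env(args.env_id, args.seed + i, i, args.capture_video, run_name) for i in range(args.num_envs)]
)
envs = gym.wrappers.RecordEpisodeStatistics(envs)
next_obs = torch.Tensor(envs.reset()[0]).to(device)
next_terminated = torch.zeros(args.num_envs).to(device)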
for step in range(0, args.num_steps): global_step += 1 * args.num_envs obs[step] = next_obs - dones[step] = next_done + terminateds[step] = next_terminated # ALGO LOGIC: action logic with torch.no_grad(): @@ -200,28 +199,29 @@ def get_action_and_value(self, x, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. - next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminated, _, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_terminated = torch.Tensor(next_obs).to(device), torch.Tensor(terminated).to(device) - for item in info: - if "episode" in item.keys(): - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) - break + if "episode" in info: + first_idx = info["_episode"].nonzero()[0][0] + r = info["episode"]["r"][first_idx] + l = info["episode"]["l"][first_idx] + print(f"global_step={global_step}, episodic_return={r}") + writer.add_scalar("charts/episodic_return", r, global_step) + writer.add_scalar("charts/episodic_length", l, global_step) - # bootstrap value if not done + # bootstrap value if not terminated with torch.no_grad(): next_value = agent.get_value(next_obs).reshape(1, -1) advantages = torch.zeros_like(rewards).to(device) lastgaelam = 0 for t in reversed(range(args.num_steps)): if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done + nextnonterminal = 1.0 - next_terminated nextvalues = next_value else: - nextnonterminal = 1.0 - dones[t + 1] + nextnonterminal = 1.0 - terminateds[t + 1] nextvalues = values[t + 1] delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam diff --git a/cleanrl/ppo_atari.py b/cleanrl/ppo_atari.py index 14be7a470..fa72bcadd 100644 --- a/cleanrl/ppo_atari.py +++ b/cleanrl/ppo_atari.py @@ -13,7 +13,7 @@ from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter -from stable_baselines3.common.atari_wrappers import ( # isort:skip +from atari_wrappers import ( # isort:skip ClipRewardEnv, EpisodicLifeEnv, FireResetEnv, @@ -40,7 +40,7 @@ def parse_args(): parser.add_argument("--wandb-entity", type=str, default=None, help="the entity (team) of wandb's project") parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") + help="weather to capture videos of the agent performances (check out `videos` folder)") # Algorithm specific arguments parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4", @@ -100,7 +100,7 @@ def thunk(): env = gym.wrappers.ResizeObservation(env, (84, 84)) env = gym.wrappers.GrayScaleObservation(env) env = gym.wrappers.FrameStack(env, 4) - env.seed(seed) + env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -186,14 +186,14 @@ def get_action_and_value(self, x, action=None): actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) rewards = torch.zeros((args.num_steps, 
args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + terminateds = torch.zeros((args.num_steps, args.num_envs)).to(device) values = torch.zeros((args.num_steps, args.num_envs)).to(device) # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) - next_done = torch.zeros(args.num_envs).to(device) + next_obs = torch.Tensor(envs.reset()[0]).to(device) + next_terminated = torch.zeros(args.num_envs).to(device) num_updates = args.total_timesteps // args.batch_size for update in range(1, num_updates + 1): @@ -206,7 +206,7 @@ def get_action_and_value(self, x, action=None): for step in range(0, args.num_steps): global_step += 1 * args.num_envs obs[step] = next_obs - dones[step] = next_done + terminateds[step] = next_terminated # ALGO LOGIC: action logic with torch.no_grad(): @@ -216,28 +216,29 @@ def get_action_and_value(self, x, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. - next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminated, _, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_terminated = torch.Tensor(next_obs).to(device), torch.Tensor(terminated).to(device) - for item in info: - if "episode" in item.keys(): - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) - break + if "episode" in info: + first_idx = info["_episode"].nonzero()[0][0] + r = info["episode"]["r"][first_idx] + l = info["episode"]["l"][first_idx] + print(f"global_step={global_step}, episodic_return={r}") + writer.add_scalar("charts/episodic_return", r, global_step) + writer.add_scalar("charts/episodic_length", l, global_step) - # bootstrap value if not done + # bootstrap value if not terminated with torch.no_grad(): next_value = agent.get_value(next_obs).reshape(1, -1) advantages = torch.zeros_like(rewards).to(device) lastgaelam = 0 for t in reversed(range(args.num_steps)): if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done + nextnonterminal = 1.0 - next_terminated nextvalues = next_value else: - nextnonterminal = 1.0 - dones[t + 1] + nextnonterminal = 1.0 - terminateds[t + 1] nextvalues = values[t + 1] delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam diff --git a/cleanrl/ppo_atari_envpool.py b/cleanrl/ppo_atari_envpool.py index f72f223ed..70ccaee71 100644 --- a/cleanrl/ppo_atari_envpool.py +++ b/cleanrl/ppo_atari_envpool.py @@ -86,16 +86,16 @@ def __init__(self, env, deque_size=100): self.episode_lengths = None def reset(self, **kwargs): - observations = super().reset(**kwargs) + observations, _ = super().reset(**kwargs) self.episode_returns = np.zeros(self.num_envs, dtype=np.float32) self.episode_lengths = np.zeros(self.num_envs, dtype=np.int32) self.lives = np.zeros(self.num_envs, dtype=np.int32) self.returned_episode_returns = np.zeros(self.num_envs, dtype=np.float32) self.returned_episode_lengths = np.zeros(self.num_envs, dtype=np.int32) - return observations + return observations, {} def step(self, action): - observations, rewards, dones, infos = 
super().step(action) + observations, rewards, terminateds, truncateds, infos = super().step(action) self.episode_returns += infos["reward"] self.episode_lengths += 1 self.returned_episode_returns[:] = self.episode_returns @@ -107,7 +107,8 @@ def step(self, action): return ( observations, rewards, - dones, + terminateds, + truncateds, infos, ) @@ -199,15 +200,15 @@ def get_action_and_value(self, x, action=None): actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + terminateds = torch.zeros((args.num_steps, args.num_envs)).to(device) values = torch.zeros((args.num_steps, args.num_envs)).to(device) avg_returns = deque(maxlen=20) # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) - next_done = torch.zeros(args.num_envs).to(device) + next_obs, _ = torch.Tensor(envs.reset(seed=args.seed)).to(device) + next_terminated = torch.zeros(args.num_envs).to(device) num_updates = args.total_timesteps // args.batch_size for update in range(1, num_updates + 1): @@ -220,7 +221,7 @@ def get_action_and_value(self, x, action=None): for step in range(0, args.num_steps): global_step += 1 * args.num_envs obs[step] = next_obs - dones[step] = next_done + terminateds[step] = next_terminated # ALGO LOGIC: action logic with torch.no_grad(): @@ -230,11 +231,11 @@ def get_action_and_value(self, x, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. - next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminated, _, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_terminated = torch.Tensor(next_obs).to(device), torch.Tensor(terminated).to(device) - for idx, d in enumerate(done): + for idx, d in enumerate(terminated): if d and info["lives"][idx] == 0: print(f"global_step={global_step}, episodic_return={info['r'][idx]}") avg_returns.append(info["r"][idx]) @@ -242,17 +243,17 @@ def get_action_and_value(self, x, action=None): writer.add_scalar("charts/episodic_return", info["r"][idx], global_step) writer.add_scalar("charts/episodic_length", info["l"][idx], global_step) - # bootstrap value if not done + # bootstrap value if not terminated with torch.no_grad(): next_value = agent.get_value(next_obs).reshape(1, -1) advantages = torch.zeros_like(rewards).to(device) lastgaelam = 0 for t in reversed(range(args.num_steps)): if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done + nextnonterminal = 1.0 - next_terminated nextvalues = next_value else: - nextnonterminal = 1.0 - dones[t + 1] + nextnonterminal = 1.0 - terminateds[t + 1] nextvalues = values[t + 1] delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam diff --git a/cleanrl/ppo_atari_lstm.py b/cleanrl/ppo_atari_lstm.py index a90aa4ce8..e1a8d3e67 100644 --- a/cleanrl/ppo_atari_lstm.py +++ b/cleanrl/ppo_atari_lstm.py @@ -100,7 +100,7 @@ def thunk(): env = gym.wrappers.ResizeObservation(env, (84, 84)) env = gym.wrappers.GrayScaleObservation(env) env = gym.wrappers.FrameStack(env, 1) - env.seed(seed) + 
env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -137,15 +137,15 @@ def __init__(self, envs): self.actor = layer_init(nn.Linear(128, envs.single_action_space.n), std=0.01) self.critic = layer_init(nn.Linear(128, 1), std=1) - def get_states(self, x, lstm_state, done): + def get_states(self, x, lstm_state, terminated): hidden = self.network(x / 255.0) # LSTM logic batch_size = lstm_state[0].shape[1] hidden = hidden.reshape((-1, batch_size, self.lstm.input_size)) - done = done.reshape((-1, batch_size)) + terminated = terminated.reshape((-1, batch_size)) new_hidden = [] - for h, d in zip(hidden, done): + for h, d in zip(hidden, terminated): h, lstm_state = self.lstm( h.unsqueeze(0), ( @@ -157,12 +157,12 @@ def get_states(self, x, lstm_state, done): new_hidden = torch.flatten(torch.cat(new_hidden), 0, 1) return new_hidden, lstm_state - def get_value(self, x, lstm_state, done): - hidden, _ = self.get_states(x, lstm_state, done) + def get_value(self, x, lstm_state, terminated): + hidden, _ = self.get_states(x, lstm_state, terminated) return self.critic(hidden) - def get_action_and_value(self, x, lstm_state, done, action=None): - hidden, lstm_state = self.get_states(x, lstm_state, done) + def get_action_and_value(self, x, lstm_state, terminated, action=None): + hidden, lstm_state = self.get_states(x, lstm_state, terminated) logits = self.actor(hidden) probs = Categorical(logits=logits) if action is None: @@ -213,14 +213,14 @@ def get_action_and_value(self, x, lstm_state, done, action=None): actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + terminateds = torch.zeros((args.num_steps, args.num_envs)).to(device) values = torch.zeros((args.num_steps, args.num_envs)).to(device) # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) - next_done = torch.zeros(args.num_envs).to(device) + next_obs = torch.Tensor(envs.reset()[0]).to(device) + next_terminated = torch.zeros(args.num_envs).to(device) next_lstm_state = ( torch.zeros(agent.lstm.num_layers, args.num_envs, agent.lstm.hidden_size).to(device), torch.zeros(agent.lstm.num_layers, args.num_envs, agent.lstm.hidden_size).to(device), @@ -238,19 +238,21 @@ def get_action_and_value(self, x, lstm_state, done, action=None): for step in range(0, args.num_steps): global_step += 1 * args.num_envs obs[step] = next_obs - dones[step] = next_done + terminateds[step] = next_terminated # ALGO LOGIC: action logic with torch.no_grad(): - action, logprob, _, value, next_lstm_state = agent.get_action_and_value(next_obs, next_lstm_state, next_done) + action, logprob, _, value, next_lstm_state = agent.get_action_and_value( + next_obs, next_lstm_state, next_terminated + ) values[step] = value.flatten() actions[step] = action logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. 
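# Editor's sketch (illustrative only, not part of the diff): in the LSTM variant the
# per-step terminated flags also act as a reset mask for the recurrent state --
# get_states (above) scales the hidden and cell states by (1 - d), so the LSTM
# starts from zeros right after an episode ends:
reset_mask = (1.0 - terminated).view(1, -1, 1)
lstm_state = (reset_mask * lstm_state[0], reset_mask * lstm_state[1])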
- next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminated, _, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_terminated = torch.Tensor(next_obs).to(device), torch.Tensor(terminated).to(device) for item in info: if "episode" in item.keys(): @@ -259,21 +261,21 @@ def get_action_and_value(self, x, lstm_state, done, action=None): writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) break - # bootstrap value if not done + # bootstrap value if not terminated with torch.no_grad(): next_value = agent.get_value( next_obs, next_lstm_state, - next_done, + next_terminated, ).reshape(1, -1) advantages = torch.zeros_like(rewards).to(device) lastgaelam = 0 for t in reversed(range(args.num_steps)): if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done + nextnonterminal = 1.0 - next_terminated nextvalues = next_value else: - nextnonterminal = 1.0 - dones[t + 1] + nextnonterminal = 1.0 - terminateds[t + 1] nextvalues = values[t + 1] delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam @@ -283,7 +285,7 @@ def get_action_and_value(self, x, lstm_state, done, action=None): b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) b_logprobs = logprobs.reshape(-1) b_actions = actions.reshape((-1,) + envs.single_action_space.shape) - b_dones = dones.reshape(-1) + b_terminateds = terminateds.reshape(-1) b_advantages = advantages.reshape(-1) b_returns = returns.reshape(-1) b_values = values.reshape(-1) @@ -304,7 +306,7 @@ def get_action_and_value(self, x, lstm_state, done, action=None): _, newlogprob, entropy, newvalue, _ = agent.get_action_and_value( b_obs[mb_inds], (initial_lstm_state[0][:, mbenvinds], initial_lstm_state[1][:, mbenvinds]), - b_dones[mb_inds], + b_terminateds[mb_inds], b_actions.long()[mb_inds], ) logratio = newlogprob - b_logprobs[mb_inds] diff --git a/cleanrl/ppo_atari_multigpu.py b/cleanrl/ppo_atari_multigpu.py index 8955e1298..19597ca3d 100644 --- a/cleanrl/ppo_atari_multigpu.py +++ b/cleanrl/ppo_atari_multigpu.py @@ -106,7 +106,7 @@ def thunk(): env = gym.wrappers.ResizeObservation(env, (84, 84)) env = gym.wrappers.GrayScaleObservation(env) env = gym.wrappers.FrameStack(env, 4) - env.seed(seed) + env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -225,14 +225,14 @@ def get_action_and_value(self, x, action=None): actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + terminateds = torch.zeros((args.num_steps, args.num_envs)).to(device) values = torch.zeros((args.num_steps, args.num_envs)).to(device) # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) - next_done = torch.zeros(args.num_envs).to(device) + next_obs = torch.Tensor(envs.reset()[0]).to(device) + next_terminated = torch.zeros(args.num_envs).to(device) num_updates = args.total_timesteps // (args.batch_size * world_size) for update in range(1, num_updates + 1): @@ -245,7 +245,7 @@ def get_action_and_value(self, x, action=None): for step in range(0, 
args.num_steps): global_step += 1 * args.num_envs * world_size obs[step] = next_obs - dones[step] = next_done + terminateds[step] = next_terminated # ALGO LOGIC: action logic with torch.no_grad(): @@ -255,9 +255,9 @@ def get_action_and_value(self, x, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. - next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminated, _, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_terminated = torch.Tensor(next_obs).to(device), torch.Tensor(terminated).to(device) for item in info: if "episode" in item.keys() and local_rank == 0: @@ -269,17 +269,17 @@ def get_action_and_value(self, x, action=None): print( f"local_rank: {local_rank}, action.sum(): {action.sum()}, update: {update}, agent.actor.weight.sum(): {agent.actor.weight.sum()}" ) - # bootstrap value if not done + # bootstrap value if not terminated with torch.no_grad(): next_value = agent.get_value(next_obs).reshape(1, -1) advantages = torch.zeros_like(rewards).to(device) lastgaelam = 0 for t in reversed(range(args.num_steps)): if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done + nextnonterminal = 1.0 - next_terminated nextvalues = next_value else: - nextnonterminal = 1.0 - dones[t + 1] + nextnonterminal = 1.0 - terminateds[t + 1] nextvalues = values[t + 1] delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam diff --git a/cleanrl/ppo_continuous_action_isaacgym/isaacgym/poetry.lock b/cleanrl/ppo_continuous_action_isaacgym/isaacgym/poetry.lock index 25cd8c7c3..86b66d377 100644 --- a/cleanrl/ppo_continuous_action_isaacgym/isaacgym/poetry.lock +++ b/cleanrl/ppo_continuous_action_isaacgym/isaacgym/poetry.lock @@ -27,7 +27,7 @@ python-versions = ">=3.6" [[package]] name = "gym" -version = "0.23.1" +version = "0.26.1" description = "Gym: A universal API for reinforcement learning environments" category = "main" optional = false @@ -261,7 +261,7 @@ cloudpickle = [ {file = "cloudpickle-2.2.0.tar.gz", hash = "sha256:3f4219469c55453cfe4737e564b67c2a149109dabf7f242478948b895f61106f"}, ] gym = [ - {file = "gym-0.23.1.tar.gz", hash = "sha256:d0f9b9da34edbdace421c9442fc9205d03b8d15d0fb451053c766cde706d40e0"}, + {file = "gym-0.26.1.tar.gz", hash = "sha256:d0f9b9da34edbdace421c9442fc9205d03b8d15d0fb451053c766cde706d40e0"}, ] gym-notices = [ {file = "gym-notices-0.0.8.tar.gz", hash = "sha256:ad25e200487cafa369728625fe064e88ada1346618526102659b4640f2b4b911"}, diff --git a/cleanrl/ppo_continuous_action_isaacgym/isaacgym/pyproject.toml b/cleanrl/ppo_continuous_action_isaacgym/isaacgym/pyproject.toml index efdfdd560..8a674d684 100644 --- a/cleanrl/ppo_continuous_action_isaacgym/isaacgym/pyproject.toml +++ b/cleanrl/ppo_continuous_action_isaacgym/isaacgym/pyproject.toml @@ -10,7 +10,7 @@ packages = [ [tool.poetry.dependencies] python = ">=3.7.1,<3.10" -gym = "0.23.1" +gym = "0.26.1" torch = "^1.12.0" torchvision = "^0.13.0" PyYAML = ">=5.3.1" diff --git a/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py b/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py index ddf3cf898..ee1775848 100644 --- a/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py +++ 
b/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py @@ -260,7 +260,7 @@ def observation(self, obs): # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = envs.reset() + next_obs, _ = envs.reset(seed=args.seed) next_done = torch.zeros(args.num_envs, dtype=torch.float).to(device) num_updates = args.total_timesteps // args.batch_size @@ -298,7 +298,7 @@ def observation(self, obs): ) break - # bootstrap value if not done + # bootstrap value if not terminated with torch.no_grad(): next_value = agent.get_value(next_obs).reshape(1, -1) advantages = torch.zeros_like(rewards).to(device) diff --git a/cleanrl/ppo_pettingzoo_ma_atari.py b/cleanrl/ppo_pettingzoo_ma_atari.py index bc51c703c..96c43e3a5 100644 --- a/cleanrl/ppo_pettingzoo_ma_atari.py +++ b/cleanrl/ppo_pettingzoo_ma_atari.py @@ -173,14 +173,14 @@ def get_action_and_value(self, x, action=None): actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + terminateds = torch.zeros((args.num_steps, args.num_envs)).to(device) values = torch.zeros((args.num_steps, args.num_envs)).to(device) # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) - next_done = torch.zeros(args.num_envs).to(device) + next_obs = torch.Tensor(envs.reset()[0]).to(device) + next_terminated = torch.zeros(args.num_envs).to(device) num_updates = args.total_timesteps // args.batch_size for update in range(1, num_updates + 1): @@ -193,7 +193,7 @@ def get_action_and_value(self, x, action=None): for step in range(0, args.num_steps): global_step += 1 * args.num_envs obs[step] = next_obs - dones[step] = next_done + terminateds[step] = next_terminated # ALGO LOGIC: action logic with torch.no_grad(): @@ -203,9 +203,9 @@ def get_action_and_value(self, x, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. 
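For reference outside the patch context: the reset changes these hunks keep applying follow the gym 0.26 convention, where `reset()` returns an `(obs, info)` tuple and accepts the seed directly, replacing the removed `env.seed()`. A minimal standalone sketch of that convention; `CartPole-v1` and the seed value are illustrative only, not taken from this diff:

```python
import gym

# Illustrative env id; the scripts in this diff build their own thunks.
envs = gym.vector.SyncVectorEnv([lambda: gym.make("CartPole-v1")])

seed = 1
# gym <= 0.23:  obs = envs.reset()          # obs only, seeding via envs.seed(seed)
# gym 0.26:     reset() returns (obs, info) and takes the seed itself.
next_obs, info = envs.reset(seed=seed)
# ...or drop the info dict, as several hunks above do:
next_obs = envs.reset(seed=seed)[0]

# env.seed() is gone; only the spaces still expose .seed().
envs.single_action_space.seed(seed)
envs.single_observation_space.seed(seed)
```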
- next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminated, _, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_terminated = torch.Tensor(next_obs).to(device), torch.Tensor(terminated).to(device) for idx, item in enumerate(info): player_idx = idx % 2 @@ -214,17 +214,17 @@ def get_action_and_value(self, x, action=None): writer.add_scalar(f"charts/episodic_return-player{player_idx}", item["episode"]["r"], global_step) writer.add_scalar(f"charts/episodic_length-player{player_idx}", item["episode"]["l"], global_step) - # bootstrap value if not done + # bootstrap value if not terminated with torch.no_grad(): next_value = agent.get_value(next_obs).reshape(1, -1) advantages = torch.zeros_like(rewards).to(device) lastgaelam = 0 for t in reversed(range(args.num_steps)): if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done + nextnonterminal = 1.0 - next_terminated nextvalues = next_value else: - nextnonterminal = 1.0 - dones[t + 1] + nextnonterminal = 1.0 - terminateds[t + 1] nextvalues = values[t + 1] delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam diff --git a/cleanrl/ppo_procgen.py b/cleanrl/ppo_procgen.py index 9a93eb0cd..506993e31 100644 --- a/cleanrl/ppo_procgen.py +++ b/cleanrl/ppo_procgen.py @@ -203,14 +203,14 @@ def get_action_and_value(self, x, action=None): actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + terminateds = torch.zeros((args.num_steps, args.num_envs)).to(device) values = torch.zeros((args.num_steps, args.num_envs)).to(device) # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) - next_done = torch.zeros(args.num_envs).to(device) + next_obs = torch.Tensor(envs.reset()[0]).to(device) + next_terminated = torch.zeros(args.num_envs).to(device) num_updates = args.total_timesteps // args.batch_size for update in range(1, num_updates + 1): @@ -223,7 +223,7 @@ def get_action_and_value(self, x, action=None): for step in range(0, args.num_steps): global_step += 1 * args.num_envs obs[step] = next_obs - dones[step] = next_done + terminateds[step] = next_terminated # ALGO LOGIC: action logic with torch.no_grad(): @@ -233,9 +233,9 @@ def get_action_and_value(self, x, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. 
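The recurring `# bootstrap value if not terminated` hunks all implement the same GAE recursion, with `terminated` simply taking the place of the old `done` mask. A small self-contained numpy sketch of that recursion; the array shapes and hyperparameters below are placeholders, not values from this diff:

```python
import numpy as np

num_steps, num_envs = 4, 2
gamma, gae_lambda = 0.99, 0.95

rewards = np.ones((num_steps, num_envs), dtype=np.float32)
values = np.zeros((num_steps, num_envs), dtype=np.float32)
terminateds = np.zeros((num_steps, num_envs), dtype=np.float32)
next_value = np.zeros(num_envs, dtype=np.float32)
next_terminated = np.zeros(num_envs, dtype=np.float32)

advantages = np.zeros_like(rewards)
lastgaelam = 0.0
for t in reversed(range(num_steps)):
    if t == num_steps - 1:
        nextnonterminal = 1.0 - next_terminated
        nextvalues = next_value
    else:
        nextnonterminal = 1.0 - terminateds[t + 1]
        nextvalues = values[t + 1]
    # delta_t = r_t + gamma * V(s_{t+1}) * (1 - terminated_{t+1}) - V(s_t)
    delta = rewards[t] + gamma * nextvalues * nextnonterminal - values[t]
    advantages[t] = lastgaelam = delta + gamma * gae_lambda * nextnonterminal * lastgaelam
returns = advantages + values
```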
- next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminated, _, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_terminated = torch.Tensor(next_obs).to(device), torch.Tensor(terminated).to(device) for item in info: if "episode" in item.keys(): @@ -244,17 +244,17 @@ def get_action_and_value(self, x, action=None): writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) break - # bootstrap value if not done + # bootstrap value if not terminated with torch.no_grad(): next_value = agent.get_value(next_obs).reshape(1, -1) advantages = torch.zeros_like(rewards).to(device) lastgaelam = 0 for t in reversed(range(args.num_steps)): if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done + nextnonterminal = 1.0 - next_terminated nextvalues = next_value else: - nextnonterminal = 1.0 - dones[t + 1] + nextnonterminal = 1.0 - terminateds[t + 1] nextvalues = values[t + 1] delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam diff --git a/cleanrl/ppo_rnd_envpool.py b/cleanrl/ppo_rnd_envpool.py index 32676d08b..0f5ec6368 100644 --- a/cleanrl/ppo_rnd_envpool.py +++ b/cleanrl/ppo_rnd_envpool.py @@ -110,7 +110,7 @@ def reset(self, **kwargs): return observations def step(self, action): - observations, rewards, dones, infos = super().step(action) + observations, rewards, terminateds, truncateds, infos = super().step(action) self.episode_returns += infos["reward"] self.episode_lengths += 1 self.returned_episode_returns[:] = self.episode_returns @@ -122,7 +122,8 @@ def step(self, action): return ( observations, rewards, - dones, + terminateds, + truncateds, infos, ) @@ -305,7 +306,7 @@ def update(self, rews): logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) curiosity_rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + terminateds = torch.zeros((args.num_steps, args.num_envs)).to(device) ext_values = torch.zeros((args.num_steps, args.num_envs)).to(device) int_values = torch.zeros((args.num_steps, args.num_envs)).to(device) avg_returns = deque(maxlen=20) @@ -313,8 +314,8 @@ def update(self, rews): # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) - next_done = torch.zeros(args.num_envs).to(device) + next_obs = torch.Tensor(envs.reset()[0]).to(device) + next_terminated = torch.zeros(args.num_envs).to(device) num_updates = args.total_timesteps // args.batch_size print("Start to initialize observation normalization parameter.....") @@ -340,7 +341,7 @@ def update(self, rews): for step in range(0, args.num_steps): global_step += 1 * args.num_envs obs[step] = next_obs - dones[step] = next_done + terminateds[step] = next_terminated # ALGO LOGIC: action logic with torch.no_grad(): @@ -355,9 +356,9 @@ def update(self, rews): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. 
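The wrapper change just above (its `step` now forwards a five-tuple) is the step-API half of the migration: gym 0.26 `step()` returns `(obs, reward, terminated, truncated, info)` instead of the old 4-tuple, so wrappers must forward five values and their callers must unpack five. A minimal sketch of a pass-through wrapper under that convention; the wrapper name and `CartPole-v1` are illustrative, not from this diff:

```python
import gym


class PassThroughStats(gym.Wrapper):
    """Toy stats wrapper that forwards the gym 0.26 five-tuple unchanged."""

    def __init__(self, env: gym.Env):
        super().__init__(env)
        self.episode_return = 0.0

    def reset(self, **kwargs):
        self.episode_return = 0.0
        return self.env.reset(**kwargs)  # (obs, info)

    def step(self, action):
        obs, reward, terminated, truncated, info = self.env.step(action)
        self.episode_return += float(reward)
        if terminated or truncated:
            info = dict(info, episode_return=self.episode_return)
        return obs, reward, terminated, truncated, info  # keep all five


env = PassThroughStats(gym.make("CartPole-v1"))
obs, info = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
```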
- next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminated, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_terminated = torch.Tensor(next_obs).to(device), torch.Tensor(terminated).to(device) rnd_next_obs = ( ( (next_obs[:, 3, :, :].reshape(args.num_envs, 1, 84, 84) - torch.from_numpy(obs_rms.mean).to(device)) @@ -367,7 +368,7 @@ def update(self, rews): target_next_feature = rnd_model.target(rnd_next_obs) predict_next_feature = rnd_model.predictor(rnd_next_obs) curiosity_rewards[step] = ((target_next_feature - predict_next_feature).pow(2).sum(1) / 2).data - for idx, d in enumerate(done): + for idx, d in enumerate(terminated): if d and info["lives"][idx] == 0: avg_returns.append(info["r"][idx]) epi_ret = np.average(avg_returns) @@ -395,7 +396,7 @@ def update(self, rews): curiosity_rewards /= np.sqrt(reward_rms.var) - # bootstrap value if not done + # bootstrap value if not terminated with torch.no_grad(): next_value_ext, next_value_int = agent.get_value(next_obs) next_value_ext, next_value_int = next_value_ext.reshape(1, -1), next_value_int.reshape(1, -1) @@ -405,12 +406,12 @@ def update(self, rews): int_lastgaelam = 0 for t in reversed(range(args.num_steps)): if t == args.num_steps - 1: - ext_nextnonterminal = 1.0 - next_done + ext_nextnonterminal = 1.0 - next_terminated int_nextnonterminal = 1.0 ext_nextvalues = next_value_ext int_nextvalues = next_value_int else: - ext_nextnonterminal = 1.0 - dones[t + 1] + ext_nextnonterminal = 1.0 - terminateds[t + 1] int_nextnonterminal = 1.0 ext_nextvalues = ext_values[t + 1] int_nextvalues = int_values[t + 1] diff --git a/cleanrl/sac_continuous_action.py b/cleanrl/sac_continuous_action.py index 52fc0c9eb..6f7eb6a32 100644 --- a/cleanrl/sac_continuous_action.py +++ b/cleanrl/sac_continuous_action.py @@ -7,7 +7,6 @@ import gym import numpy as np -import pybullet_envs # noqa import torch import torch.nn as nn import torch.nn.functional as F @@ -79,7 +78,7 @@ def thunk(): if capture_video: if idx == 0: env = gym.wrappers.RecordVideo(env, f"videos/{run_name}") - env.seed(seed) + env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -207,12 +206,12 @@ def get_action(self, x): envs.single_observation_space, envs.single_action_space, device, - handle_timeout_termination=True, + handle_timeout_termination=False, ) start_time = time.time() # TRY NOT TO MODIFY: start the game - obs = envs.reset() + obs, _ = envs.reset(seed=args.seed) for global_step in range(args.total_timesteps): # ALGO LOGIC: put action logic here if global_step < args.learning_starts: @@ -222,7 +221,7 @@ def get_action(self, x): actions = actions.detach().cpu().numpy() # TRY NOT TO MODIFY: execute the game and log data. 
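The SAC hunk above flips the SB3 `ReplayBuffer` to `handle_timeout_termination=False`: that flag reads `TimeLimit.truncated` out of a per-env list of info dicts, which gym 0.26's vector API no longer emits in that shape. A hedged sketch of the buffer setup under the new API, assuming an SB3 build that tolerates gym 0.26 returns; `Pendulum-v1`, the buffer size, and the seed are placeholders, not values from this diff:

```python
import gym
import numpy as np
from stable_baselines3.common.buffers import ReplayBuffer

envs = gym.vector.SyncVectorEnv([lambda: gym.make("Pendulum-v1")])

rb = ReplayBuffer(
    buffer_size=10_000,
    observation_space=envs.single_observation_space,
    action_space=envs.single_action_space,
    device="cpu",
    handle_timeout_termination=False,  # infos is a dict of arrays in gym 0.26, not a per-env list
)

obs, _ = envs.reset(seed=1)
actions = np.array([envs.single_action_space.sample()])
next_obs, rewards, terminateds, truncateds, infos = envs.step(actions)
rb.add(obs, next_obs, actions, rewards, terminateds, infos)
```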
- next_obs, rewards, dones, infos = envs.step(actions) + next_obs, rewards, terminateds, _, infos = envs.step(actions) # TRY NOT TO MODIFY: record rewards for plotting purposes for info in infos: @@ -232,12 +231,13 @@ def get_action(self, x): writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) break - # TRY NOT TO MODIFY: save data to reply buffer; handle `terminal_observation` - real_next_obs = next_obs.copy() - for idx, d in enumerate(dones): - if d: - real_next_obs[idx] = infos[idx]["terminal_observation"] - rb.add(obs, real_next_obs, actions, rewards, dones, infos) + # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` + real_next_obs = next_obs + if "final_observation" in infos: + real_next_obs = next_obs.copy() + for idx, d in enumerate(infos["_final_observation"]): + if d: + real_next_obs[idx] = infos["final_observation"][idx] # TRY NOT TO MODIFY: CRUCIAL step easy to overlook obs = next_obs @@ -251,6 +251,7 @@ def get_action(self, x): qf2_next_target = qf2_target(data.next_observations, next_state_actions) min_qf_next_target = torch.min(qf1_next_target, qf2_next_target) - alpha * next_state_log_pi next_q_value = data.rewards.flatten() + (1 - data.dones.flatten()) * args.gamma * (min_qf_next_target).view(-1) + # TODO: to be updated to data.terminateds once SB3 is updated qf1_a_values = qf1(data.observations, data.actions).view(-1) qf2_a_values = qf2(data.observations, data.actions).view(-1) diff --git a/cleanrl/td3_continuous_action.py b/cleanrl/td3_continuous_action.py index 7c08eca81..52f06da94 100644 --- a/cleanrl/td3_continuous_action.py +++ b/cleanrl/td3_continuous_action.py @@ -7,7 +7,6 @@ import gym import numpy as np -import pybullet_envs # noqa import torch import torch.nn as nn import torch.nn.functional as F @@ -73,7 +72,7 @@ def thunk(): if capture_video: if idx == 0: env = gym.wrappers.RecordVideo(env, f"videos/{run_name}") - env.seed(seed) + env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -169,12 +168,12 @@ def forward(self, x): envs.single_observation_space, envs.single_action_space, device, - handle_timeout_termination=True, + handle_timeout_termination=False, ) start_time = time.time() # TRY NOT TO MODIFY: start the game - obs = envs.reset() + obs, _ = envs.reset(seed=args.seed) for global_step in range(args.total_timesteps): # ALGO LOGIC: put action logic here if global_step < args.learning_starts: @@ -186,7 +185,7 @@ def forward(self, x): actions = actions.cpu().numpy().clip(envs.single_action_space.low, envs.single_action_space.high) # TRY NOT TO MODIFY: execute the game and log data. 
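The `final_observation` blocks above replace the old per-env `terminal_observation` lookup: under gym 0.26 auto-reset, the vector env exposes the last observation of a finished episode via `infos["final_observation"]`, with `infos["_final_observation"]` as the boolean mask of which sub-envs actually finished. A hedged sketch of that patch-up as a helper, together with the `rb.add` call the training loop stores the transition with; `store_transition` is a hypothetical helper name, not a function in this diff:

```python
def store_transition(rb, obs, next_obs, actions, rewards, terminateds, infos):
    """Store one vector-env transition, patching in final observations on auto-reset."""
    real_next_obs = next_obs
    if "final_observation" in infos:
        real_next_obs = next_obs.copy()
        # "_final_observation" is a boolean mask over sub-envs
        for idx, finished in enumerate(infos["_final_observation"]):
            if finished:
                real_next_obs[idx] = infos["final_observation"][idx]
    rb.add(obs, real_next_obs, actions, rewards, terminateds, infos)
    return next_obs  # caller then sets obs = next_obs
```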
- next_obs, rewards, dones, infos = envs.step(actions) + next_obs, rewards, terminateds, _, infos = envs.step(actions) # TRY NOT TO MODIFY: record rewards for plotting purposes for info in infos: @@ -196,12 +195,13 @@ def forward(self, x): writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) break - # TRY NOT TO MODIFY: save data to reply buffer; handle `terminal_observation` - real_next_obs = next_obs.copy() - for idx, d in enumerate(dones): - if d: - real_next_obs[idx] = infos[idx]["terminal_observation"] - rb.add(obs, real_next_obs, actions, rewards, dones, infos) + # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` + real_next_obs = next_obs + if "final_observation" in infos: + real_next_obs = next_obs.copy() + for idx, d in enumerate(infos["_final_observation"]): + if d: + real_next_obs[idx] = infos["final_observation"][idx] # TRY NOT TO MODIFY: CRUCIAL step easy to overlook obs = next_obs diff --git a/cleanrl/td3_continuous_action_jax.py b/cleanrl/td3_continuous_action_jax.py index 457da7cf1..e39c42aa2 100644 --- a/cleanrl/td3_continuous_action_jax.py +++ b/cleanrl/td3_continuous_action_jax.py @@ -13,7 +13,6 @@ import jax.numpy as jnp import numpy as np import optax -import pybullet_envs # noqa from flax.training.train_state import TrainState from stable_baselines3.common.buffers import ReplayBuffer from torch.utils.tensorboard import SummaryWriter @@ -72,7 +71,7 @@ def thunk(): if capture_video: if idx == 0: env = gym.wrappers.RecordVideo(env, f"videos/{run_name}") - env.seed(seed) + env.action_space.seed(seed) env.observation_space.seed(seed) return env @@ -152,11 +151,11 @@ class TrainState(TrainState): envs.single_observation_space, envs.single_action_space, device="cpu", - handle_timeout_termination=True, + handle_timeout_termination=False, ) # TRY NOT TO MODIFY: start the game - obs = envs.reset() + obs, _ = envs.reset(seed=args.seed) actor = Actor( action_dim=np.prod(envs.single_action_space.shape), action_scale=jnp.array((envs.action_space.high - envs.action_space.low) / 2.0), @@ -193,7 +192,7 @@ def update_critic( actions: np.ndarray, next_observations: np.ndarray, rewards: np.ndarray, - dones: np.ndarray, + terminateds: np.ndarray, key: jnp.ndarray, ): # TODO Maybe pre-generate a lot of random keys @@ -215,7 +214,7 @@ def update_critic( qf1_next_target = qf.apply(qf1_state.target_params, next_observations, next_state_actions).reshape(-1) qf2_next_target = qf.apply(qf2_state.target_params, next_observations, next_state_actions).reshape(-1) min_qf_next_target = jnp.minimum(qf1_next_target, qf2_next_target) - next_q_value = (rewards + (1 - dones) * args.gamma * (min_qf_next_target)).reshape(-1) + next_q_value = (rewards + (1 - terminateds) * args.gamma * (min_qf_next_target)).reshape(-1) def mse_loss(params): qf_a_values = qf.apply(params, observations, actions).squeeze() @@ -269,7 +268,7 @@ def actor_loss(params): ) # TRY NOT TO MODIFY: execute the game and log data. 
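In the `update_critic` change above only the mask is renamed; the target itself is still the clipped double-Q backup `r + (1 - terminated) * gamma * min(Q1', Q2')`. A tiny numpy illustration with made-up numbers:

```python
import numpy as np

gamma = 0.99
rewards = np.array([1.0, 0.5, 0.0])
terminateds = np.array([0.0, 0.0, 1.0])  # 1.0 only on true termination, not on truncation
qf1_next_target = np.array([10.0, 8.0, 6.0])
qf2_next_target = np.array([9.5, 8.5, 5.0])

min_qf_next_target = np.minimum(qf1_next_target, qf2_next_target)
# Terminal transitions bootstrap nothing: (1 - terminated) zeroes the tail value.
next_q_value = rewards + (1.0 - terminateds) * gamma * min_qf_next_target
# -> [1 + 0.99*9.5, 0.5 + 0.99*8.0, 0.0] = [10.405, 8.42, 0.0]
```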
- next_obs, rewards, dones, infos = envs.step(actions) + next_obs, rewards, terminateds, _, infos = envs.step(actions) # TRY NOT TO MODIFY: record rewards for plotting purposes for info in infos: @@ -279,12 +278,13 @@ def actor_loss(params): writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) break - # TRY NOT TO MODIFY: save data to replay buffer; handle `terminal_observation` - real_next_obs = next_obs.copy() - for idx, d in enumerate(dones): - if d: - real_next_obs[idx] = infos[idx]["terminal_observation"] - rb.add(obs, real_next_obs, actions, rewards, dones, infos) + # TRY NOT TO MODIFY: save data to replay buffer; handle `final_observation` + real_next_obs = next_obs + if "final_observation" in infos: + real_next_obs = next_obs.copy() + for idx, d in enumerate(infos["_final_observation"]): + if d: + real_next_obs[idx] = infos["final_observation"][idx] # TRY NOT TO MODIFY: CRUCIAL step easy to overlook obs = next_obs diff --git a/docs/contribution.md b/docs/contribution.md index 1beba7c11..75344b82f 100644 --- a/docs/contribution.md +++ b/docs/contribution.md @@ -11,7 +11,7 @@ Good luck and have fun! ```bash poetry install poetry install --with atari -poetry install --with pybullet +poetry install --with mujoco ``` Then you can run the scripts under the poetry environment in two ways: `poetry run` or `poetry shell`. diff --git a/docs/get-started/examples.md b/docs/get-started/examples.md index b9c4b676c..d63e3917a 100644 --- a/docs/get-started/examples.md +++ b/docs/get-started/examples.md @@ -32,14 +32,14 @@ python cleanrl/ppo.py --env-id CartPole-v1 python cleanrl/c51.py --env-id CartPole-v1 ``` -## PyBullet +## MuJoCo ``` poetry shell -poetry install --with pybullet -python cleanrl/td3_continuous_action.py --env-id MinitaurBulletDuckEnv-v0 -python cleanrl/ddpg_continuous_action.py --env-id MinitaurBulletDuckEnv-v0 -python cleanrl/sac_continuous_action.py --env-id MinitaurBulletDuckEnv-v0 +poetry install --with mujoco +python cleanrl/td3_continuous_action.py --env-id HalfCheetah-v4 +python cleanrl/ddpg_continuous_action.py --env-id HalfCheetah-v4 +python cleanrl/sac_continuous_action.py --env-id HalfCheetah-v4 ``` ## Procgen diff --git a/docs/get-started/installation.md b/docs/get-started/installation.md index ea96d3b75..00000b371 100644 --- a/docs/get-started/installation.md +++ b/docs/get-started/installation.md @@ -60,11 +60,6 @@ ale-py = "0.7.4" AutoROM = {extras = ["accept-rom-license"], version = "^0.4.2"} opencv-python = "^4.6.0.66" -[tool.poetry.group.pybullet] -optional = true -[tool.poetry.group.pybullet.dependencies] -pybullet = "3.1.8" - [tool.poetry.group.procgen] optional = true [tool.poetry.group.procgen.dependencies] diff --git a/docs/rl-algorithms/ppo-isaacgymenvs.md b/docs/rl-algorithms/ppo-isaacgymenvs.md index 7508b591e..830b00c0e 100644 --- a/docs/rl-algorithms/ppo-isaacgymenvs.md +++ b/docs/rl-algorithms/ppo-isaacgymenvs.md @@ -170,9 +170,9 @@ Additionally, `charts/consecutive_successes` means the number of consecutive epi 1. Create a custom `RecordEpisodeStatisticsTorch` wrapper that records statstics using GPU tensors instead of `numpy` arrays. 1. Avoid transferring the tensors to CPU. 
The related code in `ppo_continuous_action.py` looks like ```python - next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminated, truncated, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_terminated = torch.Tensor(next_obs).to(device), torch.Tensor(terminated).to(device) ``` and the related code in `ppo_continuous_action_isaacgym.py` looks like ```python diff --git a/docs/rl-algorithms/sac.md b/docs/rl-algorithms/sac.md index ed4a54723..5effcacde 100644 --- a/docs/rl-algorithms/sac.md +++ b/docs/rl-algorithms/sac.md @@ -49,14 +49,14 @@ The [sac_continuous_action.py](https://github.com/vwxyzjn/cleanrl/blob/master/cl ```bash poetry install -# Pybullet -poetry install --with pybullet +# MuJoCo +poetry install --with mujoco ## Default -python cleanrl/sac_continuous_action.py --env-id HopperBulletEnv-v0 +python cleanrl/sac_continuous_action.py --env-id HalfCheetah-v4 ## Without Automatic entropy coef. tuning -python cleanrl/sac_continuous_action.py --env-id HopperBulletEnv-v0 --autotune False --alpha 0.2 +python cleanrl/sac_continuous_action.py --env-id HalfCheetah-v4 --autotune False --alpha 0.2 ``` ### Explanation of the logged metrics diff --git a/poetry.lock b/poetry.lock index ea2491685..55f7bfbeb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -39,25 +39,6 @@ SQLAlchemy = ">=1.3.0" [package.extras] tz = ["python-dateutil"] -[[package]] -name = "antlr4-python3-runtime" -version = "4.9.3" -description = "ANTLR 4.9.3 runtime for Python 3.7" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "async-timeout" -version = "4.0.2" -description = "Timeout context manager for asyncio programs" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -typing-extensions = {version = ">=3.6.5", markers = "python_version < \"3.8\""} - [[package]] name = "attrs" version = "22.1.0" @@ -690,16 +671,16 @@ protobuf = ["grpcio-tools (>=1.48.1)"] [[package]] name = "gym" -version = "0.23.1" +version = "0.26.1" description = "Gym: A universal API for reinforcement learning environments" category = "main" optional = false -python-versions = ">=3.7" +python-versions = ">=3.6" [package.dependencies] cloudpickle = ">=1.2.0" gym_notices = ">=0.0.4" -importlib_metadata = {version = ">=4.10.0", markers = "python_version < \"3.10\""} +importlib_metadata = {version = ">=4.8.0", markers = "python_version < \"3.10\""} numpy = ">=1.18.0" [package.extras] @@ -912,54 +893,6 @@ category = "main" optional = false python-versions = "*" -[[package]] -name = "isaacgym" -version = "1.0.preview4" -description = "" -category = "dev" -optional = false -python-versions = ">=3.7.1,<3.10" -develop = true - -[package.dependencies] -gym = "0.23.1" -imageio = "^2.19.5" -ninja = "^1.10.2" -numpy = ">=1.16.4" -Pillow = "^9.2.0" -PyYAML = ">=5.3.1" -scipy = ">=1.5.0" -torch = "^1.12.0" -torchvision = "^0.13.0" - -[package.source] -type = "directory" -url = "cleanrl/ppo_continuous_action_isaacgym/isaacgym" - -[[package]] -name = "isaacgymenvs" -version = "0.1.0" -description = "" -category = "dev" -optional = false -python-versions = ">=3.7.1,<3.10" -develop = false - -[package.dependencies] -gym = "0.23.1" -hydra-core = "^1.2.0" -numpy = ">=1.16.4" -omegaconf = "^2.2.2" -PyVirtualDisplay = "^3.0" -rl-games = "1.5.2" -termcolor = "^1.1.0" - -[package.source] -type = "git" -url = 
"https://github.com/vwxyzjn/IsaacGymEnvs.git" -reference = "poetry" -resolved_reference = "27cc130a811b2305056c2f03f5f4cc0819b7867c" - [[package]] name = "jax" version = "0.3.17" @@ -1029,26 +962,6 @@ category = "main" optional = false python-versions = ">=3.6" -[[package]] -name = "jsonschema" -version = "4.16.0" -description = "An implementation of JSON Schema validation for Python" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -attrs = ">=17.4.0" -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} -importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} -pkgutil-resolve-name = {version = ">=1.3.10", markers = "python_version < \"3.9\""} -pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2" -typing-extensions = {version = "*", markers = "python_version < \"3.8\""} - -[package.extras] -format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] -format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] - [[package]] name = "kiwisolver" version = "1.4.4" @@ -1283,21 +1196,24 @@ version = "0.1.11" description = "Multi-Agent Arcade Learning Environment Python Interface" category = "main" optional = false -python-versions = "*" +python-versions = ">=3.7" [package.dependencies] +absl-py = "*" +glfw = "*" numpy = "*" +pyopengl = "*" [[package]] -name = "ninja" -version = "1.10.2.3" -description = "Ninja is a small build system with a focus on speed" +name = "multi-agent-ale-py" +version = "0.1.11" +description = "Multi-Agent Arcade Learning Environment Python Interface" category = "dev" optional = false python-versions = "*" -[package.extras] -test = ["codecov (>=2.0.5)", "coverage (>=4.2)", "flake8 (>=3.0.4)", "pytest (>=4.5.0)", "pytest-cov (>=2.7.1)", "pytest-runner (>=5.1)", "pytest-virtualenv (>=1.7.0)", "virtualenv (>=15.0.3)"] +[package.dependencies] +numpy = "*" [[package]] name = "nodeenv" @@ -1331,18 +1247,6 @@ rsa = ["cryptography (>=3.0.0)"] signals = ["blinker (>=1.4.0)"] signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] -[[package]] -name = "omegaconf" -version = "2.2.3" -description = "A flexible configuration library" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -antlr4-python3-runtime = ">=4.9.0,<4.10.0" -PyYAML = ">=5.1.0" - [[package]] name = "opencv-python" version = "4.6.0.66" @@ -1516,14 +1420,6 @@ python-versions = ">=3.7" docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-issues (>=3.0.1)", "sphinx-removed-in", "sphinxext-opengraph"] tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] -[[package]] -name = "pkgutil_resolve_name" -version = "1.3.10" -description = "Resolve a name to an object." 
-category = "dev" -optional = false -python-versions = ">=3.6" - [[package]] name = "platformdirs" version = "2.5.2" @@ -1687,14 +1583,6 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -[[package]] -name = "pygame" -version = "2.1.0" -description = "Python Game Development" -category = "main" -optional = false -python-versions = ">=3.6" - [[package]] name = "Pygments" version = "2.13.0" @@ -1749,14 +1637,6 @@ category = "main" optional = false python-versions = "*" -[[package]] -name = "pyrsistent" -version = "0.18.1" -description = "Persistent/Functional/Immutable data structures" -category = "dev" -optional = false -python-versions = ">=3.7" - [[package]] name = "pytest" version = "7.1.3" @@ -1797,14 +1677,6 @@ category = "main" optional = false python-versions = "*" -[[package]] -name = "PyVirtualDisplay" -version = "3.0" -description = "python wrapper for Xvfb, Xephyr and Xvnc" -category = "dev" -optional = false -python-versions = "*" - [[package]] name = "PyYAML" version = "5.4.1" @@ -1824,59 +1696,6 @@ python-versions = ">=3.6" [package.dependencies] pyyaml = "*" -[[package]] -name = "ray" -version = "1.10.0" -description = "Ray provides a simple, universal API for building distributed applications." -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -attrs = "*" -click = ">=7.0" -filelock = "*" -grpcio = ">=1.28.1" -jsonschema = "*" -msgpack = ">=1.0.0,<2.0.0" -numpy = [ - {version = ">=1.16", markers = "python_version < \"3.9\""}, - {version = ">=1.19.3", markers = "python_version >= \"3.9\""}, -] -protobuf = ">=3.15.3" -pyyaml = "*" -redis = ">=3.5.0" - -[package.extras] -all = ["aiohttp (>=3.7)", "aiohttp-cors", "aioredis (<2)", "aiorwlock", "aiosignal", "colorful", "dm-tree", "fastapi", "frozenlist", "fsspec", "gpustat (>=1.0.0b1)", "gym", "kubernetes", "lz4", "matplotlib (!=3.4.3)", "numpy (>=1.19)", "opencensus", "opentelemetry-api (==1.1.0)", "opentelemetry-exporter-otlp (==1.1.0)", "opentelemetry-sdk (==1.1.0)", "pandas", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=4.0.1)", "pyyaml", "ray-cpp (==1.10.0)", "requests", "scikit-image", "scipy", "smart-open", "starlette", "tabulate", "tensorboardX (>=1.9)", "urllib3", "uvicorn"] -cpp = ["ray-cpp (==1.10.0)"] -data = ["fsspec", "numpy (>=1.19)", "pandas", "pyarrow (>=4.0.1)"] -default = ["aiohttp (>=3.7)", "aiohttp-cors", "aioredis (<2)", "aiosignal", "colorful", "frozenlist", "gpustat (>=1.0.0b1)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "requests", "smart-open"] -k8s = ["kubernetes", "urllib3"] -observability = ["opentelemetry-api (==1.1.0)", "opentelemetry-exporter-otlp (==1.1.0)", "opentelemetry-sdk (==1.1.0)"] -rllib = ["dm-tree", "gym", "lz4", "matplotlib (!=3.4.3)", "pandas", "pyyaml", "requests", "scikit-image", "scipy", "tabulate", "tensorboardX (>=1.9)"] -serve = ["aiohttp (>=3.7)", "aiohttp-cors", "aioredis (<2)", "aiorwlock", "aiosignal", "colorful", "fastapi", "frozenlist", "gpustat (>=1.0.0b1)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "requests", "smart-open", "starlette", "uvicorn"] -tune = ["pandas", "requests", "tabulate", "tensorboardX (>=1.9)"] - -[[package]] -name = "redis" -version = "4.3.4" -description = "Python client for Redis database and key-value store" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -async-timeout = ">=4.0.2" -deprecated = ">=1.2.3" -importlib-metadata = {version = ">=1.0", markers = 
"python_version < \"3.8\""} -packaging = ">=20.4" -typing-extensions = {version = "*", markers = "python_version < \"3.8\""} - -[package.extras] -hiredis = ["hiredis (>=1.0.0)"] -ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"] - [[package]] name = "requests" version = "2.28.1" @@ -1927,25 +1746,6 @@ typing-extensions = {version = ">=3.7.4,<5.0", markers = "python_version < \"3.8 [package.extras] jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"] -[[package]] -name = "rl-games" -version = "1.5.2" -description = "" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -gym = ">=0.17.2" -numpy = ">=1.16.0" -psutil = "*" -pyyaml = "*" -ray = ">=1.1.0" -setproctitle = "*" -tensorboard = ">=1.14.0" -tensorboardX = ">=1.6" -torch = ">=1.7.0" - [[package]] name = "rsa" version = "4.7.2" @@ -2237,26 +2037,6 @@ category = "main" optional = false python-versions = "*" -[[package]] -name = "tensorboardX" -version = "2.5.1" -description = "TensorBoardX lets you watch Tensors Flow without Tensorflow" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -numpy = "*" -protobuf = ">=3.8.0,<=3.20.1" - -[[package]] -name = "termcolor" -version = "1.1.0" -description = "ANSII Color formatting for output in terminal." -category = "dev" -optional = false -python-versions = "*" - [[package]] name = "threadpoolctl" version = "3.1.0" @@ -2311,24 +2091,6 @@ python-versions = ">=3.7.0" [package.dependencies] typing-extensions = "*" -[[package]] -name = "torchvision" -version = "0.13.1" -description = "image and video datasets and models for torch deep learning" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -numpy = "*" -pillow = ">=5.3.0,<8.3.0 || >=8.4.0" -requests = "*" -torch = "1.12.1" -typing-extensions = "*" - -[package.extras] -scipy = ["scipy"] - [[package]] name = "tqdm" version = "4.64.1" @@ -2475,14 +2237,6 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" [package.extras] test = ["pytest (>=3.0.0)", "pytest-cov"] -[[package]] -name = "wrapt" -version = "1.14.1" -description = "Module for decorators, wrappers and monkey patching." 
-category = "dev" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" - [[package]] name = "zipp" version = "3.8.1" @@ -2551,13 +2305,6 @@ alembic = [ {file = "alembic-1.8.1-py3-none-any.whl", hash = "sha256:0a024d7f2de88d738d7395ff866997314c837be6104e90c5724350313dee4da4"}, {file = "alembic-1.8.1.tar.gz", hash = "sha256:cd0b5e45b14b706426b833f06369b9a6d5ee03f826ec3238723ce8caaf6e5ffa"}, ] -antlr4-python3-runtime = [ - {file = "antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b"}, -] -async-timeout = [ - {file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"}, - {file = "async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"}, -] attrs = [ {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, @@ -2815,10 +2562,6 @@ etils = [ {file = "etils-0.7.1-py3-none-any.whl", hash = "sha256:9b5140835372e3db8b6e8d7da9e16beb2390ae6f24c253107c8127e4d2205189"}, {file = "etils-0.7.1.tar.gz", hash = "sha256:207c097dd4180d5e5ab1c7b7ee78b7bf9471e12534deace23b1d392debd292f3"}, ] -fasteners = [ - {file = "fasteners-0.15-py2.py3-none-any.whl", hash = "sha256:007e4d2b2d4a10093f67e932e5166722d2eab83b77724156e92ad013c6226574"}, - {file = "fasteners-0.15.tar.gz", hash = "sha256:3a176da6b70df9bb88498e1a18a9e4a8579ed5b9141207762368a1017bf8f5ef"}, -] filelock = [ {file = "filelock-3.8.0-py3-none-any.whl", hash = "sha256:617eb4e5eedc82fc5f47b6d61e4d11cb837c56cb4544e39081099fa17ad109d4"}, {file = "filelock-3.8.0.tar.gz", hash = "sha256:55447caa666f2198c5b6b13a26d2084d26fa5b115c00d065664b2124680c4edc"}, @@ -2831,10 +2574,6 @@ fonttools = [ {file = "fonttools-4.37.1-py3-none-any.whl", hash = "sha256:fff6b752e326c15756c819fe2fe7ceab69f96a1dbcfe8911d0941cdb49905007"}, {file = "fonttools-4.37.1.zip", hash = "sha256:4606e1a88ee1f6699d182fea9511bd9a8a915d913eab4584e5226da1180fcce7"}, ] -free-mujoco-py = [ - {file = "free-mujoco-py-2.1.6.tar.gz", hash = "sha256:77e18302e21979bbd77a7c1584070815843cab1b1249f8a17667e15aba528a9a"}, - {file = "free_mujoco_py-2.1.6-py3-none-any.whl", hash = "sha256:f541d84b6bd87919ccf28f5a708681ca90560a945d104aca393d89275790efb8"}, -] ghp-import = [ {file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"}, {file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"}, @@ -3009,7 +2748,7 @@ grpcio = [ {file = "grpcio-1.48.1.tar.gz", hash = "sha256:660217eccd2943bf23ea9a36e2a292024305aec04bf747fbcff1f5032b83610e"}, ] gym = [ - {file = "gym-0.23.1.tar.gz", hash = "sha256:d0f9b9da34edbdace421c9442fc9205d03b8d15d0fb451053c766cde706d40e0"}, + {file = "gym-0.26.1.tar.gz", hash = "sha256:a632c3fcee0b4fcb8d110c127fe0e4a195195cf0a3707da4578564afbed96512"}, ] gym-notices = [ {file = "gym-notices-0.0.8.tar.gz", hash = "sha256:ad25e200487cafa369728625fe064e88ada1346618526102659b4640f2b4b911"}, @@ -3065,8 +2804,6 @@ iniconfig = [ {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, ] 
-isaacgym = [] -isaacgymenvs = [] jax = [ {file = "jax-0.3.17.tar.gz", hash = "sha256:2a2794e4e0c93595a1b1d625026580c0686be93bd60d4f6906b090446692cadc"}, ] @@ -3095,10 +2832,6 @@ joblib = [ {file = "joblib-1.1.0-py2.py3-none-any.whl", hash = "sha256:f21f109b3c7ff9d95f8387f752d0d9c34a02aa2f7060c2135f465da0e5160ff6"}, {file = "joblib-1.1.0.tar.gz", hash = "sha256:4158fcecd13733f8be669be0683b96ebdbbd38d23559f54dca7205aea1bf1e35"}, ] -jsonschema = [ - {file = "jsonschema-4.16.0-py3-none-any.whl", hash = "sha256:9e74b8f9738d6a946d70705dc692b74b5429cd0960d58e79ffecfc43b2221eb9"}, - {file = "jsonschema-4.16.0.tar.gz", hash = "sha256:165059f076eff6971bae5b742fc029a7b4ef3f9bcf04c14e4776a7605de14b23"}, -] kiwisolver = [ {file = "kiwisolver-1.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2f5e60fabb7343a836360c4f0919b8cd0d6dbf08ad2ca6b9cf90bf0c76a3c4f6"}, {file = "kiwisolver-1.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:10ee06759482c78bdb864f4109886dff7b8a56529bc1609d4f1112b93fe6423c"}, @@ -3526,22 +3259,6 @@ multi-agent-ale-py = [ {file = "multi_agent_ale_py-0.1.11-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:48a6729423e5b5f47c06b98a25c18e726e2227e60abfd3d70735dc9a48c549d7"}, {file = "multi_agent_ale_py-0.1.11-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7fd316dba5f05cdd2a29b0037433f9f15666fd4a0c2d98f3d85ca57a73d2b423"}, ] -ninja = [ - {file = "ninja-1.10.2.3-py2.py3-none-macosx_10_9_universal2.macosx_10_9_x86_64.macosx_11_0_arm64.macosx_11_0_universal2.whl", hash = "sha256:d5e0275d28997a750a4f445c00bdd357b35cc334c13cdff13edf30e544704fbd"}, - {file = "ninja-1.10.2.3-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ea785bf6a15727040835256577239fa3cf5da0d60e618c307aa5efc31a1f0ce"}, - {file = "ninja-1.10.2.3-py2.py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29570a18d697fc84d361e7e6330f0021f34603ae0fcb0ef67ae781e9814aae8d"}, - {file = "ninja-1.10.2.3-py2.py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:21a1d84d4c7df5881bfd86c25cce4cf7af44ba2b8b255c57bc1c434ec30a2dfc"}, - {file = "ninja-1.10.2.3-py2.py3-none-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9ca8dbece144366d5f575ffc657af03eb11c58251268405bc8519d11cf42f113"}, - {file = "ninja-1.10.2.3-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:279836285975e3519392c93c26e75755e8a8a7fafec9f4ecbb0293119ee0f9c6"}, - {file = "ninja-1.10.2.3-py2.py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:cc8b31b5509a2129e4d12a35fc21238c157038022560aaf22e49ef0a77039086"}, - {file = "ninja-1.10.2.3-py2.py3-none-musllinux_1_1_i686.whl", hash = "sha256:688167841b088b6802e006f911d911ffa925e078c73e8ef2f88286107d3204f8"}, - {file = "ninja-1.10.2.3-py2.py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:840a0b042d43a8552c4004966e18271ec726e5996578f28345d9ce78e225b67e"}, - {file = "ninja-1.10.2.3-py2.py3-none-musllinux_1_1_s390x.whl", hash = "sha256:84be6f9ec49f635dc40d4b871319a49fa49b8d55f1d9eae7cd50d8e57ddf7a85"}, - {file = "ninja-1.10.2.3-py2.py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:6bd76a025f26b9ae507cf8b2b01bb25bb0031df54ed685d85fc559c411c86cf4"}, - {file = "ninja-1.10.2.3-py2.py3-none-win32.whl", hash = "sha256:740d61fefb4ca13573704ee8fe89b973d40b8dc2a51aaa4e9e68367233743bb6"}, - {file = "ninja-1.10.2.3-py2.py3-none-win_amd64.whl", hash = "sha256:0560eea57199e41e86ac2c1af0108b63ae77c3ca4d05a9425a750e908135935a"}, - {file = "ninja-1.10.2.3.tar.gz", hash = 
"sha256:e1b86ad50d4e681a7dbdff05fc23bb52cb773edb90bc428efba33fa027738408"}, -] nodeenv = [ {file = "nodeenv-1.7.0-py2.py3-none-any.whl", hash = "sha256:27083a7b96a25f2f5e1d8cb4b6317ee8aeda3bdd121394e5ac54e498028a042e"}, {file = "nodeenv-1.7.0.tar.gz", hash = "sha256:e0e7f7dfb85fc5394c6fe1e8fa98131a2473e04311a45afb6508f7cf1836fa2b"}, @@ -3583,10 +3300,6 @@ oauthlib = [ {file = "oauthlib-3.2.1-py3-none-any.whl", hash = "sha256:88e912ca1ad915e1dcc1c06fc9259d19de8deacd6fd17cc2df266decc2e49066"}, {file = "oauthlib-3.2.1.tar.gz", hash = "sha256:1565237372795bf6ee3e5aba5e2a85bd5a65d0e2aa5c628b9a97b7d7a0da3721"}, ] -omegaconf = [ - {file = "omegaconf-2.2.3-py3-none-any.whl", hash = "sha256:d6f2cbf79a992899eb76c6cb1aedfcf0fe7456a8654382edd5ee0c1b199c0657"}, - {file = "omegaconf-2.2.3.tar.gz", hash = "sha256:59ff9fba864ffbb5fb710b64e8a9ba37c68fa339a2e2bb4f1b648d6901552523"}, -] opencv-python = [ {file = "opencv-python-4.6.0.66.tar.gz", hash = "sha256:c5bfae41ad4031e66bb10ec4a0a2ffd3e514d092652781e8b1ac98d1b59f1158"}, {file = "opencv_python-4.6.0.66-cp36-abi3-macosx_10_15_x86_64.whl", hash = "sha256:e6e448b62afc95c5b58f97e87ef84699e6607fe5c58730a03301c52496005cae"}, @@ -3714,10 +3427,6 @@ Pillow = [ {file = "Pillow-9.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:0030fdbd926fb85844b8b92e2f9449ba89607231d3dd597a21ae72dc7fe26927"}, {file = "Pillow-9.2.0.tar.gz", hash = "sha256:75e636fd3e0fb872693f23ccb8a5ff2cd578801251f3a4f6854c6a5d437d3c04"}, ] -pkgutil_resolve_name = [ - {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, - {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, -] platformdirs = [ {file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"}, {file = "platformdirs-2.5.2.tar.gz", hash = "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19"}, @@ -3851,77 +3560,10 @@ pyasn1-modules = [ {file = "pyasn1_modules-0.2.8-py3.6.egg", hash = "sha256:cbac4bc38d117f2a49aeedec4407d23e8866ea4ac27ff2cf7fb3e5b570df19e0"}, {file = "pyasn1_modules-0.2.8-py3.7.egg", hash = "sha256:c29a5e5cc7a3f05926aff34e097e84f8589cd790ce0ed41b67aed6857b26aafd"}, ] -pybullet = [ - {file = "pybullet-3.1.8-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0ae9bef2f8ea6b30ee0f8fbc7051801d9b47e82ec8637a47e47a0675d4f03fa4"}, - {file = "pybullet-3.1.8-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:d47ca592b2621fd05e2c234f15858293e6a53bb04cbd29df5c06d5ca7ef2c6e5"}, - {file = "pybullet-3.1.8-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:2f4741953b56917231a44ff5767e2b5bb2e8bb2cb1ea6488f555f6754ef90751"}, - {file = "pybullet-3.1.8-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:df0ccb74e8e307506fd6cc57e5ad7b2c3b7d23ec2a7c8fa16c02fae863ded904"}, - {file = "pybullet-3.1.8.tar.gz", hash = "sha256:a7e6c7c77cab39e1559c98e4290c5138247b15d3a26a76a23b2737c159f3f905"}, -] pycparser = [ {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, ] -pygame = [ - {file = "pygame-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c84a93e6d33dafce9e25080ac557342333e15ef7e378ba84cb6181c52a8fd663"}, - {file = "pygame-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:a0842458b49257ab539b7b6622a242cabcddcb61178b8ae074aaceb890be75b6"}, - {file = "pygame-2.1.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6efa3fa472acb97c784224b59a89e80da6231f0dbf54df8442ffa3352c0534d6"}, - {file = "pygame-2.1.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:02a26b3be6cc478f18f4efa506ee5a585f68350857ac5e68e187301e943e3d6d"}, - {file = "pygame-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5c62fbdb30082f7e1dcfa253da48e7b4be7342d275b34b2efa51f6cffc5942b"}, - {file = "pygame-2.1.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a305dcf44f03a8dd7baefb97dc24949d7e719fd686cd3211121639aec4ce464"}, - {file = "pygame-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:847b4bc22edb1d77c992b5d56b19e1ab52e14687adb8bc3ed12a8a98fbd7e1ff"}, - {file = "pygame-2.1.0-cp310-cp310-win32.whl", hash = "sha256:e9368c105a8bccc8adfe7fd7fa5220d2b6c03979a3a57a8178c42f6fa9914ebc"}, - {file = "pygame-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9a81d057a7dea95850e44118f141a892fde93c938ccb08fbc5dd7f1a26c2f1fe"}, - {file = "pygame-2.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:ada3d33e7e6907d5c3bf771dc58c47ee6994a1e28fed55e4f8f8b817367beb8f"}, - {file = "pygame-2.1.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5a3edc8211d0cf39d1e4d7ded1a0727c53aeb21205963f184199521708bbb05c"}, - {file = "pygame-2.1.0-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:53c6fa767e3eef52d403eda5d032e48b6040ccce03fbd64af2f71843168118da"}, - {file = "pygame-2.1.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c28c6f764aa03a0245db12346f1da327c6f49bcc20e53aefec6eed57e4fbe1ce"}, - {file = "pygame-2.1.0-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5d36d530a8994c5bb8889816981f82b7942d8ec7651ca1d922d01302c1feecd2"}, - {file = "pygame-2.1.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdd488daa4ad33748d5ea806e311bfe01b9cc506def5288400072fcd66d226cf"}, - {file = "pygame-2.1.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9284e76923777c21b8bea19d8528be9cd62d0915139ed3c3cde6c43f849466f5"}, - {file = "pygame-2.1.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:49e5fb589a86169aa95b83d3429ee034799792374e13dbc0da83091d86365a4b"}, - {file = "pygame-2.1.0-cp36-cp36m-win32.whl", hash = "sha256:c6ee571995527e779b46cafee7ebef2dceb1a9c375143828e019293ff0efa167"}, - {file = "pygame-2.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:b400edd7391972e75b4243113089d6ea10b032e1306e8721efabb36d33c2d0f2"}, - {file = "pygame-2.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0d2f80b501aacd74a660d4422793ea1cd4e209bee385aac18d0a07bd671511ee"}, - {file = "pygame-2.1.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:32cb64627c2eb5c4c067ffe614e08ccb8987d096100d225e070dddce05725b63"}, - {file = "pygame-2.1.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:38b5a43ab02c162501e62b857ff2cb128076b0786dd4e1d8bea63db8326f9da1"}, - {file = "pygame-2.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba5bf655c892bbf4a9bafb4fcbc4c71023cc9a65f0cae0f3eba09a11018a858e"}, - {file = "pygame-2.1.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:add546fcbf8954f00647f5e7d595ab9389f6a7542a99fc5dca514e14fd799773"}, - {file = 
"pygame-2.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:987c0d5fcd7737c31b35df06f78932c48eeff2c97473001e224fdebd3292b2db"}, - {file = "pygame-2.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:594234050b50b57c538842155dc3095c9d4f994266325adb4dd008aee526157f"}, - {file = "pygame-2.1.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:59a5461ef317e4d233d1bb5ce63311ccad3e911a652bda159d3922351050158c"}, - {file = "pygame-2.1.0-cp37-cp37m-win32.whl", hash = "sha256:9b2ad10ffaa226ca40ae229143b0a118426aff42e2459b626d355846c59a765d"}, - {file = "pygame-2.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:4f73058569573af12c8181e032745f11d85f0799510965d938b1f16c7f13afcb"}, - {file = "pygame-2.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:85844714f82a5379100825473b1a7b24192b4a944aed3128da9386e26adc3bed"}, - {file = "pygame-2.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b0e96c0f68f6bb88da216765920c6dbc55ae83e70435d8ebac87d271fc058646"}, - {file = "pygame-2.1.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:3d5a76fa826202182d989e8399fca0c3c163fbb4f8ece773e77955a7a62cbed3"}, - {file = "pygame-2.1.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2bfefabe78bda7a1bfba253cbe2131038402ce2b32e4218feeba6431fe429abb"}, - {file = "pygame-2.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3804476fab6ec7230aa817ee5c3b378ba956321fdd5f91f51c97452c588869d2"}, - {file = "pygame-2.1.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70a11eec9bae6e8970c5bc4b3d0908eb2c42d4bd4ed488e41e49774b7cb41f57"}, - {file = "pygame-2.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4eff1db92d53dc2e49ed832dd6c76530e1e2b5954eef091f6af41b41d2d5c3ac"}, - {file = "pygame-2.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1eb91198fc47c2e4fdc19c544b5d94534a70fd877f5c342228feb05e9fc4bef"}, - {file = "pygame-2.1.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:15d4e42214f93d8c60120e16b690ad03da7f0b3b66f75db8966bccf8c66c4690"}, - {file = "pygame-2.1.0-cp38-cp38-win32.whl", hash = "sha256:e533f4bf9dc1a91cfd608b9bfb028c6a92383e731c502660933f0f9b812045a6"}, - {file = "pygame-2.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:692fe4498c353d663d45d05354fb47c9f6bf324d10b53844b9ed7f60e6c8cefa"}, - {file = "pygame-2.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:472b81ba6b61ffe5879ac3d0da2e5cb235e0e4da471ad4038f013a7710ab53ab"}, - {file = "pygame-2.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bb55368d455ab9518b97febd33a8d417988397b019c9408993be034e0b5a7db6"}, - {file = "pygame-2.1.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f8379052cfbc278b11e31bc97f2e7f5998959c50837c4d54f4e424a541e0c5d9"}, - {file = "pygame-2.1.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b545634f96132af1d31dcb873cf03a9c4a5654ae39d9ee126db0b2eba2806788"}, - {file = "pygame-2.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5eb3dede55d005adea8504f8c9230b9dc2c84c1c728efe93a9718fa1af824dc8"}, - {file = "pygame-2.1.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f628f9f26c8dadf72fabc9ae0ce5fe7f60d76be71a3407abc756b4d1fd030fa0"}, - {file = "pygame-2.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4061ac4e81bb36ec8f0a7027582c1c4dd32a939882e008165627103cb0b3985"}, - {file = 
"pygame-2.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fad7b5351931cb68d19d7ecc0b21021fe23237d8fba8c455b5af4a79e1c7c536"}, - {file = "pygame-2.1.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0ab3e4763e0cebf08c55154f4167cdae3683674604a71e1437123225f2a9b36"}, - {file = "pygame-2.1.0-cp39-cp39-win32.whl", hash = "sha256:64ec45215c2cfc4051bb0f58d26aee3b50a39b1b0a2e6fe8417bb352a6443aad"}, - {file = "pygame-2.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:86c66b917afc6330a91ac8c7169c36c77ec536578d1d7724644d41f904e2d146"}, - {file = "pygame-2.1.0-pp36-pypy36_pp73-win32.whl", hash = "sha256:b0e405fdde643f14d60c2dd140f110a5a38f588396a8b61a1a86374f25cba589"}, - {file = "pygame-2.1.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:646e871ff5ab7f933cde5ea2bff7b6cd74d7369f43e84a291baebe00bb9a8f6f"}, - {file = "pygame-2.1.0-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:88a2dabe617e6173003b65762c636947719da3e2d881a4ea47298e8d70886386"}, - {file = "pygame-2.1.0-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7281366b4ebd7f16eac8ec6a6e2adb4c729beda178ea82637d9981e93dd40c9b"}, - {file = "pygame-2.1.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0227728f2ef751fac43b89f4bcc5c65ce39c855b2a3391ddf2e6024dd667e6bd"}, - {file = "pygame-2.1.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ab5aba8677d135b94c4714e8256efdfffefc164f354a4d05b846588caf43b99"}, - {file = "pygame-2.1.0.tar.gz", hash = "sha256:232e51104db0e573221660d172af8e6fc2c0fda183c5dbf2aa52170f29aa9ec9"}, -] Pygments = [ {file = "Pygments-2.13.0-py3-none-any.whl", hash = "sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42"}, {file = "Pygments-2.13.0.tar.gz", hash = "sha256:56a8508ae95f98e2b9bdf93a6be5ae3f7d8af858b43e02c5a2ff083726be40c1"}, @@ -3946,29 +3588,6 @@ pyreadline3 = [ {file = "pyreadline3-3.4.1-py3-none-any.whl", hash = "sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb"}, {file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"}, ] -pyrsistent = [ - {file = "pyrsistent-0.18.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:df46c854f490f81210870e509818b729db4488e1f30f2a1ce1698b2295a878d1"}, - {file = "pyrsistent-0.18.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d45866ececf4a5fff8742c25722da6d4c9e180daa7b405dc0a2a2790d668c26"}, - {file = "pyrsistent-0.18.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4ed6784ceac462a7d6fcb7e9b663e93b9a6fb373b7f43594f9ff68875788e01e"}, - {file = "pyrsistent-0.18.1-cp310-cp310-win32.whl", hash = "sha256:e4f3149fd5eb9b285d6bfb54d2e5173f6a116fe19172686797c056672689daf6"}, - {file = "pyrsistent-0.18.1-cp310-cp310-win_amd64.whl", hash = "sha256:636ce2dc235046ccd3d8c56a7ad54e99d5c1cd0ef07d9ae847306c91d11b5fec"}, - {file = "pyrsistent-0.18.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e92a52c166426efbe0d1ec1332ee9119b6d32fc1f0bbfd55d5c1088070e7fc1b"}, - {file = "pyrsistent-0.18.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7a096646eab884bf8bed965bad63ea327e0d0c38989fc83c5ea7b8a87037bfc"}, - {file = "pyrsistent-0.18.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cdfd2c361b8a8e5d9499b9082b501c452ade8bbf42aef97ea04854f4a3f43b22"}, - 
{file = "pyrsistent-0.18.1-cp37-cp37m-win32.whl", hash = "sha256:7ec335fc998faa4febe75cc5268a9eac0478b3f681602c1f27befaf2a1abe1d8"}, - {file = "pyrsistent-0.18.1-cp37-cp37m-win_amd64.whl", hash = "sha256:6455fc599df93d1f60e1c5c4fe471499f08d190d57eca040c0ea182301321286"}, - {file = "pyrsistent-0.18.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fd8da6d0124efa2f67d86fa70c851022f87c98e205f0594e1fae044e7119a5a6"}, - {file = "pyrsistent-0.18.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bfe2388663fd18bd8ce7db2c91c7400bf3e1a9e8bd7d63bf7e77d39051b85ec"}, - {file = "pyrsistent-0.18.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e3e1fcc45199df76053026a51cc59ab2ea3fc7c094c6627e93b7b44cdae2c8c"}, - {file = "pyrsistent-0.18.1-cp38-cp38-win32.whl", hash = "sha256:b568f35ad53a7b07ed9b1b2bae09eb15cdd671a5ba5d2c66caee40dbf91c68ca"}, - {file = "pyrsistent-0.18.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1b96547410f76078eaf66d282ddca2e4baae8964364abb4f4dcdde855cd123a"}, - {file = "pyrsistent-0.18.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f87cc2863ef33c709e237d4b5f4502a62a00fab450c9e020892e8e2ede5847f5"}, - {file = "pyrsistent-0.18.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bc66318fb7ee012071b2792024564973ecc80e9522842eb4e17743604b5e045"}, - {file = "pyrsistent-0.18.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:914474c9f1d93080338ace89cb2acee74f4f666fb0424896fcfb8d86058bf17c"}, - {file = "pyrsistent-0.18.1-cp39-cp39-win32.whl", hash = "sha256:1b34eedd6812bf4d33814fca1b66005805d3640ce53140ab8bbb1e2651b0d9bc"}, - {file = "pyrsistent-0.18.1-cp39-cp39-win_amd64.whl", hash = "sha256:e24a828f57e0c337c8d8bb9f6b12f09dfdf0273da25fda9e314f0b684b415a07"}, - {file = "pyrsistent-0.18.1.tar.gz", hash = "sha256:d4d61f8b993a7255ba714df3aca52700f8125289f84f704cf80916517c46eb96"}, -] pytest = [ {file = "pytest-7.1.3-py3-none-any.whl", hash = "sha256:1377bda3466d70b55e3f5cecfa55bb7cfcf219c7964629b967c37cf0bda818b7"}, {file = "pytest-7.1.3.tar.gz", hash = "sha256:4f365fec2dff9c1162f834d9f18af1ba13062db0c708bf7b946f8a5c76180c39"}, @@ -3981,10 +3600,6 @@ pytz = [ {file = "pytz-2022.2.1-py2.py3-none-any.whl", hash = "sha256:220f481bdafa09c3955dfbdddb7b57780e9a94f5127e35456a48589b9e0c0197"}, {file = "pytz-2022.2.1.tar.gz", hash = "sha256:cea221417204f2d1a2aa03ddae3e867921971d0d76f14d87abb4414415bbdcf5"}, ] -PyVirtualDisplay = [ - {file = "PyVirtualDisplay-3.0-py3-none-any.whl", hash = "sha256:40d4b8dfe4b8de8552e28eb367647f311f88a130bf837fe910e7f180d5477f0e"}, - {file = "PyVirtualDisplay-3.0.tar.gz", hash = "sha256:09755bc3ceb6eb725fb07eca5425f43f2358d3bf08e00d2a9b792a1aedd16159"}, -] PyYAML = [ {file = "PyYAML-5.4.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:3b2b1824fe7112845700f815ff6a489360226a5609b96ec2190a45e62a9fc922"}, {file = "PyYAML-5.4.1-cp27-cp27m-win32.whl", hash = "sha256:129def1b7c1bf22faffd67b8f3724645203b79d8f4cc81f674654d9902cb4393"}, @@ -4020,26 +3635,6 @@ pyyaml_env_tag = [ {file = "pyyaml_env_tag-0.1-py3-none-any.whl", hash = "sha256:af31106dec8a4d68c60207c1886031cbf839b68aa7abccdb19868200532c2069"}, {file = "pyyaml_env_tag-0.1.tar.gz", hash = "sha256:70092675bda14fdec33b31ba77e7543de9ddc88f2e5b99160396572d11525bdb"}, ] -ray = [ - {file = "ray-1.10.0-cp36-cp36m-macosx_10_15_intel.whl", hash = "sha256:e27172af33243604bdb6c95c6a5d005f5a1524de7fc259f17d2340675a18a0a0"}, - {file = 
"ray-1.10.0-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:9a0e72708277be307c6a7e4628c06d19edbed9c1494bfddfe4990aa71de7caad"}, - {file = "ray-1.10.0-cp36-cp36m-win_amd64.whl", hash = "sha256:81e251c64c5f00aa285b4292d3bab2d8c7ff52b09eaac33e927e2e8d8860961e"}, - {file = "ray-1.10.0-cp37-cp37m-macosx_10_15_intel.whl", hash = "sha256:7b6d63c5e6bfcab725aded6394a654470ca1c2e33114bca24d33494bd754f127"}, - {file = "ray-1.10.0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:033af681fe959f9412d98a2addefee4e196bd194180170f9246b1c7756abebc2"}, - {file = "ray-1.10.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1a310ea9c5d17e04f3ba346bae2901915380455f54bfcd168c3336161cd9fbf7"}, - {file = "ray-1.10.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:0c73ec6fc93e58e7f871f01037aa943886d2afde79bd8dbe4f2748c0a9e9cf7e"}, - {file = "ray-1.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8445c615c2f621f36bfb78833b5d00caa8ccb77d2cbd8c7d16ebe6e8f7d597ea"}, - {file = "ray-1.10.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:e5992bb9128a14d47d2ac104086f07bd90300707b73ce532ce71f8c4c8a2fa4d"}, - {file = "ray-1.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:f8cf23ba4f319ca0ea202a74b738bb7e1995cb182100f10458b97a5785cb7290"}, - {file = "ray-1.10.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:33584a13b175bddd440a12445c369b174bda3f79e3d6a5345d665a622eede0e0"}, - {file = "ray-1.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:669548bfaae3a0bcc2ddd7b515b874b2a66e4268b0947050b7d6f53794bd2ae3"}, - {file = "ray-1.10.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:e789b18f179ae573710c718c091a03b51a8c61205e8531b92079a323ac6379a5"}, - {file = "ray-1.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:db953540f0f7e5e666d263f847b45ad368e9de5ad02da9df4e9e5ee508d5d6b6"}, -] -redis = [ - {file = "redis-4.3.4-py3-none-any.whl", hash = "sha256:a52d5694c9eb4292770084fa8c863f79367ca19884b329ab574d5cb2036b3e54"}, - {file = "redis-4.3.4.tar.gz", hash = "sha256:ddf27071df4adf3821c4f2ca59d67525c3a82e5f268bed97b813cb4fabf87880"}, -] requests = [ {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, @@ -4052,10 +3647,6 @@ rich = [ {file = "rich-11.2.0-py3-none-any.whl", hash = "sha256:d5f49ad91fb343efcae45a2b2df04a9755e863e50413623ab8c9e74f05aee52b"}, {file = "rich-11.2.0.tar.gz", hash = "sha256:1a6266a5738115017bb64a66c59c717e7aa047b3ae49a011ede4abdeffc6536e"}, ] -rl-games = [ - {file = "rl-games-1.5.2.tar.gz", hash = "sha256:6d4f5513c917115eed8ebdcab89d0086ea035ce1d0c992dbfba0401c64c63547"}, - {file = "rl_games-1.5.2-py3-none-any.whl", hash = "sha256:104cf667c02c90e4604221bf6d4ea58f231cfb3d7678d62d453308d69d58e4e5"}, -] rsa = [ {file = "rsa-4.7.2-py3-none-any.whl", hash = "sha256:78f9a9bf4e7be0c5ded4583326e7461e3a3c5aae24073648b4bdfa797d78c9d2"}, {file = "rsa-4.7.2.tar.gz", hash = "sha256:9d689e6ca1b3038bc82bf8d23e944b6b6037bc02301a574935b2dd946e0353b9"}, @@ -4285,13 +3876,6 @@ tensorboard-data-server = [ tensorboard-plugin-wit = [ {file = "tensorboard_plugin_wit-1.8.1-py3-none-any.whl", hash = "sha256:ff26bdd583d155aa951ee3b152b3d0cffae8005dc697f72b44a8e8c2a77a8cbe"}, ] -tensorboardX = [ - {file = "tensorboardX-2.5.1-py2.py3-none-any.whl", hash = "sha256:8808133ccca673cd04076f6f2a85cf2d39bb2d0393a0f20d0f9cbb06d472b57e"}, - {file = "tensorboardX-2.5.1.tar.gz", hash = 
"sha256:ea85a3446f22ce8a917fe4fa4d8a7a96222ef84ac835267d038c34bb99f6d61b"}, -] -termcolor = [ - {file = "termcolor-1.1.0.tar.gz", hash = "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"}, -] threadpoolctl = [ {file = "threadpoolctl-3.1.0-py3-none-any.whl", hash = "sha256:8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b"}, {file = "threadpoolctl-3.1.0.tar.gz", hash = "sha256:a335baacfaa4400ae1f0d8e3a58d6674d2f8828e3716bb2802c44955ad391380"}, @@ -4334,27 +3918,6 @@ torch = [ {file = "torch-1.12.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:bfec2843daa654f04fda23ba823af03e7b6f7650a873cdb726752d0e3718dada"}, {file = "torch-1.12.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:69fe2cae7c39ccadd65a123793d30e0db881f1c1927945519c5c17323131437e"}, ] -torchvision = [ - {file = "torchvision-0.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:19286a733c69dcbd417b86793df807bd227db5786ed787c17297741a9b0d0fc7"}, - {file = "torchvision-0.13.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:08f592ea61836ebeceb5c97f4d7a813b9d7dc651bbf7ce4401563ccfae6a21fc"}, - {file = "torchvision-0.13.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:ef5fe3ec1848123cd0ec74c07658192b3147dcd38e507308c790d5943e87b88c"}, - {file = "torchvision-0.13.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:099874088df104d54d8008f2a28539ca0117b512daed8bf3c2bbfa2b7ccb187a"}, - {file = "torchvision-0.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:8e4d02e4d8a203e0c09c10dfb478214c224d080d31efc0dbf36d9c4051f7f3c6"}, - {file = "torchvision-0.13.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5e631241bee3661de64f83616656224af2e3512eb2580da7c08e08b8c965a8ac"}, - {file = "torchvision-0.13.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:899eec0b9f3b99b96d6f85b9aa58c002db41c672437677b553015b9135b3be7e"}, - {file = "torchvision-0.13.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:83e9e2457f23110fd53b0177e1bc621518d6ea2108f570e853b768ce36b7c679"}, - {file = "torchvision-0.13.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7552e80fa222252b8b217a951c85e172a710ea4cad0ae0c06fbb67addece7871"}, - {file = "torchvision-0.13.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f230a1a40ed70d51e463ce43df243ec520902f8725de2502e485efc5eea9d864"}, - {file = "torchvision-0.13.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e9a563894f9fa40692e24d1aa58c3ef040450017cfed3598ff9637f404f3fe3b"}, - {file = "torchvision-0.13.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7cb789ceefe6dcd0dc8eeda37bfc45efb7cf34770eac9533861d51ca508eb5b3"}, - {file = "torchvision-0.13.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:87c137f343197769a51333076e66bfcd576301d2cd8614b06657187c71b06c4f"}, - {file = "torchvision-0.13.1-cp38-cp38-win_amd64.whl", hash = "sha256:4d8bf321c4380854ef04613935fdd415dce29d1088a7ff99e06e113f0efe9203"}, - {file = "torchvision-0.13.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0298bae3b09ac361866088434008d82b99d6458fe8888c8df90720ef4b347d44"}, - {file = "torchvision-0.13.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c5ed609c8bc88c575226400b2232e0309094477c82af38952e0373edef0003fd"}, - {file = "torchvision-0.13.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:3567fb3def829229ec217c1e38f08c5128ff7fb65854cac17ebac358ff7aa309"}, - {file = "torchvision-0.13.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b167934a5943242da7b1e59318f911d2d253feeca0d13ad5d832b58eed943401"}, - {file = "torchvision-0.13.1-cp39-cp39-win_amd64.whl", hash = 
"sha256:0e77706cc90462653620e336bb90daf03d7bf1b88c3a9a3037df8d111823a56e"}, -] tqdm = [ {file = "tqdm-4.64.1-py2.py3-none-any.whl", hash = "sha256:6fee160d6ffcd1b1c68c65f14c829c22832bc401726335ce92c52d395944a6a1"}, {file = "tqdm-4.64.1.tar.gz", hash = "sha256:5f4f682a004951c1b450bc753c710e9280c5746ce6ffedee253ddbcbf54cf1e4"}, @@ -4418,72 +3981,6 @@ wheel = [ {file = "wheel-0.37.1-py2.py3-none-any.whl", hash = "sha256:4bdcd7d840138086126cd09254dc6195fb4fc6f01c050a1d7236f2630db1d22a"}, {file = "wheel-0.37.1.tar.gz", hash = "sha256:e9a504e793efbca1b8e0e9cb979a249cf4a0a7b5b8c9e8b65a5e39d49529c1c4"}, ] -wrapt = [ - {file = "wrapt-1.14.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:1b376b3f4896e7930f1f772ac4b064ac12598d1c38d04907e696cc4d794b43d3"}, - {file = "wrapt-1.14.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:903500616422a40a98a5a3c4ff4ed9d0066f3b4c951fa286018ecdf0750194ef"}, - {file = "wrapt-1.14.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5a9a0d155deafd9448baff28c08e150d9b24ff010e899311ddd63c45c2445e28"}, - {file = "wrapt-1.14.1-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ddaea91abf8b0d13443f6dac52e89051a5063c7d014710dcb4d4abb2ff811a59"}, - {file = "wrapt-1.14.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:36f582d0c6bc99d5f39cd3ac2a9062e57f3cf606ade29a0a0d6b323462f4dd87"}, - {file = "wrapt-1.14.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:7ef58fb89674095bfc57c4069e95d7a31cfdc0939e2a579882ac7d55aadfd2a1"}, - {file = "wrapt-1.14.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:e2f83e18fe2f4c9e7db597e988f72712c0c3676d337d8b101f6758107c42425b"}, - {file = "wrapt-1.14.1-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:ee2b1b1769f6707a8a445162ea16dddf74285c3964f605877a20e38545c3c462"}, - {file = "wrapt-1.14.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:833b58d5d0b7e5b9832869f039203389ac7cbf01765639c7309fd50ef619e0b1"}, - {file = "wrapt-1.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:80bb5c256f1415f747011dc3604b59bc1f91c6e7150bd7db03b19170ee06b320"}, - {file = "wrapt-1.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:07f7a7d0f388028b2df1d916e94bbb40624c59b48ecc6cbc232546706fac74c2"}, - {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02b41b633c6261feff8ddd8d11c711df6842aba629fdd3da10249a53211a72c4"}, - {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2fe803deacd09a233e4762a1adcea5db5d31e6be577a43352936179d14d90069"}, - {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:257fd78c513e0fb5cdbe058c27a0624c9884e735bbd131935fd49e9fe719d310"}, - {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4fcc4649dc762cddacd193e6b55bc02edca674067f5f98166d7713b193932b7f"}, - {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:11871514607b15cfeb87c547a49bca19fde402f32e2b1c24a632506c0a756656"}, - {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"}, - {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"}, - {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"}, - {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = 
"sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"}, - {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"}, - {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"}, - {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:a85d2b46be66a71bedde836d9e41859879cc54a2a04fad1191eb50c2066f6e9d"}, - {file = "wrapt-1.14.1-cp35-cp35m-win32.whl", hash = "sha256:dbcda74c67263139358f4d188ae5faae95c30929281bc6866d00573783c422b7"}, - {file = "wrapt-1.14.1-cp35-cp35m-win_amd64.whl", hash = "sha256:b21bb4c09ffabfa0e85e3a6b623e19b80e7acd709b9f91452b8297ace2a8ab00"}, - {file = "wrapt-1.14.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:9e0fd32e0148dd5dea6af5fee42beb949098564cc23211a88d799e434255a1f4"}, - {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9736af4641846491aedb3c3f56b9bc5568d92b0692303b5a305301a95dfd38b1"}, - {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b02d65b9ccf0ef6c34cba6cf5bf2aab1bb2f49c6090bafeecc9cd81ad4ea1c1"}, - {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21ac0156c4b089b330b7666db40feee30a5d52634cc4560e1905d6529a3897ff"}, - {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:9f3e6f9e05148ff90002b884fbc2a86bd303ae847e472f44ecc06c2cd2fcdb2d"}, - {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:6e743de5e9c3d1b7185870f480587b75b1cb604832e380d64f9504a0535912d1"}, - {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d79d7d5dc8a32b7093e81e97dad755127ff77bcc899e845f41bf71747af0c569"}, - {file = "wrapt-1.14.1-cp36-cp36m-win32.whl", hash = "sha256:81b19725065dcb43df02b37e03278c011a09e49757287dca60c5aecdd5a0b8ed"}, - {file = "wrapt-1.14.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b014c23646a467558be7da3d6b9fa409b2c567d2110599b7cf9a0c5992b3b471"}, - {file = "wrapt-1.14.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:88bd7b6bd70a5b6803c1abf6bca012f7ed963e58c68d76ee20b9d751c74a3248"}, - {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5901a312f4d14c59918c221323068fad0540e34324925c8475263841dbdfe68"}, - {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d77c85fedff92cf788face9bfa3ebaa364448ebb1d765302e9af11bf449ca36d"}, - {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d649d616e5c6a678b26d15ece345354f7c2286acd6db868e65fcc5ff7c24a77"}, - {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7d2872609603cb35ca513d7404a94d6d608fc13211563571117046c9d2bcc3d7"}, - {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:ee6acae74a2b91865910eef5e7de37dc6895ad96fa23603d1d27ea69df545015"}, - {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:2b39d38039a1fdad98c87279b48bc5dce2c0ca0d73483b12cb72aa9609278e8a"}, - {file = "wrapt-1.14.1-cp37-cp37m-win32.whl", hash = "sha256:60db23fa423575eeb65ea430cee741acb7c26a1365d103f7b0f6ec412b893853"}, - {file = "wrapt-1.14.1-cp37-cp37m-win_amd64.whl", hash = 
"sha256:709fe01086a55cf79d20f741f39325018f4df051ef39fe921b1ebe780a66184c"}, - {file = "wrapt-1.14.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8c0ce1e99116d5ab21355d8ebe53d9460366704ea38ae4d9f6933188f327b456"}, - {file = "wrapt-1.14.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e3fb1677c720409d5f671e39bac6c9e0e422584e5f518bfd50aa4cbbea02433f"}, - {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:642c2e7a804fcf18c222e1060df25fc210b9c58db7c91416fb055897fc27e8cc"}, - {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b7c050ae976e286906dd3f26009e117eb000fb2cf3533398c5ad9ccc86867b1"}, - {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef3f72c9666bba2bab70d2a8b79f2c6d2c1a42a7f7e2b0ec83bb2f9e383950af"}, - {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:01c205616a89d09827986bc4e859bcabd64f5a0662a7fe95e0d359424e0e071b"}, - {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5a0f54ce2c092aaf439813735584b9537cad479575a09892b8352fea5e988dc0"}, - {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2cf71233a0ed05ccdabe209c606fe0bac7379fdcf687f39b944420d2a09fdb57"}, - {file = "wrapt-1.14.1-cp38-cp38-win32.whl", hash = "sha256:aa31fdcc33fef9eb2552cbcbfee7773d5a6792c137b359e82879c101e98584c5"}, - {file = "wrapt-1.14.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1967f46ea8f2db647c786e78d8cc7e4313dbd1b0aca360592d8027b8508e24d"}, - {file = "wrapt-1.14.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3232822c7d98d23895ccc443bbdf57c7412c5a65996c30442ebe6ed3df335383"}, - {file = "wrapt-1.14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:988635d122aaf2bdcef9e795435662bcd65b02f4f4c1ae37fbee7401c440b3a7"}, - {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cca3c2cdadb362116235fdbd411735de4328c61425b0aa9f872fd76d02c4e86"}, - {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d52a25136894c63de15a35bc0bdc5adb4b0e173b9c0d07a2be9d3ca64a332735"}, - {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40e7bc81c9e2b2734ea4bc1aceb8a8f0ceaac7c5299bc5d69e37c44d9081d43b"}, - {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b9b7a708dd92306328117d8c4b62e2194d00c365f18eff11a9b53c6f923b01e3"}, - {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6a9a25751acb379b466ff6be78a315e2b439d4c94c1e99cb7266d40a537995d3"}, - {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:34aa51c45f28ba7f12accd624225e2b1e5a3a45206aa191f6f9aac931d9d56fe"}, - {file = "wrapt-1.14.1-cp39-cp39-win32.whl", hash = "sha256:dee0ce50c6a2dd9056c20db781e9c1cfd33e77d2d569f5d1d9321c641bb903d5"}, - {file = "wrapt-1.14.1-cp39-cp39-win_amd64.whl", hash = "sha256:dee60e1de1898bde3b238f18340eec6148986da0455d8ba7848d50470a7a32fb"}, - {file = "wrapt-1.14.1.tar.gz", hash = "sha256:380a85cf89e0e69b7cfbe2ea9f765f004ff419f34194018a6827ac0e3edfed4d"}, -] zipp = [ {file = "zipp-3.8.1-py3-none-any.whl", hash = "sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009"}, {file = "zipp-3.8.1.tar.gz", hash = "sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2"}, diff --git 
a/pyproject.toml b/pyproject.toml index 68c58b196..256f018c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,11 +59,6 @@ ale-py = "0.7.4" AutoROM = {extras = ["accept-rom-license"], version = "^0.4.2"} opencv-python = "^4.6.0.66" -[tool.poetry.group.pybullet] -optional = true -[tool.poetry.group.pybullet.dependencies] -pybullet = "3.1.8" - [tool.poetry.group.procgen] optional = true [tool.poetry.group.procgen.dependencies]