Merge pull request #1 from vmoens/update_dqn_example
Update dqn example
vmoens authored Sep 8, 2023
2 parents 51b673a + c673a31 commit d1dfc1b
Showing 14 changed files with 184 additions and 86 deletions.
17 changes: 14 additions & 3 deletions .circleci/unittest/linux/scripts/run_all.sh
@@ -122,10 +122,21 @@ fi
 git submodule sync && git submodule update --init --recursive
 
 printf "Installing PyTorch with %s\n" "${CU_VERSION}"
-if [ "${CU_VERSION:-}" == cpu ] ; then
-    pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+if [[ "$TORCH_VERSION" == "nightly" ]]; then
+    if [ "${CU_VERSION:-}" == cpu ] ; then
+        pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+    else
+        pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CU_VERSION
+    fi
+elif [[ "$TORCH_VERSION" == "stable" ]]; then
+    if [ "${CU_VERSION:-}" == cpu ] ; then
+        pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+    else
+        pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$CU_VERSION
+    fi
 else
-    pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CU_VERSION
+    printf "Failed to install pytorch"
+    exit 1
 fi
 
 # smoke test
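The smoke test that follows can confirm which build actually landed. A minimal sketch (not part of this diff; plain torch introspection):

import torch

print(torch.__version__)   # e.g. "2.1.0" for a stable build, "2.2.0.devYYYYMMDD" for a nightly
print(torch.version.cuda)  # None on CPU-only builds, e.g. "11.8" on CUDA builds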
1 change: 1 addition & 0 deletions .github/workflows/test-linux-cpu.yml
@@ -34,6 +34,7 @@ jobs:
           # Set env vars from matrix
           export PYTHON_VERSION=${{ matrix.python_version }}
           export CU_VERSION="cpu"
+          export TORCH_VERSION=nightly
           echo "PYTHON_VERSION: $PYTHON_VERSION"
           echo "CU_VERSION: $CU_VERSION"
1 change: 1 addition & 0 deletions .github/workflows/test-linux-gpu.yml
@@ -39,6 +39,7 @@ jobs:
           # Commenting these out for now because the GPU test are not working inside docker
           export CUDA_ARCH_VERSION=${{ matrix.cuda_arch_version }}
           export CU_VERSION="cu${CUDA_ARCH_VERSION:0:2}${CUDA_ARCH_VERSION:3:1}"
+          export TORCH_VERSION=nightly
           # Remove the following line when the GPU tests are working inside docker, and uncomment the above lines
           #export CU_VERSION="cpu"
50 changes: 50 additions & 0 deletions .github/workflows/test-linux-stable-gpu.yml
@@ -0,0 +1,50 @@
name: Unit-tests on Linux GPU, latest stable release

on:
  pull_request:
  push:
    branches:
      - nightly
      - main
      - release/*
  workflow_dispatch:

env:
  CHANNEL: "nightly"

concurrency:
  # Documentation suggests ${{ github.head_ref }}, but that's only available on pull_request/pull_request_target triggers, so using ${{ github.ref }}.
  # On master, we want all builds to complete even if merging happens faster to make it easier to discover at which point something broke.
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && format('ci-master-{0}', github.sha) || format('ci-{0}', github.ref) }}
  cancel-in-progress: true

jobs:
  tests:
    strategy:
      matrix:
        python_version: ["3.9"] # "3.8", "3.9", "3.10", "3.11"
        cuda_arch_version: ["11.8"] # "11.6", "11.7"
      fail-fast: false
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      repository: pytorch/rl
      docker-image: "nvidia/cuda:12.1.0-devel-ubuntu22.04"
      gpu-arch-type: cuda
      gpu-arch-version: ${{ matrix.cuda_arch_version }}
      timeout: 90
      script: |
        # Set env vars from matrix
        export PYTHON_VERSION=${{ matrix.python_version }}
        # Commenting these out for now because the GPU test are not working inside docker
        export CUDA_ARCH_VERSION=${{ matrix.cuda_arch_version }}
        export CU_VERSION="cu${CUDA_ARCH_VERSION:0:2}${CUDA_ARCH_VERSION:3:1}"
        export TORCH_VERSION=stable
        # Remove the following line when the GPU tests are working inside docker, and uncomment the above lines
        #export CU_VERSION="cpu"
        echo "PYTHON_VERSION: $PYTHON_VERSION"
        echo "CU_VERSION: $CU_VERSION"
        ## setup_env.sh
        bash .circleci/unittest/linux/scripts/run_all.sh
1 change: 1 addition & 0 deletions .github/workflows/test-macos-cpu.yml
@@ -33,6 +33,7 @@ jobs:
           export PYTHON_VERSION=${{ matrix.python_version }}
           export CU_VERSION="cpu"
           export SYSTEM_VERSION_COMPAT=0
+          export TORCH_VERSION=nightly
           echo "PYTHON_VERSION: $PYTHON_VERSION"
           echo "CU_VERSION: $CU_VERSION"
63 changes: 37 additions & 26 deletions examples/dqn/dqn_atari.py
@@ -3,40 +3,36 @@
Deep Q-Learning Algorithm on Atari Environments.
"""

import random
import time

import gym
import numpy as np
import torch.nn
import torch.optim
import tqdm
from tensordict import TensorDict

from torchrl.collectors import SyncDataCollector
from torchrl.data import CompositeSpec, LazyMemmapStorage, TensorDictReplayBuffer
from torchrl.envs import (
    CatFrames,
    default_info_dict_reader,
    DoubleToFloat,
    ExplorationType,
    GrayScale,
    NoopResetEnv,
    Resize,
    RewardClipping,
    RewardSum,
    set_exploration_type,
    StepCounter,
    ToTensorImage,
    TransformedEnv,
    VecNorm,
)
from torchrl.envs.libs.gym import GymWrapper
from torchrl.modules import ConvNet, EGreedyWrapper, MLP, QValueActor
from torchrl.objectives import DQNLoss, HardUpdate
from torchrl.record.loggers import generate_exp_name, get_logger
@@ -91,6 +87,7 @@ def make_env(env_name, device, is_test=False):
     # env.append_transform(VecNorm(in_keys=["pixels"]))
     return env
 
+
 # ====================================================================
 # Model utils
 # --------------------------------------------------------------------
@@ -137,6 +134,7 @@ def make_dqn_model(env_name):
 # Collector utils
 # --------------------------------------------------------------------
 
+
 def make_collector(env_name, policy, device):
     collector_class = SyncDataCollector
     collector = collector_class(
@@ -151,15 +149,16 @@ def make_collector(env_name, policy, device):
     collector.set_seed(seed)
     return collector
 
+
 # ====================================================================
 # Collector and replay buffer utils
 # --------------------------------------------------------------------
 
+
 def make_replay_buffer(
-        batch_size,
-        buffer_scratch_dir="/tmp/",
-        prefetch=3,
+    batch_size,
+    buffer_scratch_dir="/tmp/",
+    prefetch=3,
 ):
     replay_buffer = TensorDictReplayBuffer(
         pin_memory=False,
@@ -173,6 +172,7 @@ def make_replay_buffer(
     )
     return replay_buffer
 
+
 # ====================================================================
 # Discrete DQN Loss
 # --------------------------------------------------------------------
@@ -187,9 +187,12 @@ def make_loss_module(value_network):
         delay_value=True,
     )
     dqn_loss.make_value_estimator(gamma=gamma)
-    targ_net_updater = HardUpdate(dqn_loss, value_network_update_interval=hard_update_freq)
+    targ_net_updater = HardUpdate(
+        dqn_loss, value_network_update_interval=hard_update_freq
+    )
     return dqn_loss, targ_net_updater
 
+
 # ====================================================================
 # Other component utils
 # --------------------------------------------------------------------
@@ -234,7 +237,9 @@ def make_logger(backend="csv"):
 
     # Make the components
     model = make_dqn_model(env_name)
-    model_explore = EGreedyWrapper(model, annealing_num_steps=annealing_frames, eps_end=end_e).to(device)
+    model_explore = EGreedyWrapper(
+        model, annealing_num_steps=annealing_frames, eps_end=end_e
+    ).to(device)
     collector = make_collector(env_name, model_explore, device)
     replay_buffer = make_replay_buffer(batch_size)
     loss_module, target_net_updater = make_loss_module(model)
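For context on EGreedyWrapper: it holds an epsilon that anneals linearly from eps_init toward eps_end as step() consumes frames. A self-contained sketch with hypothetical toy dimensions (not the example's actual config):

import torch.nn as nn
from torchrl.modules import EGreedyWrapper, QValueActor

# Toy Q-network: 4-dim observation, 2 discrete actions (hypothetical sizes).
actor = QValueActor(nn.Linear(4, 2), in_keys=["observation"])
actor_explore = EGreedyWrapper(
    actor, eps_init=1.0, eps_end=0.05, annealing_num_steps=1000
)
for _ in range(10):
    actor_explore.step(frames=100)  # typically called once per collected batch
print(actor_explore.eps)  # ~0.05 once annealing_num_steps frames are consumed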
@@ -254,8 +259,14 @@ def make_logger(backend="csv"):
         episode_rewards = data["next", "episode_reward"][data["next", "done"]]
         if len(episode_rewards) > 0:
             episode_length = data["next", "step_count"][data["next", "done"]]
-            logger.log_scalar("reward_train", episode_rewards.mean().item(), collected_frames)
-            logger.log_scalar("episode_length_train", episode_length.sum().item() / len(episode_length), collected_frames)
+            logger.log_scalar(
+                "reward_train", episode_rewards.mean().item(), collected_frames
+            )
+            logger.log_scalar(
+                "episode_length_train",
+                episode_length.sum().item() / len(episode_length),
+                collected_frames,
+            )
 
         pbar.update(data.numel())
         data = data.reshape(-1)
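Aside (not from the diff): the sum-over-count expression logged above is just the mean episode length; for an integer step_count tensor the two are interchangeable:

import torch

episode_length = torch.tensor([10, 20, 30])  # hypothetical step counts
assert episode_length.sum().item() / len(episode_length) == episode_length.float().mean().item()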
24 changes: 18 additions & 6 deletions test/test_transforms.py
@@ -383,10 +383,11 @@ def test_transform_compose(self):
         assert data["reward"] == 2
         assert data["reward_clip"] == 0.1
 
-    def test_transform_env(self):
-        env = ContinuousActionVecMockEnv()
+    @pytest.mark.parametrize("device", get_default_devices())
+    def test_transform_env(self, device):
+        base_env = ContinuousActionVecMockEnv(device=device)
         env = TransformedEnv(
-            env,
+            base_env,
             ClipTransform(
                 in_keys=["observation", "reward"],
                 in_keys_inv=["observation_orig"],
@@ -395,6 +396,7 @@ def test_transform_env(self):
             ),
         )
         r = env.rollout(3)
+        assert r.device == device
         assert (r["observation"] <= 0.1).all()
         assert (r["next", "observation"] <= 0.1).all()
         assert (r["next", "reward"] <= 0.1).all()
@@ -426,7 +428,7 @@ def test_transform_env(self):
             high=-1.0,
         )
         env = TransformedEnv(
-            env,
+            base_env,
             ClipTransform(
                 in_keys=["observation", "reward"],
                 in_keys_inv=["observation_orig"],
@@ -436,7 +438,7 @@ def test_transform_env(self):
             ),
         )
         env = TransformedEnv(
-            env,
+            base_env,
             ClipTransform(
                 in_keys=["observation", "reward"],
                 in_keys_inv=["observation_orig"],
@@ -446,7 +448,7 @@ def test_transform_env(self):
         )
         check_env_specs(env)
         env = TransformedEnv(
-            env,
+            base_env,
             ClipTransform(
                 in_keys=["observation", "reward"],
                 in_keys_inv=["observation_orig"],
@@ -455,6 +457,16 @@ def test_transform_env(self):
             ),
         )
         check_env_specs(env)
+        env = TransformedEnv(
+            base_env,
+            ClipTransform(
+                in_keys=["observation", "reward"],
+                in_keys_inv=["observation_orig"],
+                low=-torch.ones(()),
+                high=1,
+            ),
+        )
+        check_env_specs(env)
 
     def test_transform_inverse(self):
         t = ClipTransform(
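For reference, a usage sketch of ClipTransform outside the test suite (hypothetical environment; mirrors the mixed scalar/tensor bounds exercised above):

import torch
from torchrl.envs import GymEnv, TransformedEnv
from torchrl.envs.transforms import ClipTransform

# Clip observations and rewards into [-1, 1]; low is a tensor, high a plain scalar.
env = TransformedEnv(
    GymEnv("Pendulum-v1"),
    ClipTransform(in_keys=["observation", "reward"], low=-torch.ones(()), high=1),
)
rollout = env.rollout(3)
assert (rollout["observation"] <= 1).all()
assert (rollout["observation"] >= -1).all()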
4 changes: 2 additions & 2 deletions torchrl/data/tensor_specs.py
@@ -555,10 +555,10 @@ def encode(
             val = torch.tensor(val, device=self.device, dtype=self.dtype)
         else:
             val = torch.as_tensor(val, dtype=self.dtype)
-        if val != self.shape:
+        if val.shape != self.shape:
             # if val.shape[-len(self.shape) :] != self.shape:
             # option 1: add a singleton dim at the end
-            if val == self.shape and self.shape[-1] == 1:
+            if val.shape == self.shape and self.shape[-1] == 1:
                 val = val.unsqueeze(-1)
             else:
                 try:
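The fix matters because `val != self.shape` compares tensor values against the shape elementwise instead of comparing shapes; a minimal reproduction:

import torch

val = torch.zeros(3)
shape = torch.Size([3])

print(val != shape)        # tensor([True, True, True]) -- values compared against 3
# `if val != shape:` raises "Boolean value of Tensor with more than one element is ambiguous"
print(val.shape != shape)  # False -- the intended shape comparison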
3 changes: 2 additions & 1 deletion torchrl/envs/common.py
@@ -231,6 +231,7 @@ def __init__(
         self.__dict__["_done_keys"] = None
         self.__dict__["_reward_keys"] = None
         self.__dict__["_action_keys"] = None
+        self.__dict__["_batch_size"] = None
         if device is not None:
             self.__dict__["_device"] = torch.device(device)
         output_spec = self.__dict__.get("_output_spec", None)
@@ -320,7 +321,7 @@ def run_type_checks(self, run_type_checks: bool) -> None:
 
     @property
     def batch_size(self) -> torch.Size:
-        _batch_size = getattr(self, "_batch_size", None)
+        _batch_size = self.__dict__["_batch_size"]
        if _batch_size is None:
             _batch_size = self._batch_size = torch.Size([])
         return _batch_size
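A plausible reading of this change (an assumption; the diff gives no rationale): pre-seeding `_batch_size` in `__dict__` guarantees the key always exists, so the property can use a direct dict lookup instead of `getattr`, which would fall through to the class's `__getattr__` fallback on every miss. The pattern in isolation:

import torch

class Env:
    def __init__(self):
        # Pre-seed so lookups never reach __getattr__.
        self.__dict__["_batch_size"] = None

    def __getattr__(self, name):
        # Stand-in for an expensive fallback (e.g. delegation to a wrapped env).
        raise AttributeError(name)

    @property
    def batch_size(self) -> torch.Size:
        bs = self.__dict__["_batch_size"]
        if bs is None:
            bs = self._batch_size = torch.Size([])
        return bs

print(Env().batch_size)  # torch.Size([])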
2 changes: 1 addition & 1 deletion torchrl/envs/gym_like.py
@@ -153,7 +153,7 @@ def read_reward(self, reward):
         reward (torch.Tensor or TensorDict): reward to be mapped.
         """
-        return self.reward_spec.encode(reward)
+        return self.reward_spec.encode(reward, ignore_device=True)
 
     def read_obs(
         self, observations: Union[Dict[str, Any], torch.Tensor, np.ndarray]
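A hedged sketch of what the new call does (assuming the `ignore_device` keyword behaves as its name suggests: cast dtype and shape per the spec, but defer device placement to a later, single transfer; spec class chosen for illustration):

from torchrl.data import UnboundedContinuousTensorSpec

spec = UnboundedContinuousTensorSpec(shape=(1,), device="cpu")
reward = spec.encode(1.0, ignore_device=True)
print(reward)  # tensor([1.])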