Merge pull request #1 from vmoens/update_dqn_example
Update dqn example
vmoens authored Sep 8, 2023
2 parents 51b673a + c673a31 commit d1dfc1b
Showing 14 changed files with 184 additions and 86 deletions.
17 changes: 14 additions & 3 deletions .circleci/unittest/linux/scripts/run_all.sh
@@ -122,10 +122,21 @@ fi
 git submodule sync && git submodule update --init --recursive
 
 printf "Installing PyTorch with %s\n" "${CU_VERSION}"
-if [ "${CU_VERSION:-}" == cpu ] ; then
-    pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+if [[ "$TORCH_VERSION" == "nightly" ]]; then
+    if [ "${CU_VERSION:-}" == cpu ] ; then
+        pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+    else
+        pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CU_VERSION
+    fi
+elif [[ "$TORCH_VERSION" == "stable" ]]; then
+    if [ "${CU_VERSION:-}" == cpu ] ; then
+        pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+    else
+        pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$CU_VERSION
+    fi
 else
-    pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CU_VERSION
+    printf "Failed to install pytorch"
+    exit 1
 fi
 
 # smoke test
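The smoke test that follows can confirm which build actually landed. A minimal sketch (not part of this diff; plain torch introspection):

import torch

print(torch.__version__)   # e.g. "2.1.0" for a stable build, "2.2.0.devYYYYMMDD" for a nightly
print(torch.version.cuda)  # None on CPU-only builds, e.g. "11.8" on CUDA builds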
1 change: 1 addition & 0 deletions .github/workflows/test-linux-cpu.yml
@@ -34,6 +34,7 @@ jobs:
           # Set env vars from matrix
           export PYTHON_VERSION=${{ matrix.python_version }}
           export CU_VERSION="cpu"
+          export TORCH_VERSION=nightly
           echo "PYTHON_VERSION: $PYTHON_VERSION"
           echo "CU_VERSION: $CU_VERSION"
1 change: 1 addition & 0 deletions .github/workflows/test-linux-gpu.yml
@@ -39,6 +39,7 @@ jobs:
           # Commenting these out for now because the GPU test are not working inside docker
           export CUDA_ARCH_VERSION=${{ matrix.cuda_arch_version }}
           export CU_VERSION="cu${CUDA_ARCH_VERSION:0:2}${CUDA_ARCH_VERSION:3:1}"
+          export TORCH_VERSION=nightly
           # Remove the following line when the GPU tests are working inside docker, and uncomment the above lines
           #export CU_VERSION="cpu"
50 changes: 50 additions & 0 deletions .github/workflows/test-linux-stable-gpu.yml
@@ -0,0 +1,50 @@
name: Unit-tests on Linux GPU, latest stable release

on:
  pull_request:
  push:
    branches:
      - nightly
      - main
      - release/*
  workflow_dispatch:

env:
  CHANNEL: "nightly"

concurrency:
  # Documentation suggests ${{ github.head_ref }}, but that's only available on pull_request/pull_request_target triggers, so using ${{ github.ref }}.
  # On master, we want all builds to complete even if merging happens faster to make it easier to discover at which point something broke.
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && format('ci-master-{0}', github.sha) || format('ci-{0}', github.ref) }}
  cancel-in-progress: true

jobs:
  tests:
    strategy:
      matrix:
        python_version: ["3.9"] # "3.8", "3.9", "3.10", "3.11"
        cuda_arch_version: ["11.8"] # "11.6", "11.7"
      fail-fast: false
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      repository: pytorch/rl
      docker-image: "nvidia/cuda:12.1.0-devel-ubuntu22.04"
      gpu-arch-type: cuda
      gpu-arch-version: ${{ matrix.cuda_arch_version }}
      timeout: 90
      script: |
        # Set env vars from matrix
        export PYTHON_VERSION=${{ matrix.python_version }}
        # Commenting these out for now because the GPU test are not working inside docker
        export CUDA_ARCH_VERSION=${{ matrix.cuda_arch_version }}
        export CU_VERSION="cu${CUDA_ARCH_VERSION:0:2}${CUDA_ARCH_VERSION:3:1}"
        export TORCH_VERSION=stable
        # Remove the following line when the GPU tests are working inside docker, and uncomment the above lines
        #export CU_VERSION="cpu"
        echo "PYTHON_VERSION: $PYTHON_VERSION"
        echo "CU_VERSION: $CU_VERSION"
        ## setup_env.sh
        bash .circleci/unittest/linux/scripts/run_all.sh
1 change: 1 addition & 0 deletions .github/workflows/test-macos-cpu.yml
@@ -33,6 +33,7 @@ jobs:
           export PYTHON_VERSION=${{ matrix.python_version }}
           export CU_VERSION="cpu"
           export SYSTEM_VERSION_COMPAT=0
+          export TORCH_VERSION=nightly
           echo "PYTHON_VERSION: $PYTHON_VERSION"
           echo "CU_VERSION: $CU_VERSION"
63 changes: 37 additions & 26 deletions examples/dqn/dqn_atari.py
@@ -3,40 +3,36 @@
Deep Q-Learning Algorithm on Atari Environments.
"""

import random
import time

import gym
import numpy as np
import torch.nn
import torch.optim
import tqdm
from tensordict import TensorDict

from torchrl.collectors import SyncDataCollector
from torchrl.data import CompositeSpec, LazyMemmapStorage, TensorDictReplayBuffer
from torchrl.envs import (
    CatFrames,
    default_info_dict_reader,
    DoubleToFloat,
    ExplorationType,
    GrayScale,
    NoopResetEnv,
    Resize,
    RewardClipping,
    RewardSum,
    set_exploration_type,
    StepCounter,
    ToTensorImage,
    TransformedEnv,
    VecNorm,
)
from torchrl.envs.libs.gym import GymWrapper
from torchrl.modules import ConvNet, EGreedyWrapper, MLP, QValueActor
from torchrl.objectives import DQNLoss, HardUpdate
from torchrl.record.loggers import generate_exp_name, get_logger
@@ -91,6 +87,7 @@ def make_env(env_name, device, is_test=False):
     # env.append_transform(VecNorm(in_keys=["pixels"]))
     return env
 
+
 # ====================================================================
 # Model utils
 # --------------------------------------------------------------------
@@ -137,6 +134,7 @@ def make_dqn_model(env_name):
 # Collector utils
 # --------------------------------------------------------------------
 
+
 def make_collector(env_name, policy, device):
     collector_class = SyncDataCollector
     collector = collector_class(
@@ -151,15 +149,16 @@ def make_collector(env_name, policy, device):
     collector.set_seed(seed)
     return collector
 
+
 # ====================================================================
 # Collector and replay buffer utils
 # --------------------------------------------------------------------
 
+
 def make_replay_buffer(
-        batch_size,
-        buffer_scratch_dir="/tmp/",
-        prefetch=3,
+    batch_size,
+    buffer_scratch_dir="/tmp/",
+    prefetch=3,
 ):
     replay_buffer = TensorDictReplayBuffer(
         pin_memory=False,
@@ -173,6 +172,7 @@ def make_replay_buffer(
     )
     return replay_buffer
 
+
 # ====================================================================
 # Discrete DQN Loss
 # --------------------------------------------------------------------
@@ -187,9 +187,12 @@ def make_loss_module(value_network):
         delay_value=True,
     )
     dqn_loss.make_value_estimator(gamma=gamma)
-    targ_net_updater = HardUpdate(dqn_loss, value_network_update_interval=hard_update_freq)
+    targ_net_updater = HardUpdate(
+        dqn_loss, value_network_update_interval=hard_update_freq
+    )
     return dqn_loss, targ_net_updater
 
+
 # ====================================================================
 # Other component utils
 # --------------------------------------------------------------------
@@ -234,7 +237,9 @@ def make_logger(backend="csv"):
 
     # Make the components
     model = make_dqn_model(env_name)
-    model_explore = EGreedyWrapper(model, annealing_num_steps=annealing_frames, eps_end=end_e).to(device)
+    model_explore = EGreedyWrapper(
+        model, annealing_num_steps=annealing_frames, eps_end=end_e
+    ).to(device)
     collector = make_collector(env_name, model_explore, device)
     replay_buffer = make_replay_buffer(batch_size)
     loss_module, target_net_updater = make_loss_module(model)
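For context on EGreedyWrapper: it holds an epsilon that anneals linearly from eps_init toward eps_end as step() consumes frames. A self-contained sketch with hypothetical toy dimensions (not the example's actual config):

import torch.nn as nn
from torchrl.modules import EGreedyWrapper, QValueActor

# Toy Q-network: 4-dim observation, 2 discrete actions (hypothetical sizes).
actor = QValueActor(nn.Linear(4, 2), in_keys=["observation"])
actor_explore = EGreedyWrapper(
    actor, eps_init=1.0, eps_end=0.05, annealing_num_steps=1000
)
for _ in range(10):
    actor_explore.step(frames=100)  # typically called once per collected batch
print(actor_explore.eps)  # ~0.05 once annealing_num_steps frames are consumed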
@@ -254,8 +259,14 @@ def make_logger(backend="csv"):
         episode_rewards = data["next", "episode_reward"][data["next", "done"]]
         if len(episode_rewards) > 0:
             episode_length = data["next", "step_count"][data["next", "done"]]
-            logger.log_scalar("reward_train", episode_rewards.mean().item(), collected_frames)
-            logger.log_scalar("episode_length_train", episode_length.sum().item() / len(episode_length), collected_frames)
+            logger.log_scalar(
+                "reward_train", episode_rewards.mean().item(), collected_frames
+            )
+            logger.log_scalar(
+                "episode_length_train",
+                episode_length.sum().item() / len(episode_length),
+                collected_frames,
+            )
 
         pbar.update(data.numel())
         data = data.reshape(-1)
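Aside (not from the diff): the sum-over-count expression logged above is just the mean episode length; for an integer step_count tensor the two are interchangeable:

import torch

episode_length = torch.tensor([10, 20, 30])  # hypothetical step counts
assert episode_length.sum().item() / len(episode_length) == episode_length.float().mean().item()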
24 changes: 18 additions & 6 deletions test/test_transforms.py
@@ -383,10 +383,11 @@ def test_transform_compose(self):
         assert data["reward"] == 2
         assert data["reward_clip"] == 0.1
 
-    def test_transform_env(self):
-        env = ContinuousActionVecMockEnv()
+    @pytest.mark.parametrize("device", get_default_devices())
+    def test_transform_env(self, device):
+        base_env = ContinuousActionVecMockEnv(device=device)
         env = TransformedEnv(
-            env,
+            base_env,
             ClipTransform(
                 in_keys=["observation", "reward"],
                 in_keys_inv=["observation_orig"],
@@ -395,6 +396,7 @@ def test_transform_env(self):
             ),
         )
         r = env.rollout(3)
+        assert r.device == device
         assert (r["observation"] <= 0.1).all()
         assert (r["next", "observation"] <= 0.1).all()
         assert (r["next", "reward"] <= 0.1).all()
@@ -426,7 +428,7 @@ def test_transform_env(self):
             high=-1.0,
         )
         env = TransformedEnv(
-            env,
+            base_env,
             ClipTransform(
                 in_keys=["observation", "reward"],
                 in_keys_inv=["observation_orig"],
@@ -436,7 +438,7 @@ def test_transform_env(self):
             ),
         )
         env = TransformedEnv(
-            env,
+            base_env,
             ClipTransform(
                 in_keys=["observation", "reward"],
                 in_keys_inv=["observation_orig"],
@@ -446,7 +448,7 @@ def test_transform_env(self):
         )
         check_env_specs(env)
         env = TransformedEnv(
-            env,
+            base_env,
             ClipTransform(
                 in_keys=["observation", "reward"],
                 in_keys_inv=["observation_orig"],
@@ -455,6 +457,16 @@ def test_transform_env(self):
             ),
         )
         check_env_specs(env)
+        env = TransformedEnv(
+            base_env,
+            ClipTransform(
+                in_keys=["observation", "reward"],
+                in_keys_inv=["observation_orig"],
+                low=-torch.ones(()),
+                high=1,
+            ),
+        )
+        check_env_specs(env)
 
     def test_transform_inverse(self):
         t = ClipTransform(
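For reference, a usage sketch of ClipTransform outside the test suite (hypothetical environment; mirrors the mixed scalar/tensor bounds exercised above):

import torch
from torchrl.envs import GymEnv, TransformedEnv
from torchrl.envs.transforms import ClipTransform

# Clip observations and rewards into [-1, 1]; low is a tensor, high a plain scalar.
env = TransformedEnv(
    GymEnv("Pendulum-v1"),
    ClipTransform(in_keys=["observation", "reward"], low=-torch.ones(()), high=1),
)
rollout = env.rollout(3)
assert (rollout["observation"] <= 1).all()
assert (rollout["observation"] >= -1).all()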
4 changes: 2 additions & 2 deletions torchrl/data/tensor_specs.py
@@ -555,10 +555,10 @@ def encode(
             val = torch.tensor(val, device=self.device, dtype=self.dtype)
         else:
             val = torch.as_tensor(val, dtype=self.dtype)
-        if val != self.shape:
+        if val.shape != self.shape:
             # if val.shape[-len(self.shape) :] != self.shape:
             # option 1: add a singleton dim at the end
-            if val == self.shape and self.shape[-1] == 1:
+            if val.shape == self.shape and self.shape[-1] == 1:
                 val = val.unsqueeze(-1)
             else:
                 try:
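The fix matters because `val != self.shape` compares tensor values against the shape elementwise instead of comparing shapes; a minimal reproduction:

import torch

val = torch.zeros(3)
shape = torch.Size([3])

print(val != shape)        # tensor([True, True, True]) -- values compared against 3
# `if val != shape:` raises "Boolean value of Tensor with more than one element is ambiguous"
print(val.shape != shape)  # False -- the intended shape comparison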
3 changes: 2 additions & 1 deletion torchrl/envs/common.py
@@ -231,6 +231,7 @@ def __init__(
         self.__dict__["_done_keys"] = None
         self.__dict__["_reward_keys"] = None
         self.__dict__["_action_keys"] = None
+        self.__dict__["_batch_size"] = None
         if device is not None:
             self.__dict__["_device"] = torch.device(device)
         output_spec = self.__dict__.get("_output_spec", None)
@@ -320,7 +321,7 @@ def run_type_checks(self, run_type_checks: bool) -> None:
 
     @property
     def batch_size(self) -> torch.Size:
-        _batch_size = getattr(self, "_batch_size", None)
+        _batch_size = self.__dict__["_batch_size"]
        if _batch_size is None:
             _batch_size = self._batch_size = torch.Size([])
         return _batch_size
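A plausible reading of this change (an assumption; the diff gives no rationale): pre-seeding `_batch_size` in `__dict__` guarantees the key always exists, so the property can use a direct dict lookup instead of `getattr`, which would fall through to the class's `__getattr__` fallback on every miss. The pattern in isolation:

import torch

class Env:
    def __init__(self):
        # Pre-seed so lookups never reach __getattr__.
        self.__dict__["_batch_size"] = None

    def __getattr__(self, name):
        # Stand-in for an expensive fallback (e.g. delegation to a wrapped env).
        raise AttributeError(name)

    @property
    def batch_size(self) -> torch.Size:
        bs = self.__dict__["_batch_size"]
        if bs is None:
            bs = self._batch_size = torch.Size([])
        return bs

print(Env().batch_size)  # torch.Size([])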
2 changes: 1 addition & 1 deletion torchrl/envs/gym_like.py
@@ -153,7 +153,7 @@ def read_reward(self, reward):
         reward (torch.Tensor or TensorDict): reward to be mapped.
         """
-        return self.reward_spec.encode(reward)
+        return self.reward_spec.encode(reward, ignore_device=True)
 
     def read_obs(
         self, observations: Union[Dict[str, Any], torch.Tensor, np.ndarray]
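A hedged sketch of what the new call does (assuming the `ignore_device` keyword behaves as its name suggests: cast dtype and shape per the spec, but defer device placement to a later, single transfer; spec class chosen for illustration):

from torchrl.data import UnboundedContinuousTensorSpec

spec = UnboundedContinuousTensorSpec(shape=(1,), device="cpu")
reward = spec.encode(1.0, ignore_device=True)
print(reward)  # tensor([1.])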