[Feature] Re-enable cache for specs
ghstack-source-id: 797132312bfd9749f8926a2dd0b03eff65b8f51c
Pull Request resolved: #2730
vmoens committed Feb 3, 2025
1 parent 601483e commit 4262ab9
Showing 22 changed files with 578 additions and 357 deletions.
2 changes: 1 addition & 1 deletion .github/unittest/linux/scripts/run_all.sh
@@ -80,7 +80,7 @@ export DISPLAY=:0
export SDL_VIDEODRIVER=dummy

# legacy from bash scripts: remove?
conda env config vars set MUJOCO_GL=$MUJOCO_GL PYOPENGL_PLATFORM=$MUJOCO_GL DISPLAY=:0 SDL_VIDEODRIVER=dummy LAZY_LEGACY_OP=False RL_LOGGING_LEVEL=DEBUG
conda env config vars set MUJOCO_GL=$MUJOCO_GL PYOPENGL_PLATFORM=$MUJOCO_GL DISPLAY=:0 SDL_VIDEODRIVER=dummy LAZY_LEGACY_OP=False RL_LOGGING_LEVEL=DEBUG TOKENIZERS_PARALLELISM=true

pip3 install pip --upgrade
pip install virtualenv
3 changes: 2 additions & 1 deletion .github/unittest/linux_distributed/scripts/setup_env.sh
@@ -69,7 +69,8 @@ conda env config vars set MUJOCO_PY_MUJOCO_PATH=$root_dir/.mujoco/mujoco210 \
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$root_dir/.mujoco/mujoco210/bin \
SDL_VIDEODRIVER=dummy \
MUJOCO_GL=$PRIVATE_MUJOCO_GL \
PYOPENGL_PLATFORM=$PRIVATE_MUJOCO_GL
PYOPENGL_PLATFORM=$PRIVATE_MUJOCO_GL \
TOKENIZERS_PARALLELISM=true

# Software rendering requires GLX and OSMesa.
if [ $PRIVATE_MUJOCO_GL == 'egl' ] || [ $PRIVATE_MUJOCO_GL == 'osmesa' ] ; then
3 changes: 2 additions & 1 deletion .github/unittest/linux_libs/scripts_d4rl/setup_env.sh
@@ -92,6 +92,7 @@ conda env config vars set \
MUJOCO_PY_MJKEY_PATH=$root_dir/.mujoco/mjkey.txt \
SDL_VIDEODRIVER=dummy \
MUJOCO_GL=$PRIVATE_MUJOCO_GL \
PYOPENGL_PLATFORM=$PRIVATE_MUJOCO_GL
PYOPENGL_PLATFORM=$PRIVATE_MUJOCO_GL \
TOKENIZERS_PARALLELISM=true

conda env update --file "${this_dir}/environment.yml" --prune
1 change: 1 addition & 0 deletions .github/unittest/linux_libs/scripts_gym/setup_env.sh
@@ -80,6 +80,7 @@ conda env config vars set \
MUJOCO_PY_MJKEY_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/mjkey.txt \
MUJOCO_PY_MUJOCO_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/linux/mujoco210 \
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/pytorch/rl/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin
TOKENIZERS_PARALLELISM=true
# LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/circleci/project/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin

# make env variables apparent
4 changes: 2 additions & 2 deletions .github/unittest/linux_libs/scripts_habitat/run_test.sh
@@ -10,7 +10,7 @@ conda activate ./env
# https://stackoverflow.com/questions/72540359/glibcxx-3-4-30-not-found-for-librosa-in-conda-virtual-environment-after-tryin
#conda install -y -c conda-forge gcc=12.1.0
conda install -y -c conda-forge libstdcxx-ng=12
conda env config vars set LD_PRELOAD=$LD_PRELOAD:$STDC_LOC
conda env config vars set LD_PRELOAD=$LD_PRELOAD:$STDC_LOC TOKENIZERS_PARALLELISM=true

## find libstdc
STDC_LOC=$(find conda/ -name "libstdc++.so.6" | head -1)
@@ -36,7 +36,7 @@ export MKL_THREADING_LAYER=GNU
#wget https://github.com/openai/mujoco-py/blob/master/vendor/10_nvidia.json
#mv 10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json

conda env config vars set MAGNUM_LOG=quiet HABITAT_SIM_LOG=quiet
conda env config vars set MAGNUM_LOG=quiet HABITAT_SIM_LOG=quiet TOKENIZERS_PARALLELISM=true

conda deactivate && conda activate ./env

2 changes: 1 addition & 1 deletion .github/unittest/linux_libs/scripts_habitat/setup_env.sh
@@ -41,7 +41,7 @@ fi
conda activate "${env_dir}"

# set debug variables
conda env config vars set MAGNUM_LOG=debug HABITAT_SIM_LOG=debug
conda env config vars set MAGNUM_LOG=debug HABITAT_SIM_LOG=debug TOKENIZERS_PARALLELISM=true
conda deactivate && conda activate "${env_dir}"

pip3 install "cython<3"
3 changes: 2 additions & 1 deletion .github/unittest/linux_libs/scripts_robohive/setup_env.sh
@@ -67,7 +67,8 @@ conda env config vars set \
PYOPENGL_PLATFORM=egl \
NVIDIA_PATH=/usr/src/nvidia-470.63.01 \
sim_backend=MUJOCO \
LAZY_LEGACY_OP=False
LAZY_LEGACY_OP=False \
TOKENIZERS_PARALLELISM=true

# make env variables apparent
conda deactivate && conda activate "${env_dir}"
3 changes: 2 additions & 1 deletion .github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh
@@ -85,7 +85,8 @@ conda env config vars set \
NVIDIA_PATH=/usr/src/nvidia-470.63.01 \
MUJOCO_PY_MJKEY_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/mjkey.txt \
MUJOCO_PY_MUJOCO_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/linux/mujoco210 \
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/circleci/project/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/circleci/project/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin \
TOKENIZERS_PARALLELISM=true

# make env variables apparent
conda deactivate && conda activate "${env_dir}"
5 changes: 3 additions & 2 deletions .github/unittest/linux_sota/scripts/run_all.sh
@@ -83,7 +83,8 @@ conda env config vars set MUJOCO_PY_MUJOCO_PATH=$root_dir/.mujoco/mujoco210 \
SDL_VIDEODRIVER=dummy \
MUJOCO_GL=egl \
PYOPENGL_PLATFORM=egl \
BATCHED_PIPE_TIMEOUT=60
BATCHED_PIPE_TIMEOUT=60 \
TOKENIZERS_PARALLELISM=true

pip install pip --upgrade

@@ -100,7 +101,7 @@ pip install git+https://github.com/Farama-Foundation/d4rl@master#egg=d4rl
conda install -y -c conda-forge libstdcxx-ng=12
## find libstdc
STDC_LOC=$(find conda/ -name "libstdc++.so.6" | head -1)
conda env config vars set LD_PRELOAD=${root_dir}/$STDC_LOC
conda env config vars set LD_PRELOAD=${root_dir}/$STDC_LOC TOKENIZERS_PARALLELISM=true

# compile mujoco-py (bc it's done at runtime for whatever reason someone thought it was a good idea)
python -c """import gym;import d4rl"""
22 changes: 19 additions & 3 deletions docs/source/reference/envs.rst
@@ -41,19 +41,35 @@ Each env will have the following attributes:
the done-flag spec. See the section on trajectory termination below.
- :obj:`env.input_spec`: a :class:`~torchrl.data.Composite` object containing
all the input keys (:obj:`"full_action_spec"` and :obj:`"full_state_spec"`).
It is locked and should not be modified directly.
- :obj:`env.output_spec`: a :class:`~torchrl.data.Composite` object containing
all the output keys (:obj:`"full_observation_spec"`, :obj:`"full_reward_spec"` and :obj:`"full_done_spec"`).
It is locked and should not be modified directly.

If the environment carries non-tensor data, a :class:`~torchrl.data.NonTensorSpec`
If the environment carries non-tensor data, a :class:`~torchrl.data.NonTensor`
instance can be used.
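
For illustration (a sketch only, where ``env`` stands in for any TorchRL environment instance)::

    env.input_spec             # a Composite carrying "full_action_spec" and "full_state_spec"
    env.output_spec            # a Composite carrying "full_observation_spec", "full_reward_spec" and "full_done_spec"
    env.output_spec.is_locked  # True by default (see the locking section below)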

Env specs: locks and batch size
-------------------------------

.. _Environment-lock:

Environment specs are locked by default (through a ``spec_locked`` argument passed to the env constructor).
Locking specs means that any in-place modification of a spec (or of its children if it is a
:class:`~torchrl.data.Composite` instance) first requires unlocking it, which can be done via
:meth:`~torchrl.envs.EnvBase.set_spec_lock_`.
Specs are locked by default because this makes it easy to cache values such as the action or reset keys.
An env should only be unlocked if its specs are expected to be modified often (which, in principle, should
be avoided).
Re-assignments such as ``env.observation_spec = new_spec`` remain allowed: under the hood, TorchRL erases
the cache, unlocks the specs, makes the modification and relocks the specs if the env was previously locked.

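A minimal sketch of this behavior (``make_env`` is a hypothetical stand-in for any :class:`~torchrl.envs.EnvBase` constructor)::

    from torchrl.data import Unbounded

    env = make_env()            # specs are locked by default
    assert env.is_spec_locked
    # In-place edits of a locked spec raise a RuntimeError:
    #     env.input_spec["full_action_spec", "action"] = Unbounded(())
    # Full re-assignment remains allowed (TorchRL unlocks, swaps and relocks under the hood):
    env.observation_spec = env.observation_spec.clone()
    # An env built with spec_locked=False keeps its specs editable:
    env = make_env(spec_locked=False)
    env.input_spec["full_action_spec", "action"] = Unbounded(())
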
Importantly, the environment spec shapes should contain the batch size, e.g.
an environment with :obj:`env.batch_size == torch.Size([4])` should have
an :obj:`env.action_spec` with shape :obj:`torch.Size([4, action_size])`.
This is helpful when preallocating tensors, checking shape consistency, etc.
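
For instance (a sketch, assuming ``torch`` is imported and ``env`` is any environment built with a batch size of 4)::

    assert env.batch_size == torch.Size([4])
    # leaf specs lead with the batch dimensions
    assert env.action_spec.shape[: len(env.batch_size)] == env.batch_size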

Env methods
-----------

With these, the following methods are implemented:

- :meth:`env.reset`: a reset method that may (but not necessarily requires to) take
44 changes: 42 additions & 2 deletions test/test_env.py
@@ -9,11 +9,13 @@
import gc
import importlib
import os.path
import pickle
import random
import re
from collections import defaultdict
from functools import partial
from sys import platform
from typing import Optional

import numpy as np
import pytest
@@ -246,6 +248,41 @@ def test_run_type_checks(self):
        with pytest.raises(TypeError):
            check_env_specs(env)

    class MyEnv(EnvBase):
        def __init__(self):
            super().__init__()
            self.observation_spec = Unbounded(())
            self.action_spec = Unbounded(())

        def _reset(self, tensordict: TensorDictBase, **kwargs) -> TensorDictBase:
            ...

        def _step(
            self,
            tensordict: TensorDictBase,
        ) -> TensorDictBase:
            ...

        def _set_seed(self, seed: Optional[int]):
            ...

    def test_env_lock(self):

        env = self.MyEnv()
        for _ in range(2):
            assert env.is_spec_locked
            assert env.output_spec.is_locked
            assert env.input_spec.is_locked
            with pytest.raises(RuntimeError, match="lock"):
                env.input_spec["full_action_spec", "action"] = Unbounded(())
            env = pickle.loads(pickle.dumps(env))

        env = self.MyEnv(spec_locked=False)
        assert not env.is_spec_locked
        assert not env.output_spec.is_locked
        assert not env.input_spec.is_locked
        env.input_spec["full_action_spec", "action"] = Unbounded(())

    def test_single_env_spec(self):
        env = NestedCountingEnv(batch_size=[3, 1, 7])
        assert not env.full_action_spec_unbatched.shape
@@ -2294,15 +2331,14 @@ def test_multi_purpose_env(self, serial):
            env = SerialEnv(2, ContinuousActionVecMockEnv)
        else:
            env = ContinuousActionVecMockEnv()
        env.set_spec_lock_()
        env.rollout(10)
        assert env._step_mdp.validate(None)
        c = SyncDataCollector(
            env, env.rand_action, frames_per_batch=10, total_frames=20
        )
        for data in c: # noqa: B007
            pass
        assert ("collector", "traj_ids") in data.keys(True)
        assert env._step_mdp.validate(None)
        env.rollout(10)

        # An exception will be raised when the collector sees extra keys
@@ -3387,6 +3423,10 @@ def policy(td):
class TestEnvWithDynamicSpec:
    def test_dynamic_rollout(self):
        env = EnvWithDynamicSpec()
        rollout = env.rollout(4)
        assert isinstance(rollout, LazyStackedTensorDict)
        rollout = env.rollout(4, return_contiguous=False)
        assert isinstance(rollout, LazyStackedTensorDict)
        with pytest.raises(
            RuntimeError,
            match="The environment specs are dynamic. Call rollout with return_contiguous=False",
2 changes: 1 addition & 1 deletion test/test_libs.py
@@ -1779,7 +1779,7 @@ def test_jumanji_rendering(self, envname, batch_size):
        # check that this works with a batch-size
        env = JumanjiEnv(envname, from_pixels=True, batch_size=batch_size, jit=True)
        env.set_seed(0)
        env.transform.transform_observation_spec(env.base_env.observation_spec)
        env.transform.transform_observation_spec(env.base_env.observation_spec.clone())

        r = env.rollout(10)
        pixels = r["pixels"]
24 changes: 24 additions & 0 deletions test/test_specs.py
@@ -410,6 +410,30 @@ def test_setitem_matches_device(self, shape, is_complete, device, dtype, dest):
        )
        assert ts["bad"].device == (device if device is not None else dest)

    def test_setitem_nested(self, shape, is_complete, device, dtype):
        f = Unbounded(shape=shape, device=device, dtype=dtype)
        g = (
            None
            if not is_complete
            else Unbounded(shape=shape, device=device, dtype=dtype)
        )
        test = Composite(
            a=Composite(b=Composite(c=Composite(d=Composite(e=Composite(f=f, g=g))))),
            shape=shape,
            device=device,
        )
        trials = Composite(shape=shape, device=device)
        assert trials != test
        trials["a", "b", "c", "d", "e", "f"] = Unbounded(
            shape=shape, device=device, dtype=dtype
        )
        trials["a", "b", "c", "d", "e", "g"] = (
            None
            if not is_complete
            else Unbounded(shape=shape, device=device, dtype=dtype)
        )
        assert trials == test

    def test_del(self, shape, is_complete, device, dtype):
        ts = self._composite_spec(shape, is_complete, device, dtype)
        assert "obs" in ts.keys()