diff --git a/core/src/io/serialization.cc b/core/src/io/serialization.cc index 7b53f42be..3b95deb0f 100644 --- a/core/src/io/serialization.cc +++ b/core/src/io/serialization.cc @@ -1008,6 +1008,7 @@ namespace boost::serialization // Restore extended simulation model model.pinocchioModel_ = pinocchioModel; + model.pinocchioData_ = pinocchio::Data(pinocchioModel); } template diff --git a/core/src/solver/constraint_solvers.cc b/core/src/solver/constraint_solvers.cc index 90eaf56cb..8bbbf4f9e 100644 --- a/core/src/solver/constraint_solvers.cc +++ b/core/src/solver/constraint_solvers.cc @@ -265,11 +265,11 @@ namespace jiminy they can grow arbitrary large for constraints whose bounds are active. It follows that stagnation of residuals is the only viable criteria. The PGS algorithm has been modified for solving second-order cone LCP, which means - that only the L2-norm of the tangential forces can be expected to converge. Because + that only the L^2-norm of the tangential forces can be expected to converge. Because of this, it is too restrictive to check the element-wise variation of the residuals over iterations. It makes more sense to look at the Linf-norm instead, but this criteria is very lax. A good compromise may be to look at the constraint-block-wise - L2-norm, which is similar to what Drake simulator is doing. For reference, see: + L^2-norm, which is similar to what the Drake simulator is doing. For reference, see: https://github.com/RobotLocomotion/drake/blob/master/multibody/contact_solvers/pgs_solver.cc */ const double tol = tolAbs_ + tolRel_ * y_.lpNorm<Eigen::Infinity>() + EPS; diff --git a/python/gym_jiminy/common/gym_jiminy/common/bases/compositions.py b/python/gym_jiminy/common/gym_jiminy/common/bases/compositions.py index d9cbc2711..14af723ea 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/bases/compositions.py +++ b/python/gym_jiminy/common/gym_jiminy/common/bases/compositions.py @@ -7,7 +7,7 @@ greatly reduces code duplication and bugs. """ from abc import ABC, abstractmethod -from enum import Enum +from enum import IntEnum from typing import Tuple, Sequence, Callable, Union, Optional, Generic, TypeVar import numpy as np @@ -234,7 +234,7 @@ def __init__(self, self.env.quantities[self.name] = quantity # Keep track of the underlying quantity - self.quantity = self.env.quantities.registry[self.name] + self.data = self.env.quantities.registry[self.name] def __del__(self) -> None: try: @@ -266,7 +266,7 @@ def compute(self, terminated: bool, info: InfoType) -> Optional[float]: return None # Evaluate raw quantity - value = self.quantity.get() + value = self.data.get() # Early return if quantity is None if value is None: @@ -383,7 +383,7 @@ def compute(self, terminated: bool, info: InfoType) -> Optional[float]: return reward_total -class EpisodeState(Enum): +class EpisodeState(IntEnum): """Specify the current state of the ongoing episode. """ @@ -595,7 +595,7 @@ def __init__(self, self.env.quantities[self.name] = quantity # Keep track of the underlying quantity - self.quantity = self.env.quantities.registry[self.name] + self.data = self.env.quantities.registry[self.name] def __del__(self) -> None: try: @@ -615,7 +615,7 @@ def compute(self, info: InfoType) -> bool: This method is not meant to be overloaded. """ # Evaluate the quantity - value = self.quantity.get() + value = self.data.get() # Check if the quantity is out-of-bounds bound. 
# Note that it may be `None` if the quantity is ill-defined for the diff --git a/python/gym_jiminy/common/gym_jiminy/common/bases/interfaces.py b/python/gym_jiminy/common/gym_jiminy/common/bases/interfaces.py index 4110100b5..aafc05e5c 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/bases/interfaces.py +++ b/python/gym_jiminy/common/gym_jiminy/common/bases/interfaces.py @@ -274,7 +274,7 @@ def _observer_handle(self, # they are always updated before the controller gets called, no matter # if either one or the other is time-continuous. Hacking the internal # dynamics to clear quantities does not address this issue either. - self.quantities.clear() + # self.quantities.clear() # Refresh the observation if not already done but only if a simulation # is already running. It would be pointless to refresh the observation diff --git a/python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py b/python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py index e647c1a33..dfaebb6dc 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py +++ b/python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py @@ -448,9 +448,10 @@ def refresh_observation(self, measurement: EngineObsType) -> None: self.env.refresh_observation(measurement) def has_terminated(self, info: InfoType) -> Tuple[bool, bool]: - """Determine whether the episode is over, because a terminal state of - the underlying MDP has been reached or an aborting condition outside - the scope of the MDP has been triggered. + """Determine whether the practitioner is instructed to stop the ongoing + episode on the spot because a termination condition has been triggered, + either coming from the base environment or from the ad-hoc termination + conditions that have been plugged on top of it. At each step of the wrapped environment, all its termination conditions will be evaluated sequentially until one of them eventually gets @@ -465,6 +466,9 @@ def has_terminated(self, info: InfoType) -> Tuple[bool, bool]: This method is called after `refresh_observation`, so that the internal buffer 'observation' is up-to-date. + .. seealso:: + See `InterfaceJiminyEnv.has_terminated` documentation for details. + :param info: Dictionary of extra information for monitoring. :returns: terminated and truncated flags. @@ -492,9 +496,29 @@ def compute_command(self, action: ActT, command: np.ndarray) -> None: self.env.compute_command(action, command) def compute_reward(self, terminated: bool, info: InfoType) -> float: - if self.reward is None: - return 0.0 - return self.reward(terminated, info) + """Compute the total reward, ie the sum of the original reward from the + wrapped environment and the ad-hoc reward components that have been + plugged on top of it. + + .. seealso:: + See `InterfaceController.compute_reward` documentation for details. + + :param terminated: Whether the episode has reached the terminal state + of the MDP at the current step. This flag can be + used to compute a specific terminal reward. + :param info: Dictionary of extra information for monitoring. + + :returns: Aggregated reward for the current step. 
+ """ + # Compute base reward + reward = self.env.compute_reward(terminated, info) + + # Add composed reward if any + if self.reward is not None: + reward += self.reward(terminated, info) + + # Return total reward + return reward class ObservedJiminyEnv( diff --git a/python/gym_jiminy/common/gym_jiminy/common/bases/quantities.py b/python/gym_jiminy/common/gym_jiminy/common/bases/quantities.py index 1fdc94e83..9ce8f99a7 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/bases/quantities.py +++ b/python/gym_jiminy/common/gym_jiminy/common/bases/quantities.py @@ -14,16 +14,16 @@ """ import re import weakref -from enum import Enum +from enum import IntEnum from weakref import ReferenceType from abc import ABC, abstractmethod from collections import OrderedDict from collections.abc import MutableSet from dataclasses import dataclass, replace -from functools import partial, wraps +from functools import wraps from typing import ( Any, Dict, List, Optional, Tuple, Generic, TypeVar, Type, Iterator, - Callable, Literal, ClassVar, cast, TYPE_CHECKING) + Collection, Callable, Literal, ClassVar, TYPE_CHECKING) import numpy as np @@ -47,6 +47,9 @@ class WeakMutableCollection(MutableSet, Generic[ValueT]): Internally, it is implemented as a set for which uniqueness is characterized by identity instead of equality operator. """ + + __slots__ = ("_callback", "_weakrefs") + def __init__(self, callback: Optional[Callable[[ "WeakMutableCollection[ValueT]", ReferenceType ], None]] = None) -> None: @@ -56,7 +59,7 @@ def __init__(self, callback: Optional[Callable[[ Optional: None by default. """ self._callback = callback - self._ref_list: List[ReferenceType] = [] + self._weakrefs: List[ReferenceType] = [] def __callback__(self, ref: ReferenceType) -> None: """Internal method that will be called every time an element must be @@ -73,9 +76,9 @@ def __callback__(self, ref: ReferenceType) -> None: # actually the right weak reference since all of them will be removed # in the end, so it is not a big deal. value = ref() - for i, ref_i in enumerate(self._ref_list): + for i, ref_i in enumerate(self._weakrefs): if value is ref_i(): - del self._ref_list[i] + del self._weakrefs[i] break if self._callback is not None: self._callback(self, ref) @@ -87,13 +90,13 @@ def __contains__(self, obj: Any) -> bool: :param obj: Object to look for in the container. """ - return any(ref() is obj for ref in self._ref_list) + return any(ref() is obj for ref in self._weakrefs) def __iter__(self) -> Iterator[ValueT]: """Dunder method that returns an iterator over the objects of the - container for which a string reference still exist. + container for which a reference still exist. """ - for ref in self._ref_list: + for ref in self._weakrefs: obj = ref() if obj is not None: yield obj @@ -101,7 +104,7 @@ def __iter__(self) -> Iterator[ValueT]: def __len__(self) -> int: """Dunder method that returns the length of the container. """ - return len(self._ref_list) + return len(self._weakrefs) def add(self, value: ValueT) -> None: """Add a new element to the container if not already contained. @@ -111,7 +114,7 @@ def add(self, value: ValueT) -> None: :param obj: Object to add to the container. """ if value not in self: - self._ref_list.append(weakref.ref(value, self.__callback__)) + self._weakrefs.append(weakref.ref(value, self.__callback__)) def discard(self, value: ValueT) -> None: """Remove an element from the container if stored in it. 
@@ -124,6 +127,34 @@ def discard(self, value: ValueT) -> None: self.__callback__(weakref.ref(value)) +class QuantityStateMachine(IntEnum): + """Specify the current state of a given (unique) quantity, which determines + the steps to perform for retrieving its current value. + """ + + IS_RESET = 0 + """The quantity at hand has just been reset. The quantity must first be + initialized, then refreshed and finally stored in cache before its value + can be retrieved. + """ + + IS_INITIALIZED = 1 + """The quantity at hand has been initialized but never evaluated for the + current robot state. Its value must still be refreshed and stored in cache + before it can be retrieved. + """ + + IS_CACHED = 2 + """The quantity at hand has been evaluated and its value stored in cache. + As such, its value can be retrieved from cache directly. + """ + + +# Define proxies for fast lookup +_IS_RESET, _IS_INITIALIZED, _IS_CACHED = ( # pylint: disable=invalid-name + QuantityStateMachine) + + class SharedCache(Generic[ValueT]): """Basic thread local shared cache. This implementation is not thread safe. """ - owners: WeakMutableCollection["InterfaceQuantity[ValueT]"] + __slots__ = ( + "_value", "_weakrefs", "_owner", "_auto_refresh", "sm_state", "owners") + + owners: Collection["InterfaceQuantity[ValueT]"] """Owners of the shared buffer, ie quantities relying on it to store the result of their evaluation. This information may be useful for determining the most efficient computation path overall. """ @@ -157,72 +191,174 @@ def __init__(self) -> None: # Cached value if any self._value: Optional[ValueT] = None - # Whether a value is stored in cached - self._has_value: bool = False + # Whether auto-refresh is requested + self._auto_refresh = True + + # Basic state machine management + self.sm_state: QuantityStateMachine = QuantityStateMachine.IS_RESET # Initialize "owners" of the shared buffer. # Define callback to reset part of the computation graph whenever a # quantity owning the cache gets garbage collected, namely all # quantities that may assume at some point the existence of this - # deleted owner to find the adjust their computation path. - def _callback(self: WeakMutableCollection["InterfaceQuantity"], - ref: ReferenceType # pylint: disable=unused-argument - ) -> None: - owner: Optional["InterfaceQuantity"] + # deleted owner to adjust their computation path. + def _callback( + self: WeakMutableCollection["InterfaceQuantity[ValueT]"], + ref: ReferenceType) -> None: # pylint: disable=unused-argument + owner: Optional["InterfaceQuantity[ValueT]"] for owner in self: # Stop going up in parent chain if dynamic computation graph # update is disable for efficiency. - while owner.allow_update_graph and owner.parent is not None: + while (owner.allow_update_graph and + owner.parent is not None and owner.parent.has_cache): owner = owner.parent - owner.reset(reset_tracking=True, - ignore_auto_refresh=True, - update_graph=True) + owner.reset(reset_tracking=True) - self.owners = WeakMutableCollection(_callback) + # Initialize weak reference to owning quantities + self._weakrefs = WeakMutableCollection(_callback) - @property - def has_value(self) -> bool: - """Whether a value is stored in cache. 
+ # Keep track of the owning quantities that are still alive upon reset + self.owners = self._weakrefs + self._owner: Optional["InterfaceQuantity[ValueT]"] = None + + def add(self, owner: "InterfaceQuantity[ValueT]") -> None: + """Add a given quantity instance to the set of co-owners associated + with the shared cache at hand. + + .. warning:: + All shared cache co-owners must be instances of the same unique + quantity. An exception will be thrown if an attempt is made to add + a quantity instance that does not satisfy this condition. + + :param owner: Quantity instance to add to the set of co-owners. """ - return self._has_value + # Make sure that the quantity is not already part of the co-owners + if id(owner) in map(id, self.owners): + raise ValueError( + "The specified quantity instance is already an owner of this " + "shared cache.") + + # Make sure that the new owner is consistent with the others if any + if any(owner != _owner for _owner in self._weakrefs): + raise ValueError( + "Quantity instance inconsistent with already existing shared " + "cache owners.") + + # Add quantity instance to shared cache owners + self._weakrefs.add(owner) + + # Refresh owners + if self.sm_state is not QuantityStateMachine.IS_RESET: + self.owners = tuple(self._weakrefs) + + def discard(self, owner: "InterfaceQuantity[ValueT]") -> None: + """Remove a given quantity instance from the set of co-owners + associated with the shared cache at hand. + + :param owner: Quantity instance to remove from the set of co-owners. + """ + # Make sure that the quantity is part of the co-owners + if id(owner) not in map(id, self.owners): + raise ValueError( + "The specified quantity instance is not an owner of this " + "shared cache.") + + # Restore "dynamic" owner list as it may be involved in quantity reset + self.owners = self._weakrefs - def reset(self, *, ignore_auto_refresh: bool = False) -> None: + # Remove quantity instance from shared cache owners + self._weakrefs.discard(owner) + + # Refresh owners. + # Note that one must keep tracking the quantity instance being used in + # computations, aka 'self._owner', even if it is no longer an actual + # shared cache owner. This is necessary because updating it would + # require resetting the state machine, which is not an option as it + # would interfere with quantities storing history since initialization. + if self.sm_state is not QuantityStateMachine.IS_RESET: + self.owners = tuple(self._weakrefs) + + def reset(self, + *, ignore_auto_refresh: bool = False, + reset_state_machine: bool = False) -> None: """Clear value stored in cache if any. :param ignore_auto_refresh: Whether to skip automatic refresh of all co-owner quantities of this shared cache. Optional: False by default. + :param reset_state_machine: Whether to reset completely the state + machine of the underlying quantity, ie not + considering it initialized anymore. + Optional: False by default. 
""" # Clear cache - self._value = None - self._has_value = False + if self.sm_state is _IS_CACHED: + self.sm_state = _IS_INITIALIZED + + # Special branch if case quantities must be reset on the way + if reset_state_machine: + # Reset the state machine completely + self.sm_state = _IS_RESET - # Refresh automatically if any cache owner requested it and not ignored - if not ignore_auto_refresh: + # Update list of owning quantities + self.owners = self._weakrefs + self._owner = None + + # Reset auto-refresh buffer + self._auto_refresh = True + + # Refresh automatically if not already proven useless and not ignored + if not ignore_auto_refresh and self._auto_refresh: for owner in self.owners: if owner.auto_refresh: owner.get() break + else: + self._auto_refresh = False + + def get(self) -> ValueT: + """Return cached value if any, otherwise evaluate it and store it. + """ + # Get value already stored + if self.sm_state is _IS_CACHED: + # return cast(ValueT, self._value) + return self._value # type: ignore[return-value] - def set(self, value: ValueT) -> None: - """Set value in cache, silently overriding the existing value if any. + # Evaluate quantity + try: + if self.sm_state is _IS_RESET: + # Cache the list of owning quantities + self.owners = tuple(self._weakrefs) - .. warning: - Beware the value is stored by reference for efficiency. It is up to - the user to copy it if necessary. + # Stick to the first owning quantity systematically + owner = self.owners[0] + self._owner = owner - :param value: Value to store in cache. - """ - self._value = value - self._has_value = True + # Initialize quantity if not already done manually + if not owner._is_initialized: + owner.initialize() + assert owner._is_initialized - def get(self) -> ValueT: - """Return cached value if any, otherwise raises an exception. - """ - if self._has_value: - return cast(ValueT, self._value) - raise ValueError( - "No value has been stored. Please call 'set' before 'get'.") + # Get first owning quantity systematically + # assert self._owner is not None + owner = self._owner # type: ignore[assignment] + + # Make sure that the state has been refreshed + if owner._force_update_state: + owner.state.get() + + # Refresh quantity + value = owner.refresh() + except RecursionError as e: + raise LookupError( + "Mutual dependency between quantities is disallowed.") from e + + # Update state machine + self.sm_state = _IS_CACHED + + # Return value after storing it + self._value = value + return value class InterfaceQuantity(ABC, Generic[ValueT]): @@ -249,7 +385,7 @@ class InterfaceQuantity(ABC, Generic[ValueT]): requirements: Dict[str, "InterfaceQuantity"] """Intermediary quantities on which this quantity may rely on for its evaluation at some point, depending on the optimal computation path at - runtime. There values will be exposed to the user as usual properties. + runtime. They will be exposed to the user as usual attributes. """ allow_update_graph: ClassVar[bool] = True @@ -257,8 +393,8 @@ class InterfaceQuantity(ABC, Generic[ValueT]): the quantity can be reset at any point in time to re-compute the optimal computation path, typically after deletion or addition of some other node to its dependent sub-graph. When this happens, the quantity gets reset on - the spot, which is not always acceptable, hence the capability to disable - this feature. + the spot, even if a simulation is already running. This is not always + acceptable, hence the capability to disable this feature at class-level. 
""" def __init__(self, @@ -296,13 +432,20 @@ class plus any keyword-arguments of its name: cls(env, self, **kwargs) for name, (cls, kwargs) in requirements.items()} + # Define proxies for user-specified intermediary quantities. + # This approach is much faster than hidding quantities behind value + # getters. In particular, dynamically adding properties, which is hacky + # but which is the fastest alternative option, still adds 35% overhead + # on Python 3.11 compared to calling `get` directly. The "official" + # approaches are even slower, ie implementing custom `__getattribute__` + # method or worst custom `__getattr__` method. + for name, quantity in self.requirements.items(): + setattr(self, name, quantity) + # Update the state explicitly if available but auto-refresh not enabled - self._state: Optional[StateQuantity] = None + self._force_update_state = False if isinstance(self, AbstractQuantity): - quantity = self.requirements["state"] - if not quantity.auto_refresh: - assert isinstance(quantity, StateQuantity) - self._state = quantity + self._force_update_state = not self.state.auto_refresh # Shared cache handling self._cache: Optional[SharedCache[ValueT]] = None @@ -314,16 +457,6 @@ class plus any keyword-arguments of its # Whether the quantity must be re-initialized self._is_initialized: bool = False - # Add getter dynamically for user-specified intermediary quantities. - # This approach is hacky but much faster than any of other official - # approach, ie implementing custom a `__getattribute__` method or even - # worst a custom `__getattr__` method. - def get_value(name: str, quantity: InterfaceQuantity) -> Any: - return quantity.requirements[name].get() - - for name in requirement_names: - setattr(type(self), name, property(partial(get_value, name))) - if TYPE_CHECKING: def __getattr__(self, name: str) -> Any: """Get access to intermediary quantities as first-class properties, @@ -373,7 +506,7 @@ def cache(self, cache: Optional[SharedCache[ValueT]]) -> None: # Withdraw this quantity from the owners of its current cache if any if self._cache is not None: try: - self._cache.owners.discard(self) + self._cache.discard(self) except ValueError: # This may fail if the quantity is already being garbage # collected when clearing cache. @@ -381,7 +514,7 @@ def cache(self, cache: Optional[SharedCache[ValueT]]) -> None: # Declare this quantity as owner of the cache if specified if cache is not None: - cache.owners.add(self) + cache.add(self) # Update internal cache attribute self._cache = cache @@ -411,43 +544,35 @@ def get(self) -> ValueT: .. warning:: This method is not meant to be overloaded. """ - # Get value in cache if available. - # Note that direct access to internal `_value` attribute is preferred - # over the public API `get` for speedup. The same cannot be done for - # `has_value` as it would prevent mocking it during running unit tests - # or benchmarks. - if (self.has_cache and - self._cache.has_value): # type: ignore[union-attr] + # Delegate getting value to shared cache if available + if self._cache is not None: + # Get value + value = self._cache.get() + + # This instance is not forceably considered active at this point. + # Note that it must be done AFTER getting the value, otherwise it + # would mess up with computation graph tracking at initialization. 
self._is_active = True - return self._cache._value # type: ignore[union-attr,return-value] + + # Return cached value + return value # Evaluate quantity try: # Initialize quantity if not self._is_initialized: self.initialize() - assert (self._is_initialized and - self._is_active) # type: ignore[unreachable] - - # Make sure that the state has been refreshed - if self._state is not None: - self._state.get() + assert self._is_initialized # Refresh quantity - value = self.refresh() + return self.refresh() except RecursionError as e: raise LookupError( "Mutual dependency between quantities is disallowed.") from e - # Return value after storing it in shared cache if available - if self.has_cache: - self._cache.set(value) # type: ignore[union-attr] - return value - def reset(self, reset_tracking: bool = False, - ignore_auto_refresh: bool = False, - update_graph: bool = False) -> None: + *, ignore_other_instances: bool = False) -> None: """Consider that the quantity must be re-initialized before being evaluated once again. @@ -465,53 +590,47 @@ def reset(self, :param reset_tracking: Do not consider this quantity as active anymore until the `get` method gets called once again. Optional: False by default. - :param ignore_auto_refresh: Whether to skip automatic refresh of all - co-owner quantities of this shared cache. - Optional: False by default. - :param update_graph: If true, then the quantity will be reset if and - only if dynamic computation graph update is - allowed as prescribed by class attribute - `allow_update_graph`. If false, then it will be - reset no matter what. + :param ignore_other_instances: + Whether to skip reset of intermediary quantities as well as any + shared cache co-owner quantity instances. + Optional: False by default. """ # Make sure that auto-refresh can be honored - if (not ignore_auto_refresh and self.auto_refresh and - not self.has_cache): + if self.auto_refresh and not self.has_cache: raise RuntimeError( "Automatic refresh enabled but no shared cache is available. " "Please add one before calling this method.") # Reset all requirements first - for quantity in self.requirements.values(): - quantity.reset(reset_tracking, ignore_auto_refresh, update_graph) + if not ignore_other_instances: + for quantity in self.requirements.values(): + quantity.reset(reset_tracking, ignore_other_instances=False) - # Skip reset if dynamic computation graph update if appropriate - if update_graph and not self.allow_update_graph: + # Skip reset if dynamic computation graph update is not allowed + if self.env.is_simulation_running and not self.allow_update_graph: return - # No longer consider this exact instance as active if requested + # No longer consider this exact instance as active if reset_tracking: self._is_active = False # No longer consider this exact instance as initialized self._is_initialized = False - # More work must to be done if shared cache is available and has value + # More work must be done if a shared cache is available if self.has_cache: - # Early return if shared cache has no value - if not self.cache.has_value: - return - - # Invalidate cache before looping over all identical properties. - # Note that auto-refresh must be ignored to avoid infinite loop. 
- self.cache.reset(ignore_auto_refresh=True) - - # Reset all identical quantities - for owner in self.cache.owners: - owner.reset(ignore_auto_refresh=True) - - # Reset shared cache one last time but without ignore auto refresh - self.cache.reset(ignore_auto_refresh=ignore_auto_refresh) + # Reset all identical quantities. + # Note that auto-refresh will be done afterward if requested. + if not ignore_other_instances: + for owner in self.cache.owners: + if owner is not self: + owner.reset(reset_tracking=reset_tracking, + ignore_other_instances=True) + + # Reset shared cache + self.cache.reset( + ignore_auto_refresh=not self.env.is_simulation_running, + reset_state_machine=True) def initialize(self) -> None: """Initialize internal buffers. @@ -551,7 +670,7 @@ def refresh(self) -> ValueT: Type[InterfaceQuantity[QuantityValueT_co]], Dict[str, Any]] -class QuantityEvalMode(Enum): +class QuantityEvalMode(IntEnum): """Specify on which state to evaluate a given quantity. """ @@ -564,6 +683,10 @@ class QuantityEvalMode(Enum): """ +# Define proxies for fast lookup +_TRUE, _REFERENCE = QuantityEvalMode + + @dataclass(unsafe_hash=True) class AbstractQuantity(InterfaceQuantity, Generic[ValueT]): """Base class for generic quantities involved observer-controller blocks, @@ -642,7 +765,7 @@ class plus any keyword-arguments of its super().__init__(env, parent, requirements, auto_refresh=auto_refresh) # Add trajectory quantity proxy - trajectory = self.requirements["state"].requirements["trajectory"] + trajectory = self.state.trajectory assert isinstance(trajectory, DatasetTrajectoryQuantity) self.trajectory = trajectory @@ -656,14 +779,13 @@ def initialize(self) -> None: super().initialize() # Force initializing state quantity - state = self.requirements["state"] - state.initialize() + self.state.initialize() # Refresh robot proxy - assert isinstance(state, StateQuantity) - self.robot = state.robot - self.pinocchio_model = state.pinocchio_model - self.pinocchio_data = state.pinocchio_data + assert isinstance(self.state, StateQuantity) + self.robot = self.state.robot + self.pinocchio_model = self.state.pinocchio_model + self.pinocchio_data = self.state.pinocchio_data def sync(fun: Callable[..., None]) -> Callable[..., None]: @@ -996,17 +1118,14 @@ def __init__(self, # only refresh the state when needed if the evaluation mode is TRAJ. # * Update state: 500ns (TRUE) | 5.0us (TRAJ) # * Check cache state: 70ns - auto_refresh = mode == QuantityEvalMode.TRUE + auto_refresh = mode is QuantityEvalMode.TRUE # Call base implementation. super().__init__( - env, parent, requirements={}, auto_refresh=auto_refresh) - - # Create empty trajectory database, manually added as a requirement. - # Note that it must be done after base initialization, otherwise a - # getter will be added for it as first-class property. 
- self.trajectory = DatasetTrajectoryQuantity(env, self) - self.requirements["trajectory"] = self.trajectory + env, + parent, + requirements=dict(trajectory=(DatasetTrajectoryQuantity, {})), + auto_refresh=auto_refresh) # Robot for which the quantity must be evaluated self.robot = env.robot @@ -1014,7 +1133,7 @@ def __init__(self, self.pinocchio_data = env.robot.pinocchio_data # State for which the quantity must be evaluated - self.state = State(t=np.nan, q=np.array([])) + self._state = State(t=np.nan, q=np.array([])) # Persistent buffer for storing body external forces if necessary self._f_external_vec = pin.StdVec_Force() @@ -1068,7 +1187,7 @@ def initialize(self) -> None: assert isinstance(owner, StateQuantity) if owner._is_initialized: continue - if owner.mode == QuantityEvalMode.TRUE: + if owner.mode is QuantityEvalMode.TRUE: owner.robot = owner.env.robot use_theoretical_model = False else: @@ -1086,7 +1205,7 @@ def initialize(self) -> None: super().initialize() # Refresh proxies and allocate memory for storing external forces - if self.mode == QuantityEvalMode.TRUE: + if self.mode is QuantityEvalMode.TRUE: self._f_external_vec = self.env.robot_state.f_external else: self._f_external_vec = pin.StdVec_Force() @@ -1128,11 +1247,11 @@ def initialize(self) -> None: i += constraint.size # Allocate state for which the quantity must be evaluated if needed - if self.mode == QuantityEvalMode.TRUE: + if self.mode is QuantityEvalMode.TRUE: if not self.env.is_simulation_running: raise RuntimeError("No simulation running. Impossible to " "initialize this quantity.") - self.state = State( + self._state = State( 0.0, self.env.robot_state.q, self.env.robot_state.v, @@ -1146,21 +1265,21 @@ def refresh(self) -> State: """Compute the current state depending on the mode of evaluation, and make sure that kinematics and dynamics quantities are up-to-date. 
""" - if self.mode == QuantityEvalMode.TRUE: + if self.mode is _TRUE: # Update the current simulation time - self.state.t = self.env.stepper_state.t + self._state.t = self.env.stepper_state.t # Update external forces and constraint multipliers in state buffer multi_array_copyto(self._f_external_slices, self._f_external_list) multi_array_copyto( self._constraint_lambda_slices, self._constraint_lambda_list) else: - self.state = self.trajectory.get() + self._state = self.trajectory.get() # Copy body external forces from stacked buffer to force vector - has_forces = self.state.f_external is not None + has_forces = self._state.f_external is not None if has_forces: - array_copyto(self._f_external_batch, self.state.f_external) + array_copyto(self._f_external_batch, self._state.f_external) multi_array_copyto(self._f_external_list, self._f_external_slices) @@ -1168,9 +1287,9 @@ def refresh(self) -> State: if self.update_kinematics: update_quantities( self.robot, - self.state.q, - self.state.v, - self.state.a, + self._state.q, + self._state.v, + self._state.a, self._f_external_vec if has_forces else None, update_dynamics=self._update_dynamics, update_centroidal=self._update_centroidal, @@ -1181,11 +1300,11 @@ def refresh(self) -> State: self.trajectory.use_theoretical_model)) # Restore lagrangian multipliers of the constraints if available - if self.state.lambda_c is not None: + if self._state.lambda_c is not None: array_copyto( - self._constraint_lambda_batch, self.state.lambda_c) + self._constraint_lambda_batch, self._state.lambda_c) multi_array_copyto(self._constraint_lambda_list, self._constraint_lambda_slices) # Return state - return self.state + return self._state diff --git a/python/gym_jiminy/common/gym_jiminy/common/compositions/generic.py b/python/gym_jiminy/common/gym_jiminy/common/compositions/generic.py index ce244c168..7852688cd 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/compositions/generic.py +++ b/python/gym_jiminy/common/gym_jiminy/common/compositions/generic.py @@ -97,7 +97,7 @@ def __init__(self, Group. The basic subtraction operator `operator.sub` is appropriate for the Euclidean space. Optional: `operator.sub` by default. - :param order: Order of Lp-Norm that will be used as distance metric. + :param order: Order of L^p-norm that will be used as distance metric. Optional: 2 by default. """ # Backup some user argument(s) @@ -274,7 +274,7 @@ class ShiftTrackingQuantityTermination(QuantityTermination[np.ndarray]): """Base class to derive termination condition from the shift between the current and reference values of a given quantity. - The shift is defined as the minimum time-aligned distance (L2-norm of the + The shift is defined as the minimum time-aligned distance (L^2-norm of the difference) between two multivariate time series. In this case, a variable-length horizon bounded by 'max_stack' is considered. @@ -327,7 +327,7 @@ def __init__(self, common subtraction operator `operator.sub` is appropriate for Euclidean space. Optional: `operator.sub` by default. - :param order: Order of Lp-Norm that will be used as distance metric. + :param order: Order of L^p-norm that will be used as distance metric. :param is_truncation: Whether the episode should be considered terminated or truncated whenever the termination condition is triggered. 
@@ -445,9 +445,8 @@ def initialize(self) -> None: super().initialize() # Initialize the actuated joint position indices - quantity = self.requirements["position"] - quantity.initialize() - position_indices = quantity.kinematic_indices + self.position.initialize() + position_indices = self.position.kinematic_indices # Refresh mechanical joint position indices position_limit_low = self.env.robot.pinocchio_model.lowerPositionLimit @@ -456,8 +455,8 @@ self.position_high = position_limit_high[position_indices] def refresh(self) -> Tuple[np.ndarray, np.ndarray]: - return (self.position - self.position_low, - self.position_high - self.position) + position = self.position.get() + return (position - self.position_low, self.position_high - position) class MechanicalSafetyTermination(AbstractTerminationCondition): diff --git a/python/gym_jiminy/common/gym_jiminy/common/compositions/locomotion.py b/python/gym_jiminy/common/gym_jiminy/common/compositions/locomotion.py index 50c8b2cae..7e49596e4 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/compositions/locomotion.py +++ b/python/gym_jiminy/common/gym_jiminy/common/compositions/locomotion.py @@ -238,12 +238,12 @@ class MinimizeFrictionReward(QuantityReward): """Reward the agent for minimizing the tangential forces at all the contact points and collision bodies, and to avoid jerky intermittent contact state. - The L2-norm is used to aggregate all the local tangential forces. While the - L1-norm would be more natural in this specific cases, using the L2-norm is - preferable as it promotes space-time regularity, ie balancing the force - distribution evenly between all the candidate contact points and avoiding - jerky contact forces over time (high-frequency vibrations), phenomena to - which the L1-norm is completely insensitive. + The L^2-norm is used to aggregate all the local tangential forces. While + the L^1-norm would be more natural in this specific case, using the + L^2-norm is preferable as it promotes space-time regularity, ie balancing + the force distribution evenly between all the candidate contact points and + avoiding jerky contact forces over time (high-frequency vibrations), + phenomena to which the L^1-norm is completely insensitive. """ def __init__(self, env: InterfaceJiminyEnv, @@ -481,8 +481,9 @@ def initialize(self) -> None: def refresh(self) -> float: # Query the height and normal to the ground profile for the position in # world plane of all the contact points. 
+ positions = self.positions.get() jiminy.query_heightmap(self._heightmap, - self.positions[:2], + positions[:2], self._heights, self._normals) @@ -490,7 +491,7 @@ # self._normals /= np.linalg.norm(self._normals, axis=0) # First-order distance estimation assuming no curvature - return self._min_depth(self.positions, self._heights, self._normals) + return self._min_depth(positions, self._heights, self._normals) class FlyingTermination(QuantityTermination): @@ -561,7 +562,7 @@ def __init__(self, """ super().__init__( env, - "termination_flying", + "termination_impact_force", (MaskedQuantity, dict( # type: ignore[arg-type] quantity=(MultiContactNormalizedSpatialForce, dict()), axis=0, diff --git a/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py b/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py index d71b092a1..d3e5ea57b 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py +++ b/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py @@ -5,7 +5,7 @@ import math import logging from functools import partial -from typing import Sequence, Tuple, Optional, Union +from typing import Sequence, Tuple, Optional, Union, Literal import numpy as np import numba as nb @@ -36,7 +36,7 @@ def radial_basis_function(error: ArrayOrScalar, where :math:`dist(x, x_ref)` is some distance metric of the error between the observed (:math:`x`) and desired (:math:`x_ref`) values of a - multi-variate quantity. The L2-norm (Euclidean norm) was used when it was + multi-variate quantity. The L^2-norm (Euclidean norm) was used when it was first introduced as a non-linear kernel for Support Vector Machine (SVM) algorithm. Such restriction does not make sense in the context of reward normalization. The scaling parameter :math:`sigma` is derived from the @@ -45,7 +45,7 @@ def radial_basis_function(error: ArrayOrScalar, :param error: Multi-variate error on some tangent space as a 1D array. :param cutoff: Cut-off threshold to consider. - :param order: Order of Lp-Norm that will be used as distance metric. + :param order: Order of L^p-norm that will be used as distance metric. """ error_ = np.asarray(error) is_contiguous = error_.flags.f_contiguous or error_.flags.c_contiguous @@ -67,33 +67,48 @@ class AdditiveMixtureReward(MixtureReward): - """Weighted sum of multiple independent reward components. - - Aggregation of reward components using the addition operator is suitable - when improving the behavior for any of them without the others is equally - beneficial, and unbalanced performance for each reward component is - considered acceptable rather than detrimental. It especially makes sense + """Weighted L^p-norm of multiple independent reward components. + + Aggregating the reward components using the L^p-norm progressively + transitions from promoting versatility for 0 < p < 1, to overall + competency for p = 1, and ultimately specialization for p > 1. In + particular, the L^1-norm is appropriate when improving the behavior for + any of them without the others is equally beneficial, and unbalanced + performance for each reward component is considered acceptable rather + than detrimental. It especially makes sense for reward that are not competing with each other (improving one tends to impede some other). In the latter case, the multiplicative operator is more appropriate. See `MultiplicativeMixtureReward` documentation for details. + + .. 
note:: + Combining `AdditiveMixtureReward` for L^inf-norm with `SurviveReward` + ensures a minimum reward that the agent would obtain no matter what, + to encourage surviving as a last resort. This is usually useful to + bootstrap learning at the very beginning. """ def __init__(self, env: InterfaceJiminyEnv, name: str, components: Sequence[AbstractReward], + order: Union[int, float, Literal['inf']] = 1, weights: Optional[Sequence[float]] = None) -> None: """ :param env: Base or wrapped jiminy environment. :param name: Desired name of the total reward. :param components: Sequence of reward components to aggregate. - :param weights: Sequence of weights associated with each reward - components, with the same ordering as 'components'. - Optional: 1.0 for all reward components by default. + :param order: Order of L^p-norm used to aggregate the reward components. + :param weights: Optional sequence of weights associated with each + reward component, with the same ordering as 'components'. + Optional: Uniform weights that preserve normalization by + default, ie `(1.0 / len(components),) * len(components)`. """ # Handling of default arguments if weights is None: - weights = (1.0,) * len(components) + weights = (1.0 / len(components),) * len(components) + + # Make sure that the order is strictly positive + if not isinstance(order, str) and order <= 0.0: + raise ValueError("'order' must be strictly positive or 'inf'.") # Make sure that the weight sequence is consistent with the components if len(weights) != len(components): @@ -107,7 +122,7 @@ def __init__(self, if weight > 0.0)) # Determine whether the cumulative reward is normalized - weight_total = 0.0 + scale = 0.0 for weight, reward in zip(weights, components): if not reward.is_normalized: LOGGER.warning( @@ -116,24 +131,32 @@ def __init__(self, "recommended.", reward.name) is_normalized = False break - weight_total += weight + if order == 'inf': + scale = max(scale, weight) + else: + scale += weight else: - is_normalized = abs(weight_total - 1.0) < 1e-4 + is_normalized = abs(1.0 - scale) < 1e-4 # Backup user-arguments + self.order = order self.weights = tuple(weights) # Jit-able method computing the weighted sum of reward components @nb.jit(nopython=True, cache=True, fastmath=True) - def weighted_sum(weights: Tuple[float, ...], - values: Tuple[Optional[float], ...] - ) -> Optional[float]: - """Compute the weighted sum of all the reward components that has - been evaluated, filtering out the others. + def weighted_norm(weights: Tuple[float, ...], + order: Union[int, float, Literal['inf']], + values: Tuple[Optional[float], ...] + ) -> Optional[float]: + """Compute the weighted L^p-norm of all the reward components that + have been evaluated, filtering out the others. This method returns `None` if no reward component has been evaluated. + :param weights: Sequence of weights for each reward component, with + the same ordering as 'components'. + :param order: Order of the L^p-norm. :param values: Sequence of scalar value for reward components that has been evaluated, `None` otherwise, with the same ordering as 'components'. :returns: Scalar value if at least one of the reward components has been evaluated, `None` otherwise. 
""" total, any_value = 0.0, False - for weight, value in zip(weights, values): + for value, weight in zip(values, weights): if value is not None: - total += weight * value + if isinstance(order, str): + if any_value: + total = max(total, weight * value) + else: + total = value + else: + total += weight * math.pow(value, order) any_value = True - return total if any_value else None + if any_value: + if isinstance(order, str): + return total + return math.pow(total, 1.0 / order) + return None # Call base implementation super().__init__( env, name, components, - partial(weighted_sum, self.weights), + partial(weighted_norm, self.weights, self.order), is_normalized) @@ -167,9 +200,9 @@ def weighted_sum(weights: Tuple[float, ...], class MultiplicativeMixtureReward(MixtureReward): - """Product of multiple independent reward components. + """Geometric mean of independent reward components, to promote versatility. - Aggregation of reward components using multiplication operator is suitable + Aggregating the reward components using the geometric mean is appropriate when maintaining balanced performance between all reward components is essential, and having poor performance for any of them is unacceptable. This type of aggregation is especially useful when reward components are @@ -189,29 +222,33 @@ def __init__(self, # Determine whether the cumulative reward is normalized is_normalized = all(reward.is_normalized for reward in components) - # Call base implementation - super().__init__(env, name, components, self._reduce, is_normalized) + # Jit-able method computing the product of reward components + @nb.jit(nopython=True, cache=True, fastmath=True) + def geometric_mean( + values: Tuple[Optional[float], ...]) -> Optional[float]: + """Compute the product of all the reward components that has + been evaluated, filtering out the others. - def _reduce(self, values: Sequence[Optional[float]]) -> Optional[float]: - """Compute the product of all the reward components that has been - evaluated, filtering out the others. + This method returns `None` if no reward component has been + evaluated. - This method returns `None` if no reward component has been evaluated. + :param values: Sequence of scalar value for reward components that + has been evaluated, `None` otherwise, with the same + ordering as 'components'. - :param values: Sequence of scalar value for reward components that has - been evaluated, `None` otherwise, with the same ordering - as 'components'. + :returns: Scalar value if at least one of the reward component has + been evaluated, `None` otherwise. + """ + total, any_value, n_values = 1.0, False, 0 + for value in values: + if value is not None: + total *= value + any_value = True + n_values += 1 + return math.pow(total, 1.0 / n_values) if any_value else None - :returns: Scalar value if at least one of the reward component has been - evaluated, `None` otherwise. 
- """ - # TODO: x2 speedup can be expected with `nb.jit` - total, any_value = 1.0, False - for value in values: - if value is not None: - total *= value - any_value = True - return total if any_value else None + # Call base implementation + super().__init__(env, name, components, geometric_mean, is_normalized) MultiplicativeMixtureReward.is_normalized.__doc__ = \ diff --git a/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py b/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py index 43d4b37e0..0172309d7 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py +++ b/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py @@ -109,6 +109,7 @@ class BaseJiminyEnv(InterfaceJiminyEnv[ObsT, ActT], def __init__(self, simulator: Simulator, step_dt: float, + simulation_duration_max: float = 86400.0, debug: bool = False, render_mode: Optional[str] = None, **kwargs: Any) -> None: @@ -119,11 +120,15 @@ def __init__(self, independent from the controller and observation update periods. The latter are configured via `engine.set_options`. - :param mode: Rendering mode. It can be either 'human' to display the - current simulation state, or 'rgb_array' to return a - snapshot as an RGB array without showing it on the screen. - Optional: 'human' by default if available with the current - backend (or default if none), 'rgb_array' otherwise. + :param simulation_duration_max: + Maximum duration of a simulation. If the current simulation time + exceeds this threshold, then it will triggers `is_truncated=True`. + It cannot exceed the maximum possible duration before telemetry + log time overflow which is extremely large (about 30 years). Beware + that log data are stored in RAM, which may cause out-of-memory + error if the episode is lasting for too long without reset. + Optional: About 4GB of log data assuming 5ms control update period + and telemetry disabled for everything but the robot configuration. :param debug: Whether the debug mode must be enabled. Doing it enables telemetry recording. :param render_mode: Desired rendering mode, ie "human" or "rgb_array". @@ -174,7 +179,8 @@ def __init__(self, assert render_mode in self.metadata['render_modes'] # Backup some user arguments - self.simulator: Simulator = simulator + self.simulator = simulator + self.simulation_duration_max = simulation_duration_max self._step_dt = step_dt self.render_mode = render_mode self.debug = debug @@ -707,12 +713,6 @@ def reset(self, # type: ignore[override] self.robot.compute_sensor_measurements( 0.0, q_init, v_init, a_init, u_motor, f_external) - # Re-initialize the quantity manager. - # Note that computation graph tracking is never reset automatically. - # It is the responsibility of the practitioner implementing a derived - # environment whenever it makes sense for its specific use-case. - self.quantities.reset(reset_tracking=False) - # Run the reset hook if any. # Note that the reset hook must be called after `_setup` because it # expects that the robot is not going to change anymore at this point. @@ -727,6 +727,12 @@ def reset(self, # type: ignore[override] env = env_derived self.derived = env + # Re-initialize the quantity manager. + # Note that computation graph tracking is never reset automatically. + # It is the responsibility of the practitioner implementing a derived + # environment whenever it makes sense for its specific use-case. + self.quantities.reset(reset_tracking=False) + # Instantiate the actual controller. # Note that a weak reference must be used to avoid circular reference. 
self.robot.controller = jiminy.FunctionalController( @@ -751,6 +757,9 @@ def reset(self, # type: ignore[override] # Update shared buffers self._refresh_buffers() + # Clear cache and auto-refresh managed quantities + self.quantities.clear() + # Initialize the observation env._observer_handle( self.stepper_state.t, @@ -879,6 +888,9 @@ def step(self, # type: ignore[override] # Update shared buffers self._refresh_buffers() + # Clear cache and auto-refresh managed quantities + self.quantities.clear() + # Update the observer at the end of the step. # This is necessary because, internally, it is called at the beginning # of the every integration steps, during the controller update. @@ -1502,9 +1514,7 @@ def has_terminated(self, info: InfoType) -> Tuple[bool, bool]: .. warning:: No matter what, truncation will happen when reaching the maximum - simulation duration, i.e. 'self.simulation_duration_max'. Its - default value is extremely large, but it can be overwritten by the - user to terminate the simulation earlier. + simulation duration, i.e. 'self.simulation_duration_max'. .. note:: This method is called after `refresh_observation`, so that the diff --git a/python/gym_jiminy/common/gym_jiminy/common/envs/locomotion.py b/python/gym_jiminy/common/gym_jiminy/common/envs/locomotion.py index 661a854d9..c4e0b5165 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/envs/locomotion.py +++ b/python/gym_jiminy/common/gym_jiminy/common/envs/locomotion.py @@ -150,7 +150,6 @@ def __init__(self, std_ratio = {k: v for k, v in std_ratio.items() if v > 0.0} # Backup user arguments - self.simulation_duration_max = simulation_duration_max self.reward_mixture = reward_mixture self.urdf_path = urdf_path self.mesh_path_dir = mesh_path_dir @@ -202,7 +201,8 @@ def __init__(self, simulator.import_options(config_path) # Initialize base class - super().__init__(simulator, step_dt, debug, **kwargs) + super().__init__( + simulator, step_dt, simulation_duration_max, debug, **kwargs) def _setup(self) -> None: """Configure the environment. diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py index 2d7c64f90..9def585df 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py +++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py @@ -4,7 +4,7 @@ application (locomotion, grasping...). """ import warnings -from enum import Enum +from enum import IntEnum from functools import partial from dataclasses import dataclass from typing import ( @@ -90,9 +90,11 @@ def aggregate_frame_names(quantity: InterfaceQuantity) -> Tuple[ # First, order all multi-frame quantities by decreasing length frame_names_chunks: List[Tuple[str, ...]] = [] for owner in quantities: - if owner.parent.is_active(any_cache_owner=False): - if isinstance(owner.parent, MultiFrameQuantity): - frame_names_chunks.append(owner.parent.frame_names) + parent = owner.parent + assert parent is not None + if parent.is_active(any_cache_owner=False): + if isinstance(parent, MultiFrameQuantity): + frame_names_chunks.append(parent.frame_names) # Next, process ordered multi-frame quantities sequentially. # For each of them, we first check if its set of frames is completely @@ -132,9 +134,11 @@ def aggregate_frame_names(quantity: InterfaceQuantity) -> Tuple[ # Otherwise, we just move to the next quantity. 
frame_name_chunks: List[str] = [] for owner in quantities: - if owner.parent.is_active(any_cache_owner=False): - if isinstance(owner.parent, FrameQuantity): - frame_name_chunks.append(owner.parent.frame_name) + parent = owner.parent + assert parent is not None + if parent.is_active(any_cache_owner=False): + if isinstance(parent, FrameQuantity): + frame_name_chunks.append(parent.frame_name) frame_name = frame_name_chunks[-1] if frame_name not in frame_names: frame_names.append(frame_name) @@ -263,7 +267,7 @@ def refresh(self) -> Dict[Union[str, Tuple[str, ...]], np.ndarray]: return self._rot_mat_map -class OrientationType(Enum): +class OrientationType(IntEnum): """Specify the desired vector representation of the frame orientations. """ @@ -284,6 +288,11 @@ class OrientationType(Enum): """ +# Define proxies for fast lookup +_MATRIX, _EULER, _QUATERNION, _ANGLE_AXIS = ( # pylint: disable=invalid-name + OrientationType) + + @dataclass(unsafe_hash=True) class _BatchedFramesOrientation( InterfaceQuantity[Dict[Union[str, Tuple[str, ...]], np.ndarray]]): @@ -407,12 +416,13 @@ def initialize(self) -> None: def refresh(self) -> Dict[Union[str, Tuple[str, ...]], np.ndarray]: # Get the complete batch of rotation matrices managed by this instance - rot_mat_batch = self.rot_mat_map[self.frame_names] + value = self.rot_mat_map.get() + rot_mat_batch = value[self.frame_names] # Convert all rotation matrices at once to the desired representation - if self.type == OrientationType.EULER: + if self.type == _EULER: matrix_to_rpy(rot_mat_batch, self._data_batch) - elif self.type == OrientationType.QUATERNION: + elif self.type == _QUATERNION: matrix_to_quat(rot_mat_batch, self._data_batch) else: # Slice data. @@ -490,10 +500,14 @@ def initialize(self) -> None: # Force re-initializing shared data if the active set has changed if not was_active: + # Must reset the tracking for shared computation systematically, + # just in case the optimal computation path has changed to the + # point that relying on the batched quantity is no longer relevant. - self.requirements["data"].reset(reset_tracking=True) + self.data.reset(reset_tracking=True) def refresh(self) -> np.ndarray: - return self.data[self.frame_name] + value = self.data.get() + return value[self.frame_name] @dataclass(unsafe_hash=True) @@ -570,16 +584,14 @@ def initialize(self) -> None: # Force re-initializing shared data if the active set has changed if not was_active: - # Must reset the tracking for shared computation systematically, - # just in case the optimal computation path has changed to the - # point that relying on batched quantity is no longer relevant. - self.requirements["data"].reset(reset_tracking=True) + self.data.reset(reset_tracking=True) def refresh(self) -> np.ndarray: # Return a slice of batched data. # Note that mapping from frame names to frame index in batched data # cannot be pre-computed as it may changed dynamically. 
- return self.data[self.frame_names] + value = self.data.get() + return value[self.frame_names] @dataclass(unsafe_hash=True) @@ -726,10 +738,11 @@ def initialize(self) -> None: # Force re-initializing shared data if the active set has changed if not was_active: - self.requirements["data"].reset(reset_tracking=True) + self.data.reset(reset_tracking=True) def refresh(self) -> np.ndarray: - return self.data[self.frame_name] + value = self.data.get() + return value[self.frame_name] @dataclass(unsafe_hash=True) @@ -789,10 +802,11 @@ def initialize(self) -> None: # Force re-initializing shared data if the active set has changed if not was_active: - self.requirements["data"].reset(reset_tracking=True) + self.data.reset(reset_tracking=True) def refresh(self) -> np.ndarray: - return self.data[self.frame_names] + value = self.data.get() + return value[self.frame_names] @dataclass(unsafe_hash=True) @@ -852,10 +866,10 @@ def __init__(self, def refresh(self) -> np.ndarray: # Copy the position of all frames at once in contiguous buffer - array_copyto(self._xyzquat[:3], self.position) + array_copyto(self._xyzquat[:3], self.position.get()) # Copy the quaternion of all frames at once in contiguous buffer - array_copyto(self._xyzquat[-4:], self.quat) + array_copyto(self._xyzquat[-4:], self.quat.get()) return self._xyzquat @@ -920,10 +934,10 @@ def __init__(self, def refresh(self) -> np.ndarray: # Copy the position of all frames at once in contiguous buffer - array_copyto(self._xyzquats[:3], self.positions) + array_copyto(self._xyzquats[:3], self.positions.get()) # Copy the quaternion of all frames at once in contiguous buffer - array_copyto(self._xyzquats[-4:], self.quats) + array_copyto(self._xyzquats[-4:], self.quats.get()) return self._xyzquats @@ -1040,10 +1054,10 @@ def quat_average_2d(quat: np.ndarray, out: np.ndarray) -> None: def refresh(self) -> np.ndarray: # Compute the mean translation - self._position_average(self.positions, self._position_mean_view) + self._position_average(self.positions.get(), self._position_mean_view) # Compute the mean quaternion - self._quat_average(self.quats, self._quat_mean_view) + self._quat_average(self.quats.get(), self._quat_mean_view) return self._xyzquat_mean @@ -1264,7 +1278,7 @@ def refresh(self) -> np.ndarray: # point. This should never occur in practice as it will be fine at # the end of the first step already, before the reward and termination # conditions are evaluated. 
-        xyzquat_prev, xyzquat = self.xyzquat_stack
+        xyzquat_prev, xyzquat = self.xyzquat_stack.get()

         # Compute average frame velocity in local frame since previous step
         self._data[:] = self._difference(xyzquat_prev, xyzquat)
@@ -1342,7 +1356,8 @@ def __init__(self,

     def refresh(self) -> np.ndarray:
         # Interpolate the average spatial velocity at midpoint
-        return self._integrate(self.xyzquat_next, - 0.5 * self.xyzquat_diff)
+        return self._integrate(
+            self.xyzquat_next.get(), - 0.5 * self.xyzquat_diff.get())


 @dataclass(unsafe_hash=True)
@@ -1405,7 +1420,7 @@ def __init__(self,

     def refresh(self) -> np.ndarray:
         # Compute Yaw-free average orientation
-        remove_yaw_from_quat(self.quat_mean, self._quat_no_yaw_mean)
+        remove_yaw_from_quat(self.quat_mean.get(), self._quat_no_yaw_mean)

         return self._quat_no_yaw_mean
@@ -1506,14 +1521,15 @@ def __init__(self,

     def refresh(self) -> np.ndarray:
         # Compute average frame velocity in local frame since previous step
-        np.multiply(self.xyzquat_diff, self._inv_step_dt, self._v_spatial)
+        np.multiply(
+            self.xyzquat_diff.get(), self._inv_step_dt, self._v_spatial)

         # Translate local velocity to world frame
         if self.reference_frame == pin.LOCAL_WORLD_ALIGNED:
             # Define world frame as the "middle" between prev and next pose.
             # Here, we only care about the middle rotation, so we can consider
             # SO3 Lie Group algebra instead of SE3.
-            quat_apply(self.quat_mean, self._v_lin_ang, self._v_lin_ang)
+            quat_apply(self.quat_mean.get(), self._v_lin_ang, self._v_lin_ang)

         return self._v_spatial
@@ -1599,7 +1615,7 @@ def initialize(self) -> None:
         super().initialize()

         # Make sure that the state data meet requirements
-        state = self.state
+        state = self.state.get()
         if ((self.kinematic_level == pin.ACCELERATION and state.a is None)
                 or (self.kinematic_level >= pin.VELOCITY and state.v is None)):
             raise RuntimeError(
@@ -1645,22 +1661,24 @@ def initialize(self) -> None:
         if self._must_refresh:
             self._data = np.full((len(self.kinematic_indices),), float("nan"))
         else:
+            state = self.state.get()
             if self.kinematic_level == pin.KinematicLevel.POSITION:
-                self._data = self.state.q[slice(kin_first, kin_last + 1)]
+                self._data = state.q[slice(kin_first, kin_last + 1)]
             elif self.kinematic_level == pin.KinematicLevel.VELOCITY:
-                self._data = self.state.v[slice(kin_first, kin_last + 1)]
+                self._data = state.v[slice(kin_first, kin_last + 1)]
             else:
-                self._data = self.state.a[slice(kin_first, kin_last + 1)]
+                self._data = state.a[slice(kin_first, kin_last + 1)]

     def refresh(self) -> np.ndarray:
         # Update mechanical joint positions only if necessary
+        state = self.state.get()
         if self._must_refresh:
             if self.kinematic_level == pin.KinematicLevel.POSITION:
-                data = self.state.q
+                data = state.q
             elif self.kinematic_level == pin.KinematicLevel.VELOCITY:
-                data = self.state.v
+                data = state.v
             else:
-                data = self.state.a
+                data = state.a
             data.take(self.kinematic_indices, None, self._data, "clip")

         # Translate encoder data at joint level
@@ -1670,7 +1688,7 @@ def refresh(self) -> np.ndarray:
         return self._data


-class EnergyGenerationMode(Enum):
+class EnergyGenerationMode(IntEnum):
     """Specify what happens to the energy generated by motors when braking.
     """
@@ -1695,6 +1713,10 @@ class EnergyGenerationMode(Enum):
     """


+# Define proxies for fast lookup
+_CHARGE, _LOST_EACH, _LOST_GLOBAL, _PENALIZE = map(int, EnergyGenerationMode)
+
+
 @dataclass(unsafe_hash=True)
 class AverageMechanicalPowerConsumption(InterfaceQuantity[float]):
     """Average mechanical power consumption by all the motors over a sliding
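Casting the members to plain `int` proxies is presumably what keeps the jitted helper further down fast: module-level integers are frozen as compile-time constants by numba in `nopython` mode, whereas dispatching on Python enum members from jitted code is not as cheap. A self-contained sketch of the technique under that assumption, with hypothetical names:

    from enum import IntEnum

    import numba as nb


    class Mode(IntEnum):
        CHARGE = 0
        PENALIZE = 1


    # Plain-int proxies, usable as constants inside nopython code
    _CHARGE, _PENALIZE = map(int, Mode)


    @nb.jit(nopython=True)
    def signed_power(mode: int, power: float) -> float:
        # Branch on the int proxy rather than on the enum member
        if mode == _CHARGE:
            return power
        return abs(power)


    signed_power(int(Mode.PENALIZE), -1.5)  # callers cast the enum once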
""" @@ -1695,6 +1713,10 @@ class EnergyGenerationMode(Enum): """ +# Define proxies for fast lookup +_CHARGE, _LOST_EACH, _LOST_GLOBAL, _PENALIZE = map(int, EnergyGenerationMode) + + @dataclass(unsafe_hash=True) class AverageMechanicalPowerConsumption(InterfaceQuantity[float]): """Average mechanical power consumption by all the motors over a sliding @@ -1750,7 +1772,7 @@ def __init__( # Jit-able method computing the total instantaneous power consumption @nb.jit(nopython=True, cache=True, fastmath=True) - def _compute_power(generator_mode: EnergyGenerationMode, + def _compute_power(generator_mode: int, # EnergyGenerationMode motor_velocities: np.ndarray, motor_efforts: np.ndarray) -> float: """Compute the total instantaneous mechanical power consumption of @@ -1765,14 +1787,13 @@ def _compute_power(generator_mode: EnergyGenerationMode, as a 1D array. The order must be consistent with the motor indices. """ - if generator_mode in (EnergyGenerationMode.CHARGE, - EnergyGenerationMode.LOST_GLOBAL): + if generator_mode in (_CHARGE, _LOST_GLOBAL): total_power = np.dot(motor_velocities, motor_efforts) - if generator_mode == EnergyGenerationMode.CHARGE: + if generator_mode == _CHARGE: return total_power return max(total_power, 0.0) motor_powers = motor_velocities * motor_efforts - if generator_mode == EnergyGenerationMode.LOST_EACH: + if generator_mode == _LOST_EACH: return np.sum(np.maximum(motor_powers, 0.0)) return np.sum(np.abs(motor_powers)) @@ -1792,11 +1813,11 @@ def _compute_power(generator_mode: EnergyGenerationMode, kinematic_level=pin.KinematicLevel.VELOCITY, is_motor_side=True, mode=self.mode)), - op=partial(_compute_power, self.generator_mode))), + op=partial(_compute_power, int(self.generator_mode)))), max_stack=self.max_stack, as_array=True, mode='slice'))), auto_refresh=False) def refresh(self) -> float: - return np.mean(self.total_power_stack) + return np.mean(self.total_power_stack.get()) diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py index 4576cce96..1cb5f4b2e 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py +++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py @@ -156,7 +156,8 @@ def __init__(self, auto_refresh=False) def refresh(self) -> float: - return self.base_pos[2] - np.min(self.contacts_pos[2]) + base_pos, contacts_pos = self.base_pos.get(), self.contacts_pos.get() + return base_pos[2] - np.min(contacts_pos[2]) @dataclass(unsafe_hash=True) @@ -285,8 +286,9 @@ def __init__(self, def refresh(self) -> np.ndarray: # Translate spatial base velocity from local to odometry frame - quat_apply(self.quat_no_yaw_mean, - self.v_spatial.reshape((2, 3)).T, + v_spatial = self.v_spatial.get() + quat_apply(self.quat_no_yaw_mean.get(), + v_spatial.reshape((2, 3)).T, self._v_lin_ang) return self._v_spatial @@ -338,7 +340,7 @@ def __init__(self, auto_refresh=False) def refresh(self) -> np.ndarray: - return self.data + return self.data.get() @dataclass(unsafe_hash=True) @@ -404,10 +406,11 @@ def initialize(self) -> None: def refresh(self) -> np.ndarray: # Compute the local angular momentum of inertia - np.matmul(self._inertia_local, self.v_angular, self._h_angular) + np.matmul(self._inertia_local, self.v_angular.get(), self._h_angular) # Apply quaternion rotation of the local angular momentum of inertia - quat_apply(self.quat_no_yaw_mean, self._h_angular, self._h_angular) + quat_apply( + self.quat_no_yaw_mean.get(), self._h_angular, 
self._h_angular) return self._h_angular @@ -472,7 +475,7 @@ def __init__(self, auto_refresh=False) def refresh(self) -> np.ndarray: - return self.data + return self.data.get() @dataclass(unsafe_hash=True) @@ -545,10 +548,11 @@ def __init__(self, def refresh(self) -> np.ndarray: # Copy translation part - array_copyto(self._xy_view, self.xyzquat_mean[:2]) + xyzquat_mean = self.xyzquat_mean.get() + array_copyto(self._xy_view, xyzquat_mean[:2]) # Compute Yaw angle - quat_to_yaw(self.xyzquat_mean[-4:], self._yaw_view) + quat_to_yaw(xyzquat_mean[-4:], self._yaw_view) return self._odom_pose @@ -655,7 +659,7 @@ def translate_positions(position: np.ndarray, def refresh(self) -> np.ndarray: # Extract mean and individual frame position and quaternion vectors - xyzquats, xyzquat_mean = self.xyzquats, self.xyzquat_mean + xyzquats, xyzquat_mean = self.xyzquats.get(), self.xyzquat_mean.get() positions, position_mean = xyzquats[:3, :-1], xyzquat_mean[:3] quats, quat_mean = xyzquats[-4:, :-1], xyzquat_mean[-4:] @@ -737,7 +741,7 @@ def initialize(self) -> None: super().initialize() # Make sure that the state data meet requirements - state = self.state + state = self.state.get() if ((self.kinematic_level == pin.ACCELERATION and state.a is None) or (self.kinematic_level >= pin.VELOCITY and state.v is None)): raise RuntimeError( @@ -833,7 +837,8 @@ def initialize(self) -> None: super().initialize() # Make sure that the state data meet requirements - if self.state.v is None or self.state.a is None: + state = self.state.get() + if state.v is None or state.a is None: raise RuntimeError( "State data do not meet requirements. Velocity and " "acceleration are missing.") @@ -848,7 +853,7 @@ def initialize(self) -> None: def refresh(self) -> np.ndarray: # Extract intermediary quantities for convenience - (dhg_linear, dhg_angular), com = self.dhg, self.com_position + (dhg_linear, dhg_angular), com = self.dhg, self.com_position.get() # Compute the vertical force applied by the robot f_z = dhg_linear[2] + self._robot_weight @@ -861,7 +866,7 @@ def refresh(self) -> np.ndarray: # Translate the ZMP from world to local odometry frame if requested if self.reference_frame == pin.LOCAL: - translate_position_odom(self._zmp, self.odom_pose, self._zmp) + translate_position_odom(self._zmp, self.odom_pose.get(), self._zmp) return self._zmp @@ -938,7 +943,8 @@ def initialize(self) -> None: super().initialize() # Make sure that the state data meet requirements - if self.state.v is None: + state = self.state.get() + if state.v is None: raise RuntimeError( "State data do not meet requirements. Velocity is missing.") @@ -962,12 +968,13 @@ def initialize(self) -> None: def refresh(self) -> np.ndarray: # Compute the DCM in world frame - com_position, com_velocity = self.com_position, self.com_velocity + com_position = self.com_position.get() + com_velocity = self.com_velocity.get() self._dcm[:] = com_position[:2] + com_velocity[:2] / self.omega # Translate the ZMP from world to local odometry frame if requested if self.reference_frame == pin.LOCAL: - translate_position_odom(self._dcm, self.odom_pose, self._dcm) + translate_position_odom(self._dcm, self.odom_pose.get(), self._dcm) return self._dcm @@ -1060,7 +1067,8 @@ def initialize(self) -> None: super().initialize() # Make sure that the state data meet requirements - if self.state.lambda_c is None: + state = self.state.get() + if state.lambda_c is None: raise RuntimeError("State data do not meet requirements. 
" "Constraints lambda multipliers are missing.") @@ -1107,8 +1115,9 @@ def initialize(self) -> None: (6, num_contraints), order='C') def refresh(self) -> np.ndarray: + state = self.state.get() self._normalize_spatial_forces( - self.state.lambda_c, + state.lambda_c, *self._contact_slice, self._robot_weight, self._force_spatial_rel_batch) @@ -1235,7 +1244,8 @@ def initialize(self) -> None: super().initialize() # Make sure that the state data meet requirements - if self.state.lambda_c is None: + state = self.state.get() + if state.lambda_c is None: raise RuntimeError("State data do not meet requirements. " "Constraints lambda multipliers are missing.") @@ -1304,7 +1314,8 @@ def refresh(self) -> np.ndarray: self._vertical_transform_list) # Compute the normalized sum of the vertical forces in world frame - self._normalize_vertical_forces(self.state.lambda_c, + state = self.state.get() + self._normalize_vertical_forces(state.lambda_c, self._foot_slices, self._vertical_transform_batches, self._robot_weight, @@ -1362,4 +1373,4 @@ def __init__(self, auto_refresh=False) def refresh(self) -> bool: - return self.is_colliding + return self.is_colliding.get() diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/manager.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/manager.py index 2684ed454..4f8d689e1 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/quantities/manager.py +++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/manager.py @@ -53,8 +53,13 @@ def __init__(self, env: InterfaceJiminyEnv) -> None: # This is necessary because using a quantity as key directly would # prevent its garbage collection, hence breaking automatic reset of # computation tracking for all quantities sharing its cache. - self._caches: Dict[ - Tuple[Type[InterfaceQuantity], int], SharedCache] = {} + # In case of dataclasses, their hash is the same as if it was obtained + # using `hash(dataclasses.astuple(quantity))`. This is clearly not + # unique, as all it requires to be the same is being built from the + # same nested ordered arguments. To get around this issue, we need to + # store (key, value) pairs in a list. + self._caches: List[Tuple[ + Tuple[Type[InterfaceQuantity], int], SharedCache]] = [] # Instantiate trajectory database. # Note that this quantity is not added to the global registry to avoid @@ -77,9 +82,7 @@ def reset(self, reset_tracking: bool = False) -> None: """ # Reset all quantities sequentially for quantity in self.registry.values(): - quantity.reset( - reset_tracking, - ignore_auto_refresh=not self.env.is_simulation_running) + quantity.reset(reset_tracking) def clear(self) -> None: """Clear internal cache of quantities to force re-evaluating them the @@ -90,8 +93,8 @@ def clear(self) -> None: environment has changed (ie either the agent or world itself), thereby invalidating the value currently stored in cache if any. 
""" - for cache in self._caches.values(): - cache.reset() + for _, cache in self._caches: + cache.reset(ignore_auto_refresh=not self.env.is_simulation_running) def add_trajectory(self, name: str, trajectory: Trajectory) -> None: """Add a new reference trajectory to the database synchronized between @@ -182,9 +185,24 @@ def _build_quantity( # Set a shared cache entry for all quantities involved in computations quantities_all = [top_quantity] while quantities_all: + # Deal with the first quantity in the process queue quantity = quantities_all.pop() + + # Get already available cache entry if any, otherwise create it key = (type(quantity), hash(quantity)) - quantity.cache = self._caches.setdefault(key, SharedCache()) + for cache_key, cache in self._caches: + if key == cache_key: + owner, *_ = cache.owners + if quantity == owner: + break + else: + cache = SharedCache() + self._caches.append((key, cache)) + + # Set shared cache of the quantity + quantity.cache = cache + + # Add all the requirements of the new quantity in the process queue quantities_all += quantity.requirements.values() return top_quantity @@ -235,11 +253,21 @@ def __delitem__(self, name: str) -> None: :param name: Name of the managed quantity to be discarded. It will raise an exception if the specified name does not exists. """ - # Remove shared cache entry for all quantities involved in computations + # Remove shared cache entries for the quantity and its requirements. + # Note that done top-down rather than bottom-up, otherwise reset of + # required quantities no longer having shared cache will be triggered + # automatically by parent quantities following computation graph + # tracking reset whenever a shared cache co-owner is removed. quantities_all = [self.registry.pop(name)] while quantities_all: - quantity = quantities_all.pop() + quantity = quantities_all.pop(0) + cache = quantity.cache quantity.cache = None # type: ignore[assignment] + if len(cache.owners) == 0: + for i, (_, _cache) in enumerate(self._caches): + if cache is _cache: + del self._caches[i] + break quantities_all += quantity.requirements.values() def __iter__(self) -> Iterator[str]: diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/transform.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/transform.py index da92b57c6..8168a3c1a 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/quantities/transform.py +++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/transform.py @@ -2,12 +2,13 @@ its topology (multiple or single branch, fixed or floating base...) and the application (locomotion, grasping...). """ +import sys +import warnings from copy import deepcopy -from collections import deque from dataclasses import dataclass from typing import ( Any, Optional, Sequence, Tuple, TypeVar, Union, Generic, ClassVar, - Callable, Literal, overload, cast) + Callable, Literal, List, overload, cast) from typing_extensions import TypeAlias import numpy as np @@ -41,9 +42,9 @@ class StackedQuantity( """Base quantity whose value must be stacked over time since last reset. """ - max_stack: Optional[int] + max_stack: int """Maximum number of values that keep in memory before starting to discard - the oldest one (FIFO). None if unlimited. + the oldest one (FIFO). `sys.maxsize` if unlimited. 
""" as_array: bool @@ -61,12 +62,12 @@ class StackedQuantity( """ @overload - def __init__(self: "StackedQuantity[ValueT, Tuple[ValueT, ...]]", + def __init__(self: "StackedQuantity[ValueT, List[ValueT]]", env: InterfaceJiminyEnv, parent: Optional[InterfaceQuantity], quantity: QuantityCreator[ValueT], *, - max_stack: Optional[int], + max_stack: int, as_array: Literal[False], mode: Literal['slice', 'zeros']) -> None: ... @@ -77,7 +78,7 @@ def __init__(self: "StackedQuantity[Union[np.ndarray, float], np.ndarray]", parent: Optional[InterfaceQuantity], quantity: QuantityCreator[Union[np.ndarray, float]], *, - max_stack: Optional[int], + max_stack: int, as_array: Literal[True], mode: Literal['slice', 'zeros']) -> None: ... @@ -87,7 +88,7 @@ def __init__(self, parent: Optional[InterfaceQuantity], quantity: QuantityCreator[Any], *, - max_stack: Optional[int] = None, + max_stack: int = sys.maxsize, as_array: bool = False, mode: Literal['slice', 'zeros'] = 'slice') -> None: """ @@ -98,22 +99,22 @@ def __init__(self, must be stacked, plus all its constructor keyword- arguments except environment 'env' and 'parent'. :param max_stack: Maximum number of values that keep in memory before - starting to discard the oldest one (FIFO). None if - unlimited. - :param as_array: Whether to return data as a tuple or a contiguous + starting to discard the oldest one (FIFO). + Optional: The maxium sequence length by default, ie + `sys.maxsize` (2^63 - 1). + :param as_array: Whether to return data as a list or a contiguous N-dimensional array whose last dimension gathers the value of individual timesteps. - :param mode: Fallback strategy in case of incomplete stack. If - 'max_stack' is None, then only 'tuple' is supported. + :param mode: Fallback strategy in case of incomplete stack. 'zeros' is only supported by quantities returning fixed-size N-D array. Optional: 'slice' by default. """ # Make sure that the input arguments are valid - if max_stack is None and (mode != 'slice' or as_array): - raise ValueError( - "Unbounded stack length is only supported for `mode='slice'` " - "and `as_array=False`.") + if max_stack > 10000 and (mode != 'slice' or as_array): + warnings.warn( + "Very large stack length is strongly discourages for " + "`mode != 'slice'` or `as_array=True`.") # Backup user arguments self.max_stack = max_stack @@ -123,14 +124,15 @@ def __init__(self, # Call base implementation super().__init__(env, parent, - requirements=dict(data=quantity), + requirements=dict(quantity=quantity), auto_refresh=True) - # Keep track of the quantity that must be stacked once instantiated - self.quantity = self.requirements["data"] - - # Allocate deque buffer - self._deque: deque = deque(maxlen=self.max_stack) + # Allocate stack buffer. + # Note that using a plain list is more efficient in practice. Although + # front deletion is very fast compared to list, casting deque to tuple + # or list is very slow, which ultimately prevail. The matter gets worst + # as the maximum length gets longer. + self._value_list: List[ValueT] = [] # Continuous memory to store the whole stack if requested. # Note that it will be allocated lazily since the dimension of the @@ -138,6 +140,9 @@ def __init__(self, self._data = np.array([]) self._data_views: Tuple[np.ndarray, ...] 
+
+        # Define proxy to number of steps of current episode for fast access
+        self._num_steps = np.array(-1)
+
         # Keep track of the last time the quantity has been stacked
         self._num_steps_prev = -1

@@ -145,13 +150,16 @@ def initialize(self) -> None:
         # Call base implementation
         super().initialize()

-        # Clear buffer
-        self._deque.clear()
+        # Refresh proxy
+        self._num_steps = self.env.num_steps
+
+        # Clear stack buffer
+        self._value_list.clear()

         # Initialize buffers if necessary
         if self.as_array or self.mode == 'zeros':
-            # Get quantity value
-            value = self.data
+            # Get current value of base quantity
+            value = self.quantity.get()

             # Make sure that the quantity is an array or a scalar
             if not isinstance(value, (int, float, np.ndarray)):
@@ -161,10 +169,10 @@ def initialize(self) -> None:
                 value = np.asarray(value)

             # Fill the queue with zeros if necessary
-            assert self.max_stack is not None
             if self.mode == 'zeros':
                 for _ in range(self.max_stack):
-                    self._deque.append(np.zeros_like(value))
+                    self._value_list.append(
+                        np.zeros_like(value))  # type: ignore[arg-type]

             # Allocate stack memory if necessary
             if self.as_array:
@@ -181,34 +189,38 @@ def refresh(self) -> OtherValueT:
         # Append value to the queue only once per step and only if a simulation
         # is running. Note that data must be deep-copied to make sure it does
         # not get altered afterward.
+        value_list = self._value_list
         if self.env.is_simulation_running:
-            num_steps = self.env.num_steps
+            num_steps = self._num_steps.item()
             if num_steps != self._num_steps_prev:
                 if num_steps != self._num_steps_prev + 1:
                     raise RuntimeError(
                         "Previous step missing in the stack. Please reset the "
-                        "environmentafter adding this quantity.")
-                value = self.data
+                        "environment after adding this quantity.")
+                value = self.quantity.get()
                 if isinstance(value, np.ndarray):
-                    self._deque.append(value.copy())
+                    value_list.append(value.copy())  # type: ignore[arg-type]
                 else:
-                    self._deque.append(deepcopy(value))
+                    value_list.append(deepcopy(value))
+                if len(value_list) > self.max_stack:
+                    del value_list[0]
                 self._num_steps_prev += 1

         # Aggregate data in contiguous array only if requested
-        is_padded = self.mode == 'zeros'
-        value_list = tuple(self._deque)
         if self.as_array:
-            if is_padded:
-                value_list = value_list[(- self._num_steps_prev - 1):]
-                data_views = self._data_views[(- self._num_steps_prev - 1):]
+            is_padded = self.mode == 'zeros'
+            offset = - self._num_steps_prev - 1
+            data, data_views = self._data, self._data_views
+            if offset > - self.max_stack:
+                if is_padded:
+                    value_list = value_list[offset:]
+                else:
+                    data = data[..., offset:]
+                data_views = self._data_views[offset:]
             multi_array_copyto(data_views, value_list)
-            if is_padded:
-                return cast(OtherValueT, self._data)
-            data = self._data[..., (- self._num_steps_prev - 1):]
             return cast(OtherValueT, data)

-        # Return the whole stack as a tuple to preserve the integrity of the
+        # Return the whole stack as a list to preserve the integrity of the
         # underlying container and make the API robust to internal changes.
         return cast(OtherValueT, value_list)
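The registration pattern exercised by the unit tests further down gives a feel for the updated semantics; the quantity choice and values below are illustrative only, with `env` assumed to be any environment exposing the `quantities` manager and imports omitted:

    # Keep the 3 most recent values of a quantity, aggregated into a
    # contiguous array whose last dimension is time, zero-padded until full.
    env.quantities["xyzquat_stack"] = (StackedQuantity, dict(
        quantity=(MultiFootRelativeXYZQuat, {}),
        max_stack=3,
        as_array=True,
        mode='zeros'))
    value = env.quantities["xyzquat_stack"]  # np.ndarray

    # With default arguments the stack is unbounded (`sys.maxsize`) and the
    # raw value is now a plain list rather than a tuple.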
@@ -296,24 +308,24 @@ def __init__(self,

         # Call base implementation
         super().__init__(env,
                          parent,
-                         requirements=dict(data=quantity),
+                         requirements=dict(quantity=quantity),
                          auto_refresh=False)

-        # Keep track of the quantity from which data must be extracted
-        self.quantity = self.requirements["data"]
-
     def refresh(self) -> np.ndarray:
+        # Get current value of base quantity
+        value = self.quantity.get()
+
         # Extract elements from quantity
         if not self._slices:
             # Note that `take` is faster than classical advanced indexing via
             # `operator[]` (`__getitem__`) because the latter is more generic.
             # Notably, `operator[]` supports boolean mask but `take` does not.
-            return self.data.take(self.indices, self.axis)
+            return value.take(self.indices, self.axis)
         if self.axis is None:
             # `ravel` must be used instead of `flat` to get a view that can
             # be sliced without copy.
-            return self.data.ravel(order="K")[self._slices]
-        return self.data[self._slices]
+            return value.ravel(order="K")[self._slices]
+        return value[self._slices]


 @dataclass(unsafe_hash=True)
@@ -323,7 +335,7 @@ class UnaryOpQuantity(InterfaceQuantity[ValueT],

     This quantity is useful to translate quantities from world frame to local
     odometry frame. It may also be used to convert multi-variate quantities as
-    scalar, typically by computing the Lp-norm.
+    scalar, typically by computing the L^p-norm.
     """

     quantity: InterfaceQuantity[OtherValueT]
@@ -358,14 +370,11 @@ def __init__(self,
         super().__init__(
             env,
             parent,
-            requirements=dict(data=quantity),
+            requirements=dict(quantity=quantity),
             auto_refresh=False)

-        # Keep track of the left- and right-hand side quantities for hashing
-        self.quantity = self.requirements["data"]
-
     def refresh(self) -> ValueT:
-        return self.op(self.data)
+        return self.op(self.quantity.get())


 @dataclass(unsafe_hash=True)
@@ -422,15 +431,11 @@ def __init__(self,
             env,
             parent,
             requirements=dict(
-                value_left=quantity_left, value_right=quantity_right),
+                quantity_left=quantity_left, quantity_right=quantity_right),
             auto_refresh=False)

-        # Keep track of the left- and right-hand side quantities for hashing
-        self.quantity_left = self.requirements["value_left"]
-        self.quantity_right = self.requirements["value_right"]
-
     def refresh(self) -> ValueT:
-        return self.op(self.value_left, self.value_right)
+        return self.op(self.quantity_left.get(), self.quantity_right.get())


 @dataclass(unsafe_hash=True)
@@ -474,11 +479,11 @@ class of a quantity whose value must be passed as
             env,
             parent,
             requirements={
-                f"value_{i}": quantity
+                f"quantity_{i}": quantity
                 for i, quantity in enumerate(quantities)},
             auto_refresh=False)

-        # Keep track of the instantiated quantities for hashing
+        # Keep track of the instantiated quantities for identity check
         self.quantities = tuple(self.requirements.values())

     def refresh(self) -> ValueT:
diff --git a/python/gym_jiminy/common/gym_jiminy/common/utils/pipeline.py b/python/gym_jiminy/common/gym_jiminy/common/utils/pipeline.py
index d4e47c45e..13f2664be 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/utils/pipeline.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/utils/pipeline.py
@@ -292,7 +292,7 @@ def build_composition(
         assert isinstance(cls, type)

         # Get its constructor keyword-arguments
-        kwargs = composition_config.get("kwargs", {})
+        kwargs = composition_config.get("kwargs", {}).copy()

         # Special handling for `MixtureReward`
         if is_reward and issubclass(cls, MixtureReward):
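The `.copy()` added to `build_composition` presumably guards against mutating the keyword-arguments stored in the shared pipeline configuration. The aliasing pitfall it avoids is shown below with a hypothetical config dict:

    composition_config = {"kwargs": {"cutoff": 0.5}}

    # Without `.copy()`, the local name aliases the nested dict, so in-place
    # mutations corrupt the configuration for any subsequent build.
    kwargs = composition_config.get("kwargs", {})
    kwargs.pop("cutoff")
    assert composition_config["kwargs"] == {}

    composition_config = {"kwargs": {"cutoff": 0.5}}
    kwargs = composition_config.get("kwargs", {}).copy()
    kwargs.pop("cutoff")
    assert composition_config["kwargs"] == {"cutoff": 0.5}  # left intact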
diff --git a/python/gym_jiminy/envs/gym_jiminy/envs/ant.py b/python/gym_jiminy/envs/gym_jiminy/envs/ant.py
index 503542345..5ee0cc7d1 100644
--- a/python/gym_jiminy/envs/gym_jiminy/envs/ant.py
+++ b/python/gym_jiminy/envs/gym_jiminy/envs/ant.py
@@ -239,9 +239,9 @@ def compute_reward(self, terminated: bool, info: InfoType) -> float:
           * survive_reward: Constant positive reward equal to 1.0 as long as
             no termination condition has been triggered.
           * ctrl_cost: Negative reward to penalize power consumption, defined
-            as the L2-norm of the action vector weighted by 0.5.
+            as the L^2-norm of the action vector weighted by 0.5.
           * contact_cost: Negative reward to penalize jerky, violent motions,
-            defined as the aggregated L2-norm of the external forces applied
+            defined as the aggregated L^2-norm of the external forces applied
             on all bodies weighted by 5e-4.

         The value of each individual reward is added to `info` for monitoring.
diff --git a/python/gym_jiminy/examples/quantity_benchmark.py b/python/gym_jiminy/examples/quantity_benchmark.py
index 9a2111c4e..61cb6d43c 100644
--- a/python/gym_jiminy/examples/quantity_benchmark.py
+++ b/python/gym_jiminy/examples/quantity_benchmark.py
@@ -11,10 +11,9 @@
 # Define number of samples for benchmarking
 N_SAMPLES = 50000

-# Disable caching by forcing `SharedCache.has_value` to always return `False`
-setattr(gym_jiminy.common.bases.quantities.SharedCache,
-        "has_value",
-        property(lambda self: False))
+# Disable caching by disabling "IS_CACHED" FSM State
+gym_jiminy.common.bases.quantities._IS_CACHED = (
+    gym_jiminy.common.bases.quantities.QuantityStateMachine.IS_INITIALIZED)

 # Instantiate a dummy environment
 env = gym.make("gym_jiminy.envs:atlas")
@@ -40,7 +39,7 @@
         break

 # Extract batched data buffer of `FrameOrientation` quantities
-shared_data = quantity.requirements['data']
+shared_data = quantity.data

 # Benchmark computation of batched data buffer
 duration = timeit.timeit(
diff --git a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/compositions/locomotion.py b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/compositions/locomotion.py
index fdcab5d4a..6e392cf1f 100644
--- a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/compositions/locomotion.py
+++ b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/compositions/locomotion.py
@@ -29,7 +29,7 @@ def tanh_normalization(value: float,
                   be bounded or unbounded, and signed or not, without
                   restrictions.
     :param cutoff: Cut-off threshold to consider.
-    :param order: Order of Lp-Norm that will be used as distance metric.
+    :param order: Order of L^p-norm that will be used as distance metric.
     """
     value_rel = (
         cutoff_high + cutoff_low - 2 * value) / (cutoff_high - cutoff_low)
@@ -66,21 +66,21 @@ class MaximizeRobusntess(QuantityReward):
     """
     def __init__(self,
                  env: InterfaceJiminyEnv,
-                 cutoff_inner: float,
-                 cutoff_outer: float) -> None:
+                 cutoff: float,
+                 cutoff_outer: float = 0.0) -> None:
         """
         :param env: Base or wrapped jiminy environment.
-        :param cutoff_inner: Cutoff threshold when the ZMP lies inside the
-                             support polygon. The reward will be larger than
-                             '1.0 - CUTOFF_ESP' if the distance from the border
-                             is larger than 'cutoff_inner'.
+        :param cutoff: Cutoff threshold when the ZMP lies inside the support
+                       polygon. The reward will be larger than
+                       '1.0 - CUTOFF_ESP' if the distance from the border is
+                       larger than 'cutoff'.
         :param cutoff_outer: Cutoff threshold when the ZMP lies outside the
                              support polygon. The reward will be smaller than
                              'CUTOFF_ESP' if the ZMP is further away from the
                              border of the support polygon than 'cutoff_outer'.
         """
         # Backup some user argument(s)
-        self.cutoff_inner = cutoff_inner
+        self.cutoff_inner = cutoff
         self.cutoff_outer = cutoff_outer

         # The cutoff thresholds must be positive
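The renamed arguments keep the boundary behavior asserted by the unit tests: the signed distance from the ZMP to the border of the support polygon is squashed into (0.0, 1.0), saturating near 1 at the inner cutoff and near 0 at the outer one. An illustrative check, assuming `tanh_normalization` and `CUTOFF_ESP` are both importable from this module:

    import numpy as np

    from gym_jiminy.toolbox.compositions.locomotion import (  # assumed paths
        tanh_normalization, CUTOFF_ESP)

    cutoff, cutoff_outer = 0.1, 0.5
    np.testing.assert_allclose(
        tanh_normalization(-cutoff, -cutoff, cutoff_outer), 1.0 - CUTOFF_ESP)
    np.testing.assert_allclose(
        tanh_normalization(cutoff_outer, -cutoff, cutoff_outer), CUTOFF_ESP)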
""" # Backup some user argument(s) - self.cutoff_inner = cutoff_inner + self.cutoff_inner = cutoff self.cutoff_outer = cutoff_outer # The cutoff thresholds must be positive @@ -91,7 +91,7 @@ def __init__(self, # Call base implementation super().__init__( env, - "reward_momentum", + "reward_robustness", (StabilityMarginProjectedSupportPolygon, dict( mode=QuantityEvalMode.TRUE )), diff --git a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/quantities/locomotion.py b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/quantities/locomotion.py index d0590d68d..60e70f769 100644 --- a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/quantities/locomotion.py +++ b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/quantities/locomotion.py @@ -70,8 +70,7 @@ def __init__(self, env, parent, requirements=dict( - odom_pose=(BaseOdometryPose, dict(mode=mode)) - ), + odom_pose=(BaseOdometryPose, dict(mode=mode))), mode=mode, auto_refresh=False) @@ -149,7 +148,7 @@ def refresh(self) -> ConvexHull2D: # Translate candidate contact points from world to local odometry frame if self.reference_frame == pin.LOCAL: translate_position_odom(self._candidate_xy_batch, - self.odom_pose, + self.odom_pose.get(), self._candidate_xy_batch) # Compute the 2D convex hull in world plane @@ -217,4 +216,5 @@ def __init__(self, auto_refresh=False) def refresh(self) -> float: - return - self.support_polygon.get_distance_to_point(self.zmp).item() + support_polygon, zmp = self.support_polygon.get(), self.zmp.get() + return - support_polygon.get_distance_to_point(zmp).item() diff --git a/python/gym_jiminy/unit_py/test_quantities.py b/python/gym_jiminy/unit_py/test_quantities.py index 8273c2435..38beddcf7 100644 --- a/python/gym_jiminy/unit_py/test_quantities.py +++ b/python/gym_jiminy/unit_py/test_quantities.py @@ -1,5 +1,6 @@ """ TODO: Write documentation """ +import sys import math import unittest @@ -62,24 +63,30 @@ def test_shared_cache(self): assert len(quantities["com"].cache.owners) == 2 zmp_0 = quantity_manager.zmp.copy() - assert quantities["com"].cache.has_value - assert not quantities["acom"].cache.has_value - assert not quantities["com"]._is_initialized - assert quantities["zmp"].requirements["com_position"]._is_initialized + assert quantities["com"].cache.sm_state == 2 + assert quantities["acom"].cache.sm_state == 0 + is_initialized_all = [ + owner._is_initialized for owner in quantities["com"].cache.owners] + assert len(is_initialized_all) == 2 + assert len(set(is_initialized_all)) == 2 env.step(env.action_space.sample()) zmp_1 = quantity_manager["zmp"].copy() assert np.all(zmp_0 == zmp_1) quantity_manager.clear() - assert quantities["zmp"].requirements["com_position"]._is_initialized - assert not quantities["com"].cache.has_value + is_initialized_all = [ + owner._is_initialized for owner in quantities["com"].cache.owners] + assert any(is_initialized_all) + assert quantities["com"].cache.sm_state == 1 zmp_1 = quantity_manager.zmp.copy() assert np.any(zmp_0 != zmp_1) env.step(env.action_space.sample()) quantity_manager.reset() - assert not quantities["zmp"].requirements[ - "com_position"]._is_initialized + assert quantities["com"].cache.sm_state == 0 + is_initialized_all = [ + owner._is_initialized for owner in quantities["com"].cache.owners] + assert not any(is_initialized_all) zmp_2 = quantity_manager.zmp.copy() assert np.any(zmp_1 != zmp_2) @@ -126,43 +133,39 @@ def test_dynamic_batching(self): xyzquat_0 = quantity_manager.xyzquat_0.copy() rpy_0 = quantity_manager.rpy_0.copy() - assert 
len(quantities['rpy_0'].requirements['data'].frame_names) == 1 + assert len(quantities['rpy_0'].data.cache.owners[0].frame_names) == 1 assert np.all(rpy_0 == quantity_manager.rpy_1) rpy_2 = quantity_manager.rpy_2.copy() assert np.any(rpy_0 != rpy_2) - assert len(quantities['rpy_2'].requirements['data'].frame_names) == 2 + assert len(quantities['rpy_2'].data.cache.owners[0].frame_names) == 2 assert tuple(quantity_manager.rpy_batch_0.shape) == (3, 2) - assert len(quantities['rpy_batch_0'].requirements['data']. - frame_names) == 3 + assert len(quantities['rpy_batch_0'].data.cache.owners[0].frame_names) == 3 quantity_manager.rpy_batch_1 - assert len(quantities['rpy_batch_1'].requirements['data']. - frame_names) == 3 + assert len(quantities['rpy_batch_1'].data.cache.owners[0].frame_names) == 3 quantity_manager.rpy_batch_2 - assert len(quantities['rpy_batch_2'].requirements['data']. - frame_names) == 5 + assert len(quantities['rpy_batch_2'].data.cache.owners[0].frame_names) == 5 assert tuple(quantity_manager.rot_mat_batch.shape) == (3, 3, 2) assert tuple(quantity_manager.quat_batch.shape) == (4, 2) - assert len(quantities['quat_batch'].requirements['data']. - requirements['rot_mat_map'].frame_names) == 8 + assert len(quantities['quat_batch'].data.rot_mat_map.cache.owners[0].frame_names) == 8 env.step(env.action_space.sample()) quantity_manager.reset() rpy_0_next = quantity_manager.rpy_0 - xyzquat_0_next = quantity_manager.xyzquat_0.copy() + xyzquat_0_next = quantity_manager.xyzquat_0.copy() assert np.any(rpy_0 != rpy_0_next) assert np.any(xyzquat_0 != xyzquat_0_next) - assert len(quantities['rpy_2'].requirements['data'].frame_names) == 2 + assert len(quantities['rpy_2'].data.cache.owners[0].frame_names) == 5 - assert len(quantities['rpy_1'].requirements['data'].cache.owners) == 6 + assert len(quantities['rpy_1'].data.cache.owners) == 6 del quantity_manager['rpy_2'] - assert len(quantities['rpy_1'].requirements['data'].cache.owners) == 5 + assert len(quantities['rpy_1'].data.cache.owners) == 5 quantity_manager.rpy_1 - assert len(quantities['rpy_1'].requirements['data'].frame_names) == 1 + assert len(quantities['rpy_1'].data.cache.owners[0].frame_names) == 1 quantity_manager.reset(reset_tracking=True) assert np.all(rpy_0_next == quantity_manager.rpy_0) assert np.all(xyzquat_0_next == quantity_manager.xyzquat_0) - assert len(quantities['rpy_0'].requirements['data'].frame_names) == 1 + assert len(quantities['rpy_0'].data.cache.owners[0].frame_names) == 1 def test_discard(self): """ TODO: Write documentation @@ -182,17 +185,17 @@ def test_discard(self): quantities = quantity_manager.registry assert len(quantities['rpy_1'].cache.owners) == 2 - assert len(quantities['rpy_2'].requirements['data'].cache.owners) == 3 + assert len(quantities['rpy_2'].data.cache.owners) == 3 del quantity_manager['rpy_0'] assert len(quantities['rpy_1'].cache.owners) == 1 - assert len(quantities['rpy_2'].requirements['data'].cache.owners) == 2 + assert len(quantities['rpy_2'].data.cache.owners) == 2 del quantity_manager['rpy_1'] - assert len(quantities['rpy_2'].requirements['data'].cache.owners) == 1 + assert len(quantities['rpy_2'].data.cache.owners) == 1 del quantity_manager['rpy_2'] - for (cls, _), cache in quantity_manager._caches.items(): + for (cls, _), cache in quantity_manager._caches: assert len(cache.owners) == (cls is DatasetTrajectoryQuantity) def test_env(self): @@ -245,7 +248,7 @@ def test_stack_api(self): (3, True, "zeros")): quantity_creator = (StackedQuantity, dict( quantity=(MultiFootRelativeXYZQuat, {}), - 
max_stack=max_stack, + max_stack=max_stack or sys.maxsize, as_array=as_array, mode=mode)) env.quantities["xyzquat_stack"] = quantity_creator @@ -255,7 +258,7 @@ def test_stack_api(self): if as_array: assert isinstance(value, np.ndarray) else: - assert isinstance(value, tuple) + assert isinstance(value, list) for i in range(1, (max_stack or 5) + 2): num_stack = max_stack or i if mode == "slice": @@ -286,8 +289,9 @@ def test_masked(self): keys=(0, 1, 5))) quantity = env.quantities.registry["v_masked"] assert not quantity._slices + value = quantity.quantity.get() np.testing.assert_allclose( - env.quantities["v_masked"], quantity.data[[0, 1, 5]]) + env.quantities["v_masked"], value[[0, 1, 5]]) del env.quantities["v_masked"] # 2. From boolean mask @@ -295,8 +299,9 @@ def test_masked(self): quantity=(FrameXYZQuat, dict(frame_name="root_joint")), keys=(True, True, False, False, False, True))) quantity = env.quantities.registry["v_masked"] + value = quantity.quantity.get() np.testing.assert_allclose( - env.quantities["v_masked"], quantity.data[[0, 1, 5]]) + env.quantities["v_masked"], value[[0, 1, 5]]) del env.quantities["v_masked"] # 3. From slice-able indices @@ -304,9 +309,11 @@ def test_masked(self): quantity=(FrameXYZQuat, dict(frame_name="root_joint")), keys=(0, 2, 4))) quantity = env.quantities.registry["v_masked"] - assert len(quantity._slices) == 1 and quantity._slices[0] == slice(0, 5, 2) + assert len(quantity._slices) == 1 and ( + quantity._slices[0] == slice(0, 5, 2)) + value = quantity.quantity.get() np.testing.assert_allclose( - env.quantities["v_masked"], quantity.data[[0, 2, 4]]) + env.quantities["v_masked"], value[[0, 2, 4]]) def test_true_vs_reference(self): env = gym.make("gym_jiminy.envs:atlas", debug=False) @@ -408,7 +415,7 @@ def test_average_odometry_velocity(self): rot_mat @ base_velocity_mean_local[3:])) np.testing.assert_allclose( - quantity.requirements['data'].data, base_velocity_mean_world) + quantity.data.quantity.get(), base_velocity_mean_world) base_odom_velocity = base_velocity_mean_world[[0, 1, 5]] np.testing.assert_allclose( env.quantities["odometry_velocity"], base_odom_velocity) @@ -510,9 +517,9 @@ def test_capture_point(self): env.step(env.action_space.sample()) com_position = env.robot.pinocchio_data.com[0] - np.testing.assert_allclose(quantity.com_position, com_position) + np.testing.assert_allclose(quantity.com_position.get(), com_position) com_velocity = env.robot.pinocchio_data.vcom[0] - np.testing.assert_allclose(quantity.com_velocity, com_velocity) + np.testing.assert_allclose(quantity.com_velocity.get(), com_velocity) np.testing.assert_allclose( env.quantities["dcm"], com_position[:2] + com_velocity[:2] / omega) @@ -733,8 +740,8 @@ def test_power_consumption(self): mean_total_power = np.mean( total_power_stack[-quantity.max_stack:]) - np.testing.assert_allclose( - total_power, quantity.total_power_stack[-1]) + value = quantity.total_power_stack.get() + np.testing.assert_allclose(total_power, value[-1]) np.testing.assert_allclose(mean_total_power, quantity.get()) del env.quantities["mean_power_consumption"] diff --git a/python/gym_jiminy/unit_py/test_rewards.py b/python/gym_jiminy/unit_py/test_rewards.py index 2e96aeff9..b3e043a87 100644 --- a/python/gym_jiminy/unit_py/test_rewards.py +++ b/python/gym_jiminy/unit_py/test_rewards.py @@ -66,7 +66,7 @@ def test_tracking(self): (TrackingFootOrientationsReward, 2.0), (TrackingFootForceDistributionReward, 2.0)): reward = reward_class(self.env, cutoff=cutoff) - quantity = reward.quantity + quantity = 
reward.data self.env.reset(seed=0) action = 0.5 * self.env.action_space.sample() @@ -74,17 +74,18 @@ def test_tracking(self): self.env.step(action) _, _, terminated, _, _ = self.env.step(self.env.action) + value_left = quantity.quantity_left.get() + value_right = quantity.quantity_right.get() with np.testing.assert_raises(AssertionError): - np.testing.assert_allclose( - quantity.value_left, quantity.value_right) + np.testing.assert_allclose(value_left, value_right) if isinstance(reward, TrackingBaseHeightReward): np.testing.assert_allclose( - quantity.value_left, self.env.robot_state.q[2]) + value_left, self.env.robot_state.q[2]) gamma = - np.log(CUTOFF_ESP) / cutoff ** 2 - value = np.exp(- gamma * np.sum((quantity.op( - quantity.value_left, quantity.value_right)) ** 2)) + value = np.exp(- gamma * np.sum( + (quantity.op(value_left, value_right)) ** 2)) assert value > 0.01 np.testing.assert_allclose(reward(terminated, {}), value) @@ -120,14 +121,15 @@ def test_stability(self): """ CUTOFF_INNER, CUTOFF_OUTER = 0.1, 0.5 reward_stability = MaximizeRobusntess( - self.env, cutoff_inner=0.1, cutoff_outer=0.5) - quantity = reward_stability.quantity + self.env, cutoff=0.1, cutoff_outer=0.5) + quantity = reward_stability.data self.env.reset(seed=0) action = self.env.action_space.sample() _, _, terminated, _, _ = self.env.step(action) - dist = quantity.support_polygon.get_distance_to_point(quantity.zmp) + support_polygon = quantity.support_polygon.get() + dist = support_polygon.get_distance_to_point(quantity.zmp.get()) value = tanh_normalization(dist.item(), -CUTOFF_INNER, CUTOFF_OUTER) np.testing.assert_allclose(tanh_normalization( -CUTOFF_INNER, -CUTOFF_INNER, CUTOFF_OUTER), 1.0 - CUTOFF_ESP) @@ -140,7 +142,7 @@ def test_friction(self): """ CUTOFF = 0.5 reward_friction = MinimizeFrictionReward(self.env, cutoff=CUTOFF) - quantity = reward_friction.quantity + quantity = reward_friction.data self.env.reset(seed=0) _, _, terminated, _, _ = self.env.step(self.env.action) diff --git a/python/gym_jiminy/unit_py/test_terminations.py b/python/gym_jiminy/unit_py/test_terminations.py index dd9c62303..2a075faf9 100644 --- a/python/gym_jiminy/unit_py/test_terminations.py +++ b/python/gym_jiminy/unit_py/test_terminations.py @@ -122,8 +122,8 @@ def test_drift_tracking(self): (termination_rot, flags_rot, rotations, info_rot)): values = values[-termination.max_stack:] drift = termination.op(values[-1], values[0]) - data = termination.quantity.value_left - np.testing.assert_allclose(drift, data) + value = termination.data.quantity_left.get() + np.testing.assert_allclose(drift, value) time = self.env.stepper_state.t is_active = ( @@ -137,7 +137,7 @@ def test_drift_tracking(self): assert is_active assert terminated ^ termination.is_truncation elif is_active: - value = termination.quantity.get() + value = termination.data.get() assert np.all(value >= termination.low) assert np.all(value <= termination.high) _, _, terminated, truncated, _ = self.env.step(action) @@ -193,13 +193,13 @@ def test_shift_tracking(self): (termination_rot, flags_rot, rotations, info_rot)): values = values[-termination.max_stack:] stack = np.stack(values, axis=-1) - left = termination.quantity.value_left + left = termination.data.quantity_left.get() np.testing.assert_allclose(stack, left) - right = termination.quantity.value_right + right = termination.data.quantity_right.get() diff = termination.op(left, right) shift = np.min(np.linalg.norm( diff.reshape((-1, len(values))), axis=0)) - value = termination.quantity.get() + value = 
termination.data.get() np.testing.assert_allclose(shift, value) time = self.env.stepper_state.t @@ -334,10 +334,10 @@ def test_drift_tracking_base_odom(self): MAX_POS_ERROR, MAX_ROT_ERROR = 0.1, 0.2 termination_pos = DriftTrackingBaseOdometryPositionTermination( self.env, MAX_POS_ERROR, 1.0) - quantity_pos = termination_pos.quantity + quantity_pos = termination_pos.data termination_rot = DriftTrackingBaseOdometryOrientationTermination( self.env, MAX_ROT_ERROR, 1.0) - quantity_rot = termination_rot.quantity + quantity_rot = termination_rot.data self.env.reset(seed=0) action = self.env.action_space.sample() @@ -346,10 +346,14 @@ def test_drift_tracking_base_odom(self): if terminated: break terminated, truncated = termination_pos({}) - diff = quantity_pos.value_left - quantity_pos.value_right + value_left = quantity_pos.quantity_left.get() + value_right = quantity_pos.quantity_right.get() + diff = value_left - value_right is_valid = np.linalg.norm(diff) <= MAX_POS_ERROR assert terminated ^ is_valid - diff = quantity_rot.value_left - quantity_rot.value_right + value_left = quantity_rot.quantity_left.get() + value_right = quantity_rot.quantity_right.get() + diff = value_left - value_right terminated, truncated = termination_rot({}) is_valid = np.abs(diff) <= MAX_ROT_ERROR assert terminated ^ is_valid diff --git a/python/jiminy_py/src/jiminy_py/dynamics.py b/python/jiminy_py/src/jiminy_py/dynamics.py index f1d0c42b3..9a6c8a9cd 100644 --- a/python/jiminy_py/src/jiminy_py/dynamics.py +++ b/python/jiminy_py/src/jiminy_py/dynamics.py @@ -311,7 +311,10 @@ def get(self, if t - t_end > TRAJ_INTERP_TOL or t_start - t > TRAJ_INTERP_TOL: raise RuntimeError("Time is out-of-range.") elif mode == "wrap": - t = ((t - t_start) % (t_end - t_start)) + t_start + if t_end > t_start: + t = ((t - t_start) % (t_end - t_start)) + t_start + else: + t = t_start else: t = max(t, t_start) # Clipping right it is sufficient diff --git a/python/jiminy_py/src/jiminy_py/simulator.py b/python/jiminy_py/src/jiminy_py/simulator.py index 06d3a2b43..ac3e79fca 100644 --- a/python/jiminy_py/src/jiminy_py/simulator.py +++ b/python/jiminy_py/src/jiminy_py/simulator.py @@ -202,7 +202,7 @@ def build(cls, config_path: Optional[str] = None, avoid_instable_collisions: bool = True, debug: bool = False, - *, robot_name: str = "", + *, name: str = "", **kwargs: Any) -> 'Simulator': r"""Create a new single-robot simulator instance from scratch based on configuration files only. @@ -233,8 +233,8 @@ def build(cls, its vertices. :param debug: Whether the debug mode must be activated. Doing it enables temporary files automatic deletion. - :param robot_name: Desired name of the robot. - Optional: Empty string by default. + :param name: Desired name of the robot. + Optional: Empty string by default. :param kwargs: Keyword arguments to forward to class constructor. """ # Handling of default argument(s) @@ -246,7 +246,7 @@ def build(cls, # Instantiate and initialize the robot robot = _build_robot_from_urdf( - robot_name, urdf_path, hardware_path, mesh_path_dir, has_freeflyer, + name, urdf_path, hardware_path, mesh_path_dir, has_freeflyer, avoid_instable_collisions, debug) # Instantiate and initialize the engine
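For reference, the "wrap" branch fixed in `dynamics.py` above maps any query time onto the trajectory support `[t_start, t_end]` and now degrades gracefully for zero-length trajectories. A standalone sketch of the resulting behavior (helper name hypothetical):

    def wrap_time(t: float, t_start: float, t_end: float) -> float:
        # Periodic wrapping onto [t_start, t_end), or clamping to the single
        # support point when the trajectory is degenerate (t_end == t_start).
        if t_end > t_start:
            return ((t - t_start) % (t_end - t_start)) + t_start
        return t_start


    assert wrap_time(2.5, 0.0, 1.0) == 0.5
    assert wrap_time(-0.25, 0.0, 1.0) == 0.75
    assert wrap_time(5.0, 1.0, 1.0) == 1.0  # previously a modulo-by-zero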