
Commit

[gym/common] Refactor quantity management to dramatically improve its performance. (#821)

* [core] Fix robot serialization issue.
* [python/simulator] Consistent keyword arguments between 'Simulator.build' and 'Simulator.add_robot'.
* [gym/common] Fix quantity hash collision issue in quantity manager.
* [gym/common] Refactor quantity management to dramatically improve performance.
* [gym/common] Add 'order' option to 'AdditiveReward'.
duburcqa authored Jul 8, 2024
1 parent 0443f50 commit ed2af39
Showing 25 changed files with 748 additions and 477 deletions.
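The heart of the commit is how quantities are registered, cached, and shared between reward and termination components (see the renamed `self.data` handles and the `quantities` registry in the diffs below). As a rough, self-contained sketch of that registry pattern, with illustrative class and method names rather than the actual gym_jiminy API, quantities can be registered by name and evaluated lazily with per-step caching:

```python
from typing import Any, Callable, Dict


class CachedQuantity:
    """Lazily evaluated value that is computed at most once per step."""

    def __init__(self, compute: Callable[[], Any]) -> None:
        self._compute = compute
        self._value: Any = None
        self._is_cached = False

    def get(self) -> Any:
        # Evaluate on first access only, then serve the cached value.
        if not self._is_cached:
            self._value = self._compute()
            self._is_cached = True
        return self._value

    def reset(self) -> None:
        self._is_cached = False


class QuantityManager:
    """Name-keyed registry of shared quantities (illustrative sketch)."""

    def __init__(self) -> None:
        self.registry: Dict[str, CachedQuantity] = {}

    def __setitem__(self, name: str, compute: Callable[[], Any]) -> None:
        # Registering the same name twice reuses the existing entry, so
        # several rewards/terminations can share one underlying quantity.
        self.registry.setdefault(name, CachedQuantity(compute))

    def clear(self) -> None:
        # Invalidate every cached value, typically once per environment step.
        for quantity in self.registry.values():
            quantity.reset()
```

This kind of shared, per-step caching is the pattern the diffs below rely on: components keep a reference to the registered entry and call `get()` instead of recomputing the quantity themselves.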
1 change: 1 addition & 0 deletions core/src/io/serialization.cc
@@ -1008,6 +1008,7 @@ namespace boost::serialization

// Restore extended simulation model
model.pinocchioModel_ = pinocchioModel;
+model.pinocchioData_ = pinocchio::Data(pinocchioModel);
}

template<class Archive>
4 changes: 2 additions & 2 deletions core/src/solver/constraint_solvers.cc
@@ -265,11 +265,11 @@ namespace jiminy
they can grow arbitrary large for constraints whose bounds are active. It follows
that stagnation of residuals is the only viable criteria.
The PGS algorithm has been modified for solving second-order cone LCP, which means
-that only the L2-norm of the tangential forces can be expected to converge. Because
+that only the L^2-norm of the tangential forces can be expected to converge. Because
of this, it is too restrictive to check the element-wise variation of the residuals
over iterations. It makes more sense to look at the Linf-norm instead, but this
criteria is very lax. A good compromise may be to look at the constraint-block-wise
-L2-norm, which is similar to what Drake simulator is doing. For reference, see:
+L^2-norm, which is similar to what Drake simulator is doing. For reference, see:
https://github.com/RobotLocomotion/drake/blob/master/multibody/contact_solvers/pgs_solver.cc
*/
const double tol = tolAbs_ + tolRel_ * y_.lpNorm<Eigen::Infinity>() + EPS;
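The comment above motivates checking stagnation of the residuals with a constraint-block-wise L^2-norm rather than an element-wise or global L^inf criterion. A minimal NumPy sketch of that idea, assuming the solution vector is partitioned into per-constraint blocks (the function and its signature are illustrative, not Jiminy's actual solver code):

```python
from typing import Sequence

import numpy as np


def residuals_have_stagnated(y: np.ndarray,
                             dy: np.ndarray,
                             blocks: Sequence[slice],
                             tol_abs: float = 1e-8,
                             tol_rel: float = 1e-6) -> bool:
    """Block-wise stagnation test for a PGS-type solver (sketch).

    :param y: Current solution, e.g. the stacked contact impulses.
    :param dy: Variation of the solution over the last iteration.
    :param blocks: Index ranges delimiting the constraint blocks, e.g. one
        block of 3 components per contact point.
    """
    # Tolerance scaled by the L^inf-norm of the solution, plus machine
    # epsilon, mirroring the expression visible in the hunk above.
    tol = tol_abs + tol_rel * np.linalg.norm(y, np.inf) + np.finfo(float).eps
    # Require the L^2-norm of the variation of every constraint block to be
    # small: stricter than a single global L^inf check, yet it does not
    # demand element-wise convergence of the tangential forces.
    return all(np.linalg.norm(dy[block]) < tol for block in blocks)
```

With `blocks = [slice(3 * i, 3 * (i + 1)) for i in range(num_contacts)]`, this reproduces the per-contact grouping that the comment describes as similar to Drake's PGS solver.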
12 changes: 6 additions & 6 deletions python/gym_jiminy/common/gym_jiminy/common/bases/compositions.py
@@ -7,7 +7,7 @@
greatly reduces code duplication and bugs.
"""
from abc import ABC, abstractmethod
-from enum import Enum
+from enum import IntEnum
from typing import Tuple, Sequence, Callable, Union, Optional, Generic, TypeVar

import numpy as np
@@ -234,7 +234,7 @@ def __init__(self,
self.env.quantities[self.name] = quantity

# Keep track of the underlying quantity
-self.quantity = self.env.quantities.registry[self.name]
+self.data = self.env.quantities.registry[self.name]

def __del__(self) -> None:
try:
@@ -266,7 +266,7 @@ def compute(self, terminated: bool, info: InfoType) -> Optional[float]:
return None

# Evaluate raw quantity
-value = self.quantity.get()
+value = self.data.get()

# Early return if quantity is None
if value is None:
@@ -383,7 +383,7 @@ def compute(self, terminated: bool, info: InfoType) -> Optional[float]:
return reward_total


-class EpisodeState(Enum):
+class EpisodeState(IntEnum):
"""Specify the current state of the ongoing episode.
"""

@@ -595,7 +595,7 @@ def __init__(self,
self.env.quantities[self.name] = quantity

# Keep track of the underlying quantity
-self.quantity = self.env.quantities.registry[self.name]
+self.data = self.env.quantities.registry[self.name]

def __del__(self) -> None:
try:
@@ -615,7 +615,7 @@ def compute(self, info: InfoType) -> bool:
This method is not meant to be overloaded.
"""
# Evaluate the quantity
-value = self.quantity.get()
+value = self.data.get()

# Check if the quantity is out-of-bounds bound.
# Note that it may be `None` if the quantity is ill-defined for the
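The switch of `EpisodeState` from `Enum` to `IntEnum` makes its members behave as plain integers, so they compare directly against numeric values and can be stored in numeric arrays without conversion. A short, generic illustration (the enum below is a stand-in, not the actual `EpisodeState` definition):

```python
from enum import Enum, IntEnum

import numpy as np


class StateEnum(Enum):
    CONTINUED = 0
    TERMINATED = 1


class StateIntEnum(IntEnum):
    CONTINUED = 0
    TERMINATED = 1


# Plain `Enum` members are not integers: they never compare equal to ints,
# and numpy falls back to a generic object array when storing them.
assert StateEnum.TERMINATED != 1
assert np.asarray([StateEnum.TERMINATED]).dtype == object

# `IntEnum` members are integers: comparisons and numeric storage just work,
# which avoids extra conversions when the state is checked or stored.
assert StateIntEnum.TERMINATED == 1
assert np.asarray([StateIntEnum.TERMINATED]).dtype.kind == "i"
```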
@@ -274,7 +274,7 @@ def _observer_handle(self,
# they are always updated before the controller gets called, no matter
# if either one or the other is time-continuous. Hacking the internal
# dynamics to clear quantities does not address this issue either.
-self.quantities.clear()
+# self.quantities.clear()

# Refresh the observation if not already done but only if a simulation
# is already running. It would be pointless to refresh the observation
36 changes: 30 additions & 6 deletions python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py
@@ -448,9 +448,10 @@ def refresh_observation(self, measurement: EngineObsType) -> None:
self.env.refresh_observation(measurement)

def has_terminated(self, info: InfoType) -> Tuple[bool, bool]:
-"""Determine whether the episode is over, because a terminal state of
-the underlying MDP has been reached or an aborting condition outside
-the scope of the MDP has been triggered.
+"""Determine whether the practitioner is instructed to stop the ongoing
+episode on the spot because a termination condition has been triggered,
+either coming from the base environment or from the ad-hoc termination
+conditions that have been plugged on top of it.
At each step of the wrapped environment, all its termination conditions
will be evaluated sequentially until one of them eventually gets
@@ -465,6 +466,9 @@ def has_terminated(self, info: InfoType) -> Tuple[bool, bool]:
This method is called after `refresh_observation`, so that the
internal buffer 'observation' is up-to-date.
+.. seealso::
+    See `InterfaceJiminyEnv.has_terminated` documentation for details.
:param info: Dictionary of extra information for monitoring.
:returns: terminated and truncated flags.
@@ -492,9 +496,29 @@ def compute_command(self, action: ActT, command: np.ndarray) -> None:
self.env.compute_command(action, command)

def compute_reward(self, terminated: bool, info: InfoType) -> float:
-if self.reward is None:
-    return 0.0
-return self.reward(terminated, info)
+"""Compute the total reward, i.e. the sum of the original reward from the
+wrapped environment and the ad-hoc reward components that have been
+plugged on top of it.
+
+.. seealso::
+    See `InterfaceController.compute_reward` documentation for details.
+
+:param terminated: Whether the episode has reached the terminal state
+                   of the MDP at the current step. This flag can be
+                   used to compute a specific terminal reward.
+:param info: Dictionary of extra information for monitoring.
+
+:returns: Aggregated reward for the current step.
+"""
+# Compute base reward
+reward = self.env.compute_reward(terminated, info)
+
+# Add composed reward if any
+if self.reward is not None:
+    reward += self.reward(terminated, info)
+
+# Return total reward
+return reward


class ObservedJiminyEnv(