[gym/common] Refactor quantity management to dramatically improve its performance. #821

Merged: 5 commits, Jul 8, 2024
1 change: 1 addition & 0 deletions core/src/io/serialization.cc
@@ -1008,6 +1008,7 @@ namespace boost::serialization

// Restore extended simulation model
model.pinocchioModel_ = pinocchioModel;
model.pinocchioData_ = pinocchio::Data(pinocchioModel);
}

template<class Archive>
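The one-line addition above keeps `pinocchioData_` consistent with the freshly restored `pinocchioModel_`: in Pinocchio, a `Data` object is allocated for one specific `Model`, so it must be re-created whenever the model is replaced. A minimal editorial sketch of the same rule through the Python bindings (the sample humanoid model is only used here for illustration):

```python
import pinocchio as pin

# Stand-in for a model that was just restored from an archive
model = pin.buildSampleModelHumanoid()

# Whenever the model changes, the matching Data must be rebuilt, since its
# internal buffers are sized and indexed for one specific model layout.
data = pin.Data(model)

# Algorithms always consume a consistent (model, data) pair
q = pin.neutral(model)
pin.forwardKinematics(model, data, q)
```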
4 changes: 2 additions & 2 deletions core/src/solver/constraint_solvers.cc
@@ -265,11 +265,11 @@ namespace jiminy
they can grow arbitrarily large for constraints whose bounds are active. It follows
that stagnation of the residuals is the only viable criterion.
The PGS algorithm has been modified for solving second-order cone LCP, which means
that only the L2-norm of the tangential forces can be expected to converge. Because
that only the L^2-norm of the tangential forces can be expected to converge. Because
of this, it is too restrictive to check the element-wise variation of the residuals
over iterations. It makes more sense to look at the Linf-norm instead, but this
criterion is very lax. A good compromise may be to look at the constraint-block-wise
L2-norm, which is similar to what Drake simulator is doing. For reference, see:
L^2-norm, which is similar to what Drake simulator is doing. For reference, see:
https://github.com/RobotLocomotion/drake/blob/master/multibody/contact_solvers/pgs_solver.cc
*/
const double tol = tolAbs_ + tolRel_ * y_.lpNorm<Eigen::Infinity>() + EPS;
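The comment above argues for checking stagnation of the residuals per constraint block (an L2-norm within each block, as in Drake's PGS solver) rather than element-wise or via a single Linf-norm. Below is a rough numpy sketch of such a criterion, not Jiminy's actual implementation; the block layout and tolerance names are assumptions made for illustration:

```python
import numpy as np

def has_converged(y, y_prev, block_sizes, tol_abs=1e-8, tol_rel=1e-6):
    """Check stagnation of the residual per constraint block.

    Each block (e.g. one contact point: normal plus tangential components)
    is reduced to its L2-norm, and convergence is declared when the change
    of every block falls below an absolute-plus-relative tolerance.
    """
    start = 0
    for size in block_sizes:
        block = y[start:start + size]
        block_prev = y_prev[start:start + size]
        tol = tol_abs + tol_rel * np.linalg.norm(block)
        if np.linalg.norm(block - block_prev) > tol:
            return False
        start += size
    return True
```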
12 changes: 6 additions & 6 deletions python/gym_jiminy/common/gym_jiminy/common/bases/compositions.py
@@ -7,7 +7,7 @@
greatly reduces code duplication and bugs.
"""
from abc import ABC, abstractmethod
from enum import Enum
from enum import IntEnum
from typing import Tuple, Sequence, Callable, Union, Optional, Generic, TypeVar

import numpy as np
@@ -234,7 +234,7 @@ def __init__(self,
self.env.quantities[self.name] = quantity

# Keep track of the underlying quantity
self.quantity = self.env.quantities.registry[self.name]
self.data = self.env.quantities.registry[self.name]

def __del__(self) -> None:
try:
@@ -266,7 +266,7 @@ def compute(self, terminated: bool, info: InfoType) -> Optional[float]:
return None

# Evaluate raw quantity
value = self.quantity.get()
value = self.data.get()

# Early return if quantity is None
if value is None:
@@ -383,7 +383,7 @@ def compute(self, terminated: bool, info: InfoType) -> Optional[float]:
return reward_total


class EpisodeState(Enum):
class EpisodeState(IntEnum):
"""Specify the current state of the ongoing episode.
"""

@@ -595,7 +595,7 @@ def __init__(self,
self.env.quantities[self.name] = quantity

# Keep track of the underlying quantity
self.quantity = self.env.quantities.registry[self.name]
self.data = self.env.quantities.registry[self.name]

def __del__(self) -> None:
try:
@@ -615,7 +615,7 @@ def compute(self, info: InfoType) -> bool:
This method is not meant to be overloaded.
"""
# Evaluate the quantity
value = self.quantity.get()
value = self.data.get()

# Check if the quantity is out-of-bounds.
# Note that it may be `None` if the quantity is ill-defined for the
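Two changes recur in this file: the cached handle returned by `env.quantities.registry[self.name]` is renamed from `self.quantity` to `self.data`, and `EpisodeState` switches from `Enum` to `IntEnum` so that episode states compare and cast as plain integers (handy for numeric buffers and logging) without going through `.value`. A small editorial sketch of that behavioral difference; the member names and values are illustrative, since the actual members are not shown in this hunk:

```python
from enum import Enum, IntEnum

class PlainState(Enum):
    TERMINATED = 1

class IntState(IntEnum):
    TERMINATED = 1

# A plain Enum member never equals an int; an IntEnum member does.
assert PlainState.TERMINATED != 1
assert IntState.TERMINATED == 1

# IntEnum values can be used directly where integers are expected,
# e.g. as indices into numeric buffers.
flags = [0, 0, 0]
flags[IntState.TERMINATED] = 1
assert flags == [0, 1, 0]
```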
@@ -274,7 +274,7 @@ def _observer_handle(self,
# they are always updated before the controller gets called, no matter
# if either one or the other is time-continuous. Hacking the internal
# dynamics to clear quantities does not address this issue either.
self.quantities.clear()
# self.quantities.clear()

# Refresh the observation if not already done but only if a simulation
# is already running. It would be pointless to refresh the observation
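This hunk disables clearing the quantity cache inside the observer handle: as the comment explains, cached quantities must stay valid until the controller update that follows within the same step. Where the refactor actually resets the cache is not visible in this hunk; the sketch below only illustrates the general pattern of a single, well-defined invalidation point per step, with hypothetical class and method names:

```python
class StepLoopSketch:
    """Toy illustration of one cache-invalidation point per step."""

    def __init__(self, quantities):
        # `quantities` stands for the shared quantity manager; it is only
        # assumed to expose `clear()` and dict-like access to cached values.
        self.quantities = quantities

    def _observer_handle(self, measurement):
        # No cache clearing here: cached quantities must survive until the
        # controller update that runs later in the same step.
        return self.quantities.get("observation_features")

    def step(self, action):
        # Invalidate cached quantities exactly once, at a known point.
        self.quantities.clear()
```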
36 changes: 30 additions & 6 deletions python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py
@@ -448,9 +448,10 @@ def refresh_observation(self, measurement: EngineObsType) -> None:
self.env.refresh_observation(measurement)

def has_terminated(self, info: InfoType) -> Tuple[bool, bool]:
"""Determine whether the episode is over, because a terminal state of
the underlying MDP has been reached or an aborting condition outside
the scope of the MDP has been triggered.
"""Determine whether the practitioner is instructed to stop the ongoing
episode on the spot because a termination condition has been triggered,
either coming from the based environment or from the ad-hoc termination
conditions that has been plugged on top of it.

At each step of the wrapped environment, all its termination conditions
will be evaluated sequentially until one of them eventually gets
@@ -465,6 +466,9 @@ def has_terminated(self, info: InfoType) -> Tuple[bool, bool]:
This method is called after `refresh_observation`, so that the
internal buffer 'observation' is up-to-date.

.. seealso::
See `InterfaceJiminyEnv.has_terminated` documentation for details.

:param info: Dictionary of extra information for monitoring.

:returns: terminated and truncated flags.
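The docstring above describes evaluating the wrapped environment's own termination first and then each plugged-in condition in turn, stopping at the first one that triggers. A rough editorial sketch of that aggregation logic (not part of the diff), assuming each plugged-in condition is a callable returning a (terminated, truncated) pair; the actual conversion from the boolean `compute` result in `compositions.py` may differ:

```python
from typing import Any, Callable, Dict, Sequence, Tuple

InfoType = Dict[str, Any]
TerminationFn = Callable[[InfoType], Tuple[bool, bool]]

def aggregate_terminations(env_has_terminated: TerminationFn,
                           terminations: Sequence[TerminationFn],
                           info: InfoType) -> Tuple[bool, bool]:
    """Evaluate the base environment first, then every ad-hoc termination
    condition sequentially, returning as soon as one of them triggers."""
    terminated, truncated = env_has_terminated(info)
    if terminated or truncated:
        return terminated, truncated
    for termination in terminations:
        terminated, truncated = termination(info)
        if terminated or truncated:
            return terminated, truncated
    return False, False
```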
@@ -492,9 +496,29 @@ def compute_command(self, action: ActT, command: np.ndarray) -> None:
self.env.compute_command(action, command)

def compute_reward(self, terminated: bool, info: InfoType) -> float:
if self.reward is None:
return 0.0
return self.reward(terminated, info)
"""Compute the total reward, ie the sum of the original reward from the
wrapped environment with the ad-hoc reward components that has been
plugged on top of it.

.. seealso::
See `InterfaceController.compute_reward` documentation for details.

:param terminated: Whether the episode has reached the terminal state
of the MDP at the current step. This flag can be
used to compute a specific terminal reward.
:param info: Dictionary of extra information for monitoring.

:returns: Aggregated reward for the current step.
"""
# Compute base reward
reward = self.env.compute_reward(terminated, info)

# Add composed reward if any
if self.reward is not None:
reward += self.reward(terminated, info)

# Return total reward
return reward


class ObservedJiminyEnv(
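On the reward side, the net effect of the new `compute_reward` is that the composed reward is now added to the wrapped environment's own reward instead of replacing it (the previous code returned only the composed reward, or 0.0 when none was defined). A tiny self-contained illustration with stub reward callables and made-up numbers:

```python
def compute_total_reward(base_reward, composed_reward, terminated, info):
    # Base reward from the wrapped environment
    reward = base_reward(terminated, info)
    # Add the composed reward component, if any
    if composed_reward is not None:
        reward += composed_reward(terminated, info)
    return reward

# Hypothetical reward components, for illustration only
base = lambda terminated, info: 1.0
tracking = lambda terminated, info: 0.25

assert compute_total_reward(base, tracking, False, {}) == 1.25
assert compute_total_reward(base, None, False, {}) == 1.0
```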