Skip to content

Commit

Permalink
[gym/common] Add angular momentum and support polygon stability reward.
Browse files Browse the repository at this point in the history
  • Loading branch information
duburcqa committed Jun 9, 2024
1 parent 1cc15aa commit 1ded853
Show file tree
Hide file tree
Showing 16 changed files with 601 additions and 163 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -340,11 +340,11 @@ def _controller_handle(self,
self.__is_observation_refreshed = False

@property
@abstractmethod
def unwrapped(self) -> "BaseJiminyEnv":
"""The underlying environment at the basis of the pipeline which this
environment is part of.
"""
return self

@property
@abstractmethod
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# pylint: disable=missing-module-docstring

from .mixin import (radial_basis_function,
from .mixin import (CUTOFF_ESP,
radial_basis_function,
AdditiveMixtureReward,
MultiplicativeMixtureReward)
from .generic import (BaseTrackingReward,
Expand All @@ -14,6 +15,7 @@
MinimizeAngularMomentumReward)

__all__ = [
"CUTOFF_ESP",
"radial_basis_function",
"AdditiveMixtureReward",
"MultiplicativeMixtureReward",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,10 @@ class BaseTrackingReward(BaseQuantityReward):
otherwise an exception will be raised. See `DatasetTrajectoryQuantity` and
`AbstractQuantity` documentations for details.
The error transform in a normalized reward to maximize by applying RBF
The error is transformed into a normalized reward to maximize by applying RBF
kernel on the error. The reward will be 1.0 if the error cancels out
completely and less than 0.01 above the user-specified cutoff threshold.
completely and less than 'CUTOFF_ESP' above the user-specified cutoff
threshold.
"""
def __init__(self,
env: InterfaceJiminyEnv,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


# Reward value at cutoff threshold
RBF_CUTOFF_ESP = 1.0e-2
CUTOFF_ESP = 1.0e-2


ArrayOrScalar = Union[np.ndarray, float]
Expand Down Expand Up @@ -51,7 +51,7 @@ def radial_basis_function(error: ArrayOrScalar,
squared_dist_rel = np.dot(error_, error_) / math.pow(cutoff, 2)
else:
squared_dist_rel = math.pow(np.linalg.norm(error_, order) / cutoff, 2)
return math.pow(RBF_CUTOFF_ESP, squared_dist_rel)
return math.pow(CUTOFF_ESP, squared_dist_rel)


class AdditiveMixtureReward(BaseMixtureReward):
Expand Down
4 changes: 4 additions & 0 deletions python/gym_jiminy/common/gym_jiminy/common/envs/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,10 @@ def step_dt(self) -> float:
def is_training(self) -> bool:
return self._is_training

@property
def unwrapped(self) -> "BaseJiminyEnv":
"""Return this very environment instance.
"""
return self

def train(self) -> None:
self._is_training = True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
MultiFootRelativeXYZQuat,
CenterOfMass,
CapturePoint,
ZeroMomentPoint)
ZeroMomentPoint,
translate_position_odom)


__all__ = [
Expand Down Expand Up @@ -57,4 +58,5 @@
'CenterOfMass',
'CapturePoint',
'ZeroMomentPoint',
'translate_position_odom'
]
11 changes: 7 additions & 4 deletions python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,9 @@ def initialize(self) -> None:
# Re-allocate memory as the number of frames is not known in advance.
# Note that Fortran memory layout (column-major) is used for speed up
# because it preserves contiguity when copying frame data.
# Anyway, C memory layout (row-major) does not make sense in this case
# since chunks of columns are systematically extracted, which means
# that the returned array would NEVER be contiguous.
nframes = len(self.frame_names)
self._rot_mat_batch = np.zeros((3, 3, nframes), order='F')

Expand Down Expand Up @@ -387,9 +390,9 @@ def initialize(self) -> None:
# Re-allocate memory as the number of frames is not known in advance
nframes = len(self.frame_names)
if self.type in (OrientationType.EULER, OrientationType.ANGLE_AXIS):
self._data_batch = np.zeros((3, nframes), order='C')
self._data_batch = np.zeros((3, nframes), order='F')
elif self.type == OrientationType.QUATERNION:
self._data_batch = np.zeros((4, nframes), order='C')
self._data_batch = np.zeros((4, nframes), order='F')

# Re-assign mapping from chunks of frame names to corresponding data
if self.type is not OrientationType.MATRIX:
Expand Down Expand Up @@ -640,7 +643,7 @@ def initialize(self) -> None:

# Re-allocate memory as the number of frames is not known in advance
nframes = len(self.frame_names)
self._pos_batch = np.zeros((3, nframes), order='C')
self._pos_batch = np.zeros((3, nframes), order='F')

# Refresh proxies
self._pos_views.clear()
Expand Down Expand Up @@ -908,7 +911,7 @@ def __init__(self,
auto_refresh=False)

# Pre-allocate memory for storing the pose XYZQuat of all frames
self._xyzquats = np.zeros((7, len(frame_names)), order='C')
self._xyzquats = np.zeros((7, len(frame_names)), order='F')

def refresh(self) -> np.ndarray:
# Copy the position of all frames at once in contiguous buffer
Expand Down
13 changes: 7 additions & 6 deletions python/gym_jiminy/envs/gym_jiminy/envs/atlas.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
PDAdapter,
MahonyFilter)
from gym_jiminy.common.utils import build_pipeline
from gym_jiminy.toolbox.math import ConvexHull
from gym_jiminy.toolbox.math import ConvexHull2D

if sys.version_info < (3, 9):
from importlib_resources import files
Expand Down Expand Up @@ -100,18 +100,19 @@
def _cleanup_contact_points(env: WalkerJiminyEnv) -> None:
    """Prune the contact points of the robot, keeping only a minimal subset
    for each half of the contact frames.

    The contact frames are split into two contiguous halves (presumably one
    per foot - to be confirmed against the robot model). For each half:

      * only the lowest half of its contact points is considered, i.e. the
        ones having the smallest third coordinate of their frame translation,
      * among them, only the vertices of the 2D convex hull of their first
        two coordinates are kept.

    Every other contact point of the half is removed from the robot.

    :param env: Environment whose robot contact points must be cleaned up.
    """
    # Snapshot frame indices and names once, before removing anything
    contact_frame_indices = env.robot.contact_frame_indices
    contact_frame_names = env.robot.contact_frame_names

    # Number of contact points in each half
    num_contacts = len(contact_frame_indices) // 2

    for contact_slice in (slice(num_contacts), slice(num_contacts, None)):
        # Gather the position of all the contact frames of this half
        contact_positions = np.stack([
            env.robot.pinocchio_data.oMf[frame_index].translation
            for frame_index in contact_frame_indices[contact_slice]
        ], axis=0)

        # Indices (local to this half) of the lowest contact points
        contact_bottom_index = np.argsort(
            contact_positions[:, 2])[:(num_contacts // 2)]

        # Convex hull of the lowest contact points in the horizontal plane
        convex_hull = ConvexHull2D(contact_positions[contact_bottom_index, :2])

        # Remove all the contact points that are not vertices of the hull
        env.robot.remove_contact_points([
            contact_frame_names[contact_slice][i]
            for i in set(range(num_contacts)).difference(
                contact_bottom_index[convex_hull.indices])])


class AtlasJiminyEnv(WalkerJiminyEnv):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# pylint: disable=missing-module-docstring

from .locomotion import tanh_normalization, MaximizeStability

__all__ = [
"tanh_normalization",
"MaximizeStability"
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""Rewards mainly relevant for locomotion tasks on floating-base robots.
"""
import math
from functools import partial

import numba as nb

from gym_jiminy.common.compositions import CUTOFF_ESP
from gym_jiminy.common.bases import (
InterfaceJiminyEnv, QuantityEvalMode, BaseQuantityReward)

from ..quantities import StabilityMarginProjectedSupportPolygon


@nb.jit(nopython=True, cache=True)
def tanh_normalization(value: float,
                       cutoff_low: float,
                       cutoff_high: float) -> float:
    """Squash a scalar quantity into the open interval (0.0, 1.0) by applying
    a rescaled hyperbolic tangent (i.e. a logistic function).

    The output is about '1.0 - CUTOFF_ESP' for 'value = cutoff_low' and about
    'CUTOFF_ESP' for 'value = cutoff_high', up to rounding errors. It varies
    monotonically in between. The bounds 1.0 and 0.0 are only reached
    asymptotically beyond 'cutoff_low' and 'cutoff_high' respectively, which
    means that the gradient never cancels out but rather vanishes
    exponentially. The tighter the cutoff range, the steeper the gradient.

    The two cutoffs may be specified in any order, but they must be distinct
    since the cutoff range is used as divisor.

    :param value: Value of the scalar floating-point quantity. The quantity may
                  be bounded or unbounded, and signed or not, without
                  restrictions.
    :param cutoff_low: Value for which the output is '1.0 - CUTOFF_ESP'.
    :param cutoff_high: Value for which the output is 'CUTOFF_ESP'.

    :returns: Normalized quantity, strictly between 0.0 and 1.0.
    """
    # Affine rescaling mapping 'cutoff_low' to +1.0 and 'cutoff_high' to -1.0
    numerator = cutoff_high + cutoff_low - 2 * value
    denominator = cutoff_high - cutoff_low
    exponent = numerator / denominator

    # Base of the exponential, chosen to get the expected value at cutoffs
    base = CUTOFF_ESP / (1.0 - CUTOFF_ESP)

    return 1.0 / (1.0 + math.pow(base, exponent))


class MaximizeStability(BaseQuantityReward):
    """Encourage the agent to maintain itself in postures as robust as possible
    to external disturbances.

    The signed distance of the ZMP from the border of the projected support
    polygon is transformed in a normalized reward to maximize by applying
    rescaled tanh. The reward is smaller than 'CUTOFF_ESP' if the ZMP is
    outside the projected support polygon and further away from the border
    than the outer cutoff. Conversely, the reward is larger than
    '1.0 - CUTOFF_ESP' if the ZMP is inside the projected support polygon and
    further away from the border than the inner cutoff.

    The agent may opt for one of two very different strategies to maximize
    this reward:

      * Foot placement: reshaping the projected support polygon by moving the
        feet (aka the candidate contact points) in the direction of the ZMP
        without actually moving the ZMP itself.
      * Torso/Ankle control: Modulating the linear and angular momentum of its
        upper-body to move the ZMP closer to the Chebyshev center of the
        projected support polygon while holding the feet at the exact same
        location.

    These two strategies are complementary rather than mutually exclusive.
    Usually, ankle control is preferred for small disturbances. Foot placement
    comes into play when ankle control is no longer sufficient to keep balance.
    Indeed, ankle control is only capable of recovering 0-step capturable
    disturbances, while foot placement is only limited to inf-step capturable
    disturbances, which includes and dramatically extends 0-step capturability.
    """
    def __init__(self,
                 env: InterfaceJiminyEnv,
                 cutoff_inner: float,
                 cutoff_outer: float) -> None:
        """
        :param env: Base or wrapped jiminy environment.
        :param cutoff_inner: Cutoff threshold when the ZMP lies inside the
                             support polygon. The reward will be larger than
                             '1.0 - CUTOFF_ESP' if the distance from the border
                             is larger than 'cutoff_inner'.
        :param cutoff_outer: Cutoff threshold when the ZMP lies outside the
                             support polygon. The reward will be smaller than
                             'CUTOFF_ESP' if the ZMP is further away from the
                             border of the support polygon than 'cutoff_outer'.

        :raises ValueError: If any cutoff is negative, or if both are zero.
        """
        # Backup some user argument(s)
        self.cutoff_inner = cutoff_inner
        self.cutoff_outer = cutoff_outer

        # The cutoff thresholds must be positive
        if self.cutoff_inner < 0.0 or self.cutoff_outer < 0.0:
            raise ValueError(
                "The inner and outer cutoff must both be positive.")

        # The cutoff range 'cutoff_inner + cutoff_outer' is used as divisor
        # by the normalization function, so it must not be zero. Failing early
        # here rather than at the first evaluation of the reward.
        if self.cutoff_inner == 0.0 and self.cutoff_outer == 0.0:
            raise ValueError(
                "The inner and outer cutoff must not both be zero.")

        # Call base implementation.
        # The normalized reward is about '1.0 - CUTOFF_ESP' for a signed
        # distance of 'cutoff_inner', and 'CUTOFF_ESP' for '-cutoff_outer'.
        # NOTE(review): "reward_momentum" is presumably the name of the reward
        # and looks copy-pasted from the angular momentum reward. Confirm
        # whether it should be renamed before changing it, as other code may
        # depend on this string.
        super().__init__(
            env,
            "reward_momentum",
            (StabilityMarginProjectedSupportPolygon, dict(
                mode=QuantityEvalMode.TRUE
            )),
            partial(tanh_normalization,
                    cutoff_low=self.cutoff_inner,
                    cutoff_high=-self.cutoff_outer),
            is_normalized=True,
            is_terminal=False)
6 changes: 3 additions & 3 deletions python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# pylint: disable=missing-module-docstring

from .qhull import ConvexHull, compute_distance_convex_to_point
from .qhull import ConvexHull2D, compute_convex_chebyshev_center

__all__ = [
"ConvexHull",
"compute_distance_convex_to_point"
"ConvexHull2D",
"compute_convex_chebyshev_center"
]

try:
Expand Down
Loading

0 comments on commit 1ded853

Please sign in to comment.