Skip to content

Commit

Permalink
[gym/common] Add ground friction minimization reward.
Browse files Browse the repository at this point in the history
  • Loading branch information
duburcqa committed Jun 11, 2024
1 parent 96b1daf commit 480d389
Show file tree
Hide file tree
Showing 9 changed files with 192 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
TrackingCapturePointReward,
TrackingFootPositionsReward,
TrackingFootOrientationsReward,
MinimizeAngularMomentumReward)
MinimizeAngularMomentumReward,
MinimizeFrictionReward)

__all__ = [
"CUTOFF_ESP",
Expand All @@ -27,5 +28,6 @@
"TrackingFootPositionsReward",
"TrackingFootOrientationsReward",
"MinimizeAngularMomentumReward",
"MinimizeFrictionReward",
"SurviveReward"
]
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
from ..bases import (
InterfaceJiminyEnv, StateQuantity, QuantityEvalMode, BaseQuantityReward)
from ..quantities import (
MaskedQuantity, UnaryOpQuantity, AverageBaseOdometryVelocity,
AverageBaseMomentum, MultiFootRelativeXYZQuat, CapturePoint)
MaskedQuantity, UnaryOpQuantity, AverageBaseOdometryVelocity, CapturePoint,
MultiFootRelativeXYZQuat, MultiContactRelativeForceTangential,
AverageBaseMomentum)
from ..quantities.locomotion import sanitize_foot_frame_names
from ..utils import quat_difference

Expand Down Expand Up @@ -221,3 +222,34 @@ def __init__(self,
partial(radial_basis_function, cutoff=self.cutoff, order=2),
is_normalized=True,
is_terminal=False)


class MinimizeFrictionReward(BaseQuantityReward):
"""Reward the agent for minimizing the tangential forces at all the contact
points and collision bodies, and to avoid jerky intermittent contact state.
The L2-norm is used to aggregate all the local tangential forces. While the
L1-norm would be more natural in this specific cases, using the L2-norm is
preferable as it promotes space-time regularity, ie balancing the force
distribution evenly between all the candidate contact points and avoiding
jerky contact forces over time (high-frequency vibrations), phenomena to
which the L1-norm is completely insensitive.
"""
def __init__(self,
env: InterfaceJiminyEnv,
cutoff: float) -> None:
"""
:param env: Base or wrapped jiminy environment.
:param cutoff: Cutoff threshold for the RBF kernel transform.
"""
# Backup some user argument(s)
self.cutoff = cutoff

# Call base implementation
super().__init__(
env,
"reward_friction",
(MultiContactRelativeForceTangential, dict()),
partial(radial_basis_function, cutoff=self.cutoff, order=2),
is_normalized=True,
is_terminal=False)
18 changes: 14 additions & 4 deletions python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,21 @@ def radial_basis_function(error: ArrayOrScalar,
:param cutoff: Cut-off threshold to consider.
:param order: Order of Lp-Norm that will be used as distance metric.
"""
error_ = np.asarray(error).reshape((-1,))
if order == 2:
squared_dist_rel = np.dot(error_, error_) / math.pow(cutoff, 2)
error_ = np.atleast_1d(error)
if error_.flags.contiguous or order != 2:
if error_.ndim > 1 and not error_.flags.contiguous:
error_ = np.ascontiguousarray(error_)
if error_.flags.c_contiguous:
error1d = np.asarray(error_).ravel()
else:
error1d = np.asarray(error_.T).ravel()
if order == 2:
squared_dist_rel = np.dot(error1d, error1d) / math.pow(cutoff, 2)
else:
squared_dist_rel = math.pow(
np.linalg.norm(error1d, order) / cutoff, 2)
else:
squared_dist_rel = math.pow(np.linalg.norm(error_, order) / cutoff, 2)
squared_dist_rel = np.sum(np.square(error_)) / math.pow(cutoff, 2)
return math.pow(CUTOFF_ESP, squared_dist_rel)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
CenterOfMass,
CapturePoint,
ZeroMomentPoint,
MultiContactRelativeForceTangential,
translate_position_odom)


Expand Down Expand Up @@ -58,5 +59,6 @@
'CenterOfMass',
'CapturePoint',
'ZeroMomentPoint',
'MultiContactRelativeForceTangential',
'translate_position_odom'
]
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
import numba as nb

import jiminy_py.core as jiminy
from jiminy_py.core import array_copyto # pylint: disable=no-name-in-module
from jiminy_py.core import ( # pylint: disable=no-name-in-module
array_copyto, multi_array_copyto)
from jiminy_py.dynamics import update_quantities
import pinocchio as pin

Expand Down Expand Up @@ -902,3 +903,92 @@ def refresh(self) -> np.ndarray:
translate_position_odom(self._dcm, self.odom_pose, self._dcm)

return self._dcm


@dataclass(unsafe_hash=True)
class MultiContactRelativeForceTangential(InterfaceQuantity[np.ndarray]):
"""Standardized tangential forces apply on all contact points and collision
bodies.
The tangential force is rescaled by the weight of the robot rather than the
actual vertical force. It has the advantage to guarantee that the resulting
quantity is never poorly conditioned, which would be the case otherwise.
Moreover, the effect of the vertical force is not canceled out, which is
interesting for deriving a reward, as it allows for indirectly penalize
jerky contact states and violent impacts. The side effect is not being able
to guarantee that this quantity is bounded. Indeed, only the ratio of the
norm of the tangential force at every contact point (or the resulting one)
is bounded by the product of the friction coefficient by the vertical
force, not the tangential force itself. This issue is a minor inconvenience
as all it requires is normalization using RBF kernel to make it finite.
"""

def __init__(self,
env: InterfaceJiminyEnv,
parent: Optional[InterfaceQuantity]) -> None:
"""
:param env: Base or wrapped jiminy environment.
:param parent: Higher-level quantity from which this quantity is a
requirement if any, `None` otherwise.
"""
# Call base implementation
super().__init__(
env,
parent,
requirements={},
auto_refresh=False)

# Weight of the robot
self._robot_weight: float = -1

# Stacked tangential forces on all contact points and collision bodies
self._force_tangential_batch = np.array([])

# Define proxy for views of individual tangential forces in the stack
self._force_tangential_views: Tuple[np.ndarray, ...] = ()

# Define proxy for the current tangential forces
self._force_tangential_refs: Tuple[np.ndarray, ...] = ()

def initialize(self) -> None:
# Call base implementation
super().initialize()

# Compute the weight of the robot
gravity = abs(self.env.robot.pinocchio_model.gravity.linear[2])
robot_mass = self.env.robot.pinocchio_data.mass[0]
self._robot_weight = robot_mass * gravity

# Get all frame constraints associated with contacts and collisions
frame_constraints: Sequence[jiminy.FrameConstraint] = []
for constraint in self.env.robot.constraints.contact_frames.values():
assert isinstance(constraint, jiminy.FrameConstraint)
frame_constraints.append(constraint)
for constraints_body in self.env.robot.constraints.collision_bodies:
for constraint in constraints_body:
assert isinstance(constraint, jiminy.FrameConstraint)
frame_constraints.append(constraint)

# Extract references to all the tangential forces
force_tangential_refs = []
for constraint in frame_constraints:
# f_lin, f_ang = lambda[:3], np.array([0.0, 0.0, lambda[3]])
force_tangential_refs.append(constraint.lambda_c[:2])
self._force_tangential_refs = tuple(force_tangential_refs)

# Pre-allocated memory for stacked tangential forces
self._force_tangential_batch = np.zeros(
(2, len(self._force_tangential_refs)), order='F')

# Refresh proxies
self._force_tangential_views = tuple(self._force_tangential_batch.T)

def refresh(self) -> np.ndarray:
# Copy all tangential forces in contiguous buffer
multi_array_copyto(self._force_tangential_views,
self._force_tangential_refs)

# Scale the tangential forces by the weight of the robot
self._force_tangential_batch /= self._robot_weight

return self._force_tangential_batch
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ class MaskedQuantity(InterfaceQuantity[np.ndarray]):
Elements will be extract by copy unless the indices of the elements to
extract to be written equivalently by a slice (ie they are evenly spaced),
and the array can be flattened while preserving memory contiguity if 'axis'
is `None`.
is `None`, which means that the result will be different between C- and F-
contiguous arrays.
"""

quantity: InterfaceQuantity
Expand Down Expand Up @@ -204,9 +205,9 @@ def refresh(self) -> np.ndarray:
# Notably, `operator[]` supports boolean mask but `take` does not.
return self.data.take(self.indices, self.axis)
if self.axis is None:
# `reshape` must be used instead of `flat` to get a view that can
# `ravel` must be used instead of `flat` to get a view that can
# be sliced without copy.
return self.data.reshape((-1,))[self._slices]
return self.data.ravel(order="K")[self._slices]
return self.data[self._slices]


Expand Down
3 changes: 2 additions & 1 deletion python/gym_jiminy/unit_py/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,4 +190,5 @@ def test_math(self):
np.testing.assert_allclose(
rpy_i, pin.rpy.matrixToRpy(rot_mat_i.reshape((3, 3))))
np.testing.assert_allclose(math.rpy_to_quat(rpy), quat)
np.testing.assert_allclose(math.matrix_to_quat(rot_mat), quat)
np.testing.assert_allclose(
math.matrix_to_quat(rot_mat), quat, atol=1e-9)
24 changes: 23 additions & 1 deletion python/gym_jiminy/unit_py/test_quantities.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
ActuatedJointsPosition,
CenterOfMass,
CapturePoint,
ZeroMomentPoint)
ZeroMomentPoint,
MultiContactRelativeForceTangential)


class Quantities(unittest.TestCase):
Expand Down Expand Up @@ -523,3 +524,24 @@ def test_foot_relative_pose(self):

np.testing.assert_allclose(value[:3], pos_rel[:, :-1])
np.testing.assert_allclose(value[-4:], quat_rel[:, :-1])

def test_tangential_forces(self):
""" TODO: Write documentation
"""
env = gym.make("gym_jiminy.envs:atlas")

env.quantities["force_tangential_rel"] = (
MultiContactRelativeForceTangential, {})

env.reset(seed=0)
for _ in range(10):
env.step(env.action)

gravity = abs(env.robot.pinocchio_model.gravity.linear[2])
robot_mass = env.robot.pinocchio_data.mass[0]
force_tangential_rel = np.stack(tuple(
constraint.lambda_c[:2]
for constraint in env.robot.constraints.contact_frames.values()),
axis=-1) / (robot_mass * gravity)
np.testing.assert_allclose(
force_tangential_rel, env.quantities["force_tangential_rel"])
20 changes: 19 additions & 1 deletion python/gym_jiminy/unit_py/test_rewards.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
TrackingCapturePointReward,
TrackingFootPositionsReward,
TrackingFootOrientationsReward,
MinimizeFrictionReward,
SurviveReward,
AdditiveMixtureReward)
from gym_jiminy.toolbox.compositions import (
Expand Down Expand Up @@ -74,9 +75,10 @@ def test_tracking(self):
np.testing.assert_allclose(
quantity_true.get(), self.env.robot_state.q[2])

gamma = - np.log(0.01) / cutoff ** 2
gamma = - np.log(CUTOFF_ESP) / cutoff ** 2
value = np.exp(- gamma * np.sum((reward.quantity.op(
quantity_true.get(), quantity_ref.get())) ** 2))
assert value > 0.01
np.testing.assert_allclose(reward(terminated, {}), value)

del reward
Expand Down Expand Up @@ -125,3 +127,19 @@ def test_stability(self):
np.testing.assert_allclose(tanh_normalization(
CUTOFF_OUTER, -CUTOFF_INNER, CUTOFF_OUTER), CUTOFF_ESP)
np.testing.assert_allclose(reward_stability(terminated, {}), value)

def test_friction(self):

Check notice on line 131 in python/gym_jiminy/unit_py/test_rewards.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

python/gym_jiminy/unit_py/test_rewards.py#L131

Missing function or method docstring
CUTOFF = 0.5

Check notice on line 132 in python/gym_jiminy/unit_py/test_rewards.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

python/gym_jiminy/unit_py/test_rewards.py#L132

Variable name "CUTOFF" doesn't conform to snake_case naming style
env = gym.make("gym_jiminy.envs:atlas-pid", debug=True)
reward_friction = MinimizeFrictionReward(env, cutoff=CUTOFF)
quantity = reward_friction.quantity

env.reset(seed=0)
_, _, terminated, _, _ = env.step(env.action)
force_tangential_rel = quantity.get()
force_tangential_rel_norm = np.sum(np.square(force_tangential_rel))

gamma = - np.log(CUTOFF_ESP) / CUTOFF ** 2
value = np.exp(- gamma * force_tangential_rel_norm)
assert value > 0.01
np.testing.assert_allclose(reward_friction(terminated, {}), value)

0 comments on commit 480d389

Please sign in to comment.