From 480d38958e412812104a1b84b4f94ecf06a1b862 Mon Sep 17 00:00:00 2001 From: Alexis Duburcq Date: Mon, 10 Jun 2024 22:18:29 +0200 Subject: [PATCH] [gym/common] Add ground friction minimization reward. --- .../common/compositions/__init__.py | 4 +- .../common/compositions/locomotion.py | 36 +++++++- .../gym_jiminy/common/compositions/mixin.py | 18 +++- .../gym_jiminy/common/quantities/__init__.py | 2 + .../common/quantities/locomotion.py | 92 ++++++++++++++++++- .../gym_jiminy/common/quantities/transform.py | 7 +- python/gym_jiminy/unit_py/test_misc.py | 3 +- python/gym_jiminy/unit_py/test_quantities.py | 24 ++++- python/gym_jiminy/unit_py/test_rewards.py | 20 +++- 9 files changed, 192 insertions(+), 14 deletions(-) diff --git a/python/gym_jiminy/common/gym_jiminy/common/compositions/__init__.py b/python/gym_jiminy/common/gym_jiminy/common/compositions/__init__.py index 17d2f3521..a0ac75db7 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/compositions/__init__.py +++ b/python/gym_jiminy/common/gym_jiminy/common/compositions/__init__.py @@ -12,7 +12,8 @@ TrackingCapturePointReward, TrackingFootPositionsReward, TrackingFootOrientationsReward, - MinimizeAngularMomentumReward) + MinimizeAngularMomentumReward, + MinimizeFrictionReward) __all__ = [ "CUTOFF_ESP", @@ -27,5 +28,6 @@ "TrackingFootPositionsReward", "TrackingFootOrientationsReward", "MinimizeAngularMomentumReward", + "MinimizeFrictionReward", "SurviveReward" ] diff --git a/python/gym_jiminy/common/gym_jiminy/common/compositions/locomotion.py b/python/gym_jiminy/common/gym_jiminy/common/compositions/locomotion.py index 68044d684..6932e895f 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/compositions/locomotion.py +++ b/python/gym_jiminy/common/gym_jiminy/common/compositions/locomotion.py @@ -9,8 +9,9 @@ from ..bases import ( InterfaceJiminyEnv, StateQuantity, QuantityEvalMode, BaseQuantityReward) from ..quantities import ( - MaskedQuantity, UnaryOpQuantity, AverageBaseOdometryVelocity, - AverageBaseMomentum, MultiFootRelativeXYZQuat, CapturePoint) + MaskedQuantity, UnaryOpQuantity, AverageBaseOdometryVelocity, CapturePoint, + MultiFootRelativeXYZQuat, MultiContactRelativeForceTangential, + AverageBaseMomentum) from ..quantities.locomotion import sanitize_foot_frame_names from ..utils import quat_difference @@ -221,3 +222,34 @@ def __init__(self, partial(radial_basis_function, cutoff=self.cutoff, order=2), is_normalized=True, is_terminal=False) + + +class MinimizeFrictionReward(BaseQuantityReward): + """Reward the agent for minimizing the tangential forces at all the contact + points and collision bodies, and to avoid jerky intermittent contact state. + + The L2-norm is used to aggregate all the local tangential forces. While the + L1-norm would be more natural in this specific cases, using the L2-norm is + preferable as it promotes space-time regularity, ie balancing the force + distribution evenly between all the candidate contact points and avoiding + jerky contact forces over time (high-frequency vibrations), phenomena to + which the L1-norm is completely insensitive. + """ + def __init__(self, + env: InterfaceJiminyEnv, + cutoff: float) -> None: + """ + :param env: Base or wrapped jiminy environment. + :param cutoff: Cutoff threshold for the RBF kernel transform. + """ + # Backup some user argument(s) + self.cutoff = cutoff + + # Call base implementation + super().__init__( + env, + "reward_friction", + (MultiContactRelativeForceTangential, dict()), + partial(radial_basis_function, cutoff=self.cutoff, order=2), + is_normalized=True, + is_terminal=False) diff --git a/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py b/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py index ce0a90d19..453860d55 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py +++ b/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py @@ -46,11 +46,21 @@ def radial_basis_function(error: ArrayOrScalar, :param cutoff: Cut-off threshold to consider. :param order: Order of Lp-Norm that will be used as distance metric. """ - error_ = np.asarray(error).reshape((-1,)) - if order == 2: - squared_dist_rel = np.dot(error_, error_) / math.pow(cutoff, 2) + error_ = np.atleast_1d(error) + if error_.flags.contiguous or order != 2: + if error_.ndim > 1 and not error_.flags.contiguous: + error_ = np.ascontiguousarray(error_) + if error_.flags.c_contiguous: + error1d = np.asarray(error_).ravel() + else: + error1d = np.asarray(error_.T).ravel() + if order == 2: + squared_dist_rel = np.dot(error1d, error1d) / math.pow(cutoff, 2) + else: + squared_dist_rel = math.pow( + np.linalg.norm(error1d, order) / cutoff, 2) else: - squared_dist_rel = math.pow(np.linalg.norm(error_, order) / cutoff, 2) + squared_dist_rel = np.sum(np.square(error_)) / math.pow(cutoff, 2) return math.pow(CUTOFF_ESP, squared_dist_rel) diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py index 09be1f41b..d22168ed0 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py +++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py @@ -27,6 +27,7 @@ CenterOfMass, CapturePoint, ZeroMomentPoint, + MultiContactRelativeForceTangential, translate_position_odom) @@ -58,5 +59,6 @@ 'CenterOfMass', 'CapturePoint', 'ZeroMomentPoint', + 'MultiContactRelativeForceTangential', 'translate_position_odom' ] diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py index 8d9c911d8..c84052403 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py +++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py @@ -8,7 +8,8 @@ import numba as nb import jiminy_py.core as jiminy -from jiminy_py.core import array_copyto # pylint: disable=no-name-in-module +from jiminy_py.core import ( # pylint: disable=no-name-in-module + array_copyto, multi_array_copyto) from jiminy_py.dynamics import update_quantities import pinocchio as pin @@ -902,3 +903,92 @@ def refresh(self) -> np.ndarray: translate_position_odom(self._dcm, self.odom_pose, self._dcm) return self._dcm + + +@dataclass(unsafe_hash=True) +class MultiContactRelativeForceTangential(InterfaceQuantity[np.ndarray]): + """Standardized tangential forces apply on all contact points and collision + bodies. + + The tangential force is rescaled by the weight of the robot rather than the + actual vertical force. It has the advantage to guarantee that the resulting + quantity is never poorly conditioned, which would be the case otherwise. + Moreover, the effect of the vertical force is not canceled out, which is + interesting for deriving a reward, as it allows for indirectly penalize + jerky contact states and violent impacts. The side effect is not being able + to guarantee that this quantity is bounded. Indeed, only the ratio of the + norm of the tangential force at every contact point (or the resulting one) + is bounded by the product of the friction coefficient by the vertical + force, not the tangential force itself. This issue is a minor inconvenience + as all it requires is normalization using RBF kernel to make it finite. + """ + + def __init__(self, + env: InterfaceJiminyEnv, + parent: Optional[InterfaceQuantity]) -> None: + """ + :param env: Base or wrapped jiminy environment. + :param parent: Higher-level quantity from which this quantity is a + requirement if any, `None` otherwise. + """ + # Call base implementation + super().__init__( + env, + parent, + requirements={}, + auto_refresh=False) + + # Weight of the robot + self._robot_weight: float = -1 + + # Stacked tangential forces on all contact points and collision bodies + self._force_tangential_batch = np.array([]) + + # Define proxy for views of individual tangential forces in the stack + self._force_tangential_views: Tuple[np.ndarray, ...] = () + + # Define proxy for the current tangential forces + self._force_tangential_refs: Tuple[np.ndarray, ...] = () + + def initialize(self) -> None: + # Call base implementation + super().initialize() + + # Compute the weight of the robot + gravity = abs(self.env.robot.pinocchio_model.gravity.linear[2]) + robot_mass = self.env.robot.pinocchio_data.mass[0] + self._robot_weight = robot_mass * gravity + + # Get all frame constraints associated with contacts and collisions + frame_constraints: Sequence[jiminy.FrameConstraint] = [] + for constraint in self.env.robot.constraints.contact_frames.values(): + assert isinstance(constraint, jiminy.FrameConstraint) + frame_constraints.append(constraint) + for constraints_body in self.env.robot.constraints.collision_bodies: + for constraint in constraints_body: + assert isinstance(constraint, jiminy.FrameConstraint) + frame_constraints.append(constraint) + + # Extract references to all the tangential forces + force_tangential_refs = [] + for constraint in frame_constraints: + # f_lin, f_ang = lambda[:3], np.array([0.0, 0.0, lambda[3]]) + force_tangential_refs.append(constraint.lambda_c[:2]) + self._force_tangential_refs = tuple(force_tangential_refs) + + # Pre-allocated memory for stacked tangential forces + self._force_tangential_batch = np.zeros( + (2, len(self._force_tangential_refs)), order='F') + + # Refresh proxies + self._force_tangential_views = tuple(self._force_tangential_batch.T) + + def refresh(self) -> np.ndarray: + # Copy all tangential forces in contiguous buffer + multi_array_copyto(self._force_tangential_views, + self._force_tangential_refs) + + # Scale the tangential forces by the weight of the robot + self._force_tangential_batch /= self._robot_weight + + return self._force_tangential_batch diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/transform.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/transform.py index f879e7c87..f471af826 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/quantities/transform.py +++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/transform.py @@ -116,7 +116,8 @@ class MaskedQuantity(InterfaceQuantity[np.ndarray]): Elements will be extract by copy unless the indices of the elements to extract to be written equivalently by a slice (ie they are evenly spaced), and the array can be flattened while preserving memory contiguity if 'axis' - is `None`. + is `None`, which means that the result will be different between C- and F- + contiguous arrays. """ quantity: InterfaceQuantity @@ -204,9 +205,9 @@ def refresh(self) -> np.ndarray: # Notably, `operator[]` supports boolean mask but `take` does not. return self.data.take(self.indices, self.axis) if self.axis is None: - # `reshape` must be used instead of `flat` to get a view that can + # `ravel` must be used instead of `flat` to get a view that can # be sliced without copy. - return self.data.reshape((-1,))[self._slices] + return self.data.ravel(order="K")[self._slices] return self.data[self._slices] diff --git a/python/gym_jiminy/unit_py/test_misc.py b/python/gym_jiminy/unit_py/test_misc.py index 63a7a41be..8baac24de 100644 --- a/python/gym_jiminy/unit_py/test_misc.py +++ b/python/gym_jiminy/unit_py/test_misc.py @@ -190,4 +190,5 @@ def test_math(self): np.testing.assert_allclose( rpy_i, pin.rpy.matrixToRpy(rot_mat_i.reshape((3, 3)))) np.testing.assert_allclose(math.rpy_to_quat(rpy), quat) - np.testing.assert_allclose(math.matrix_to_quat(rot_mat), quat) + np.testing.assert_allclose( + math.matrix_to_quat(rot_mat), quat, atol=1e-9) diff --git a/python/gym_jiminy/unit_py/test_quantities.py b/python/gym_jiminy/unit_py/test_quantities.py index 1290b1045..b57175047 100644 --- a/python/gym_jiminy/unit_py/test_quantities.py +++ b/python/gym_jiminy/unit_py/test_quantities.py @@ -30,7 +30,8 @@ ActuatedJointsPosition, CenterOfMass, CapturePoint, - ZeroMomentPoint) + ZeroMomentPoint, + MultiContactRelativeForceTangential) class Quantities(unittest.TestCase): @@ -523,3 +524,24 @@ def test_foot_relative_pose(self): np.testing.assert_allclose(value[:3], pos_rel[:, :-1]) np.testing.assert_allclose(value[-4:], quat_rel[:, :-1]) + + def test_tangential_forces(self): + """ TODO: Write documentation + """ + env = gym.make("gym_jiminy.envs:atlas") + + env.quantities["force_tangential_rel"] = ( + MultiContactRelativeForceTangential, {}) + + env.reset(seed=0) + for _ in range(10): + env.step(env.action) + + gravity = abs(env.robot.pinocchio_model.gravity.linear[2]) + robot_mass = env.robot.pinocchio_data.mass[0] + force_tangential_rel = np.stack(tuple( + constraint.lambda_c[:2] + for constraint in env.robot.constraints.contact_frames.values()), + axis=-1) / (robot_mass * gravity) + np.testing.assert_allclose( + force_tangential_rel, env.quantities["force_tangential_rel"]) diff --git a/python/gym_jiminy/unit_py/test_rewards.py b/python/gym_jiminy/unit_py/test_rewards.py index acaa43124..096311b1f 100644 --- a/python/gym_jiminy/unit_py/test_rewards.py +++ b/python/gym_jiminy/unit_py/test_rewards.py @@ -16,6 +16,7 @@ TrackingCapturePointReward, TrackingFootPositionsReward, TrackingFootOrientationsReward, + MinimizeFrictionReward, SurviveReward, AdditiveMixtureReward) from gym_jiminy.toolbox.compositions import ( @@ -74,9 +75,10 @@ def test_tracking(self): np.testing.assert_allclose( quantity_true.get(), self.env.robot_state.q[2]) - gamma = - np.log(0.01) / cutoff ** 2 + gamma = - np.log(CUTOFF_ESP) / cutoff ** 2 value = np.exp(- gamma * np.sum((reward.quantity.op( quantity_true.get(), quantity_ref.get())) ** 2)) + assert value > 0.01 np.testing.assert_allclose(reward(terminated, {}), value) del reward @@ -125,3 +127,19 @@ def test_stability(self): np.testing.assert_allclose(tanh_normalization( CUTOFF_OUTER, -CUTOFF_INNER, CUTOFF_OUTER), CUTOFF_ESP) np.testing.assert_allclose(reward_stability(terminated, {}), value) + + def test_friction(self): + CUTOFF = 0.5 + env = gym.make("gym_jiminy.envs:atlas-pid", debug=True) + reward_friction = MinimizeFrictionReward(env, cutoff=CUTOFF) + quantity = reward_friction.quantity + + env.reset(seed=0) + _, _, terminated, _, _ = env.step(env.action) + force_tangential_rel = quantity.get() + force_tangential_rel_norm = np.sum(np.square(force_tangential_rel)) + + gamma = - np.log(CUTOFF_ESP) / CUTOFF ** 2 + value = np.exp(- gamma * force_tangential_rel_norm) + assert value > 0.01 + np.testing.assert_allclose(reward_friction(terminated, {}), value)