From 1346315157aa2a9c1b587ceb848cae48264a963a Mon Sep 17 00:00:00 2001
From: Alexis Duburcq <alexis.duburcq@gmail.com>
Date: Mon, 10 Jun 2024 22:18:29 +0200
Subject: [PATCH] [gym/common] Add ground friction minimization reward.

---
 .../common/compositions/__init__.py           |  4 +-
 .../common/compositions/locomotion.py         | 36 +++++++-
 .../gym_jiminy/common/compositions/mixin.py   | 19 +++-
 .../gym_jiminy/common/quantities/__init__.py  |  2 +
 .../common/quantities/locomotion.py           | 92 ++++++++++++++++++-
 .../gym_jiminy/common/quantities/transform.py |  7 +-
 python/gym_jiminy/unit_py/test_misc.py        |  3 +-
 python/gym_jiminy/unit_py/test_quantities.py  | 24 ++++-
 python/gym_jiminy/unit_py/test_rewards.py     | 20 +++-
 9 files changed, 193 insertions(+), 14 deletions(-)

diff --git a/python/gym_jiminy/common/gym_jiminy/common/compositions/__init__.py b/python/gym_jiminy/common/gym_jiminy/common/compositions/__init__.py
index 17d2f3521..a0ac75db7 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/compositions/__init__.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/compositions/__init__.py
@@ -12,7 +12,8 @@
                          TrackingCapturePointReward,
                          TrackingFootPositionsReward,
                          TrackingFootOrientationsReward,
-                         MinimizeAngularMomentumReward)
+                         MinimizeAngularMomentumReward,
+                         MinimizeFrictionReward)
 
 __all__ = [
     "CUTOFF_ESP",
@@ -27,5 +28,6 @@
     "TrackingFootPositionsReward",
     "TrackingFootOrientationsReward",
     "MinimizeAngularMomentumReward",
+    "MinimizeFrictionReward",
     "SurviveReward"
 ]
diff --git a/python/gym_jiminy/common/gym_jiminy/common/compositions/locomotion.py b/python/gym_jiminy/common/gym_jiminy/common/compositions/locomotion.py
index 68044d684..6932e895f 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/compositions/locomotion.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/compositions/locomotion.py
@@ -9,8 +9,9 @@
 from ..bases import (
     InterfaceJiminyEnv, StateQuantity, QuantityEvalMode, BaseQuantityReward)
 from ..quantities import (
-    MaskedQuantity, UnaryOpQuantity, AverageBaseOdometryVelocity,
-    AverageBaseMomentum, MultiFootRelativeXYZQuat, CapturePoint)
+    MaskedQuantity, UnaryOpQuantity, AverageBaseOdometryVelocity, CapturePoint,
+    MultiFootRelativeXYZQuat, MultiContactRelativeForceTangential,
+    AverageBaseMomentum)
 from ..quantities.locomotion import sanitize_foot_frame_names
 from ..utils import quat_difference
 
@@ -221,3 +222,34 @@ def __init__(self,
             partial(radial_basis_function, cutoff=self.cutoff, order=2),
             is_normalized=True,
             is_terminal=False)
+
+
+class MinimizeFrictionReward(BaseQuantityReward):
+    """Reward the agent for minimizing the tangential forces at all the contact
+    points and collision bodies, and to avoid jerky intermittent contact state.
+
+    The L2-norm is used to aggregate all the local tangential forces. While the
+    L1-norm would be more natural in this specific cases, using the L2-norm is
+    preferable as it promotes space-time regularity, ie balancing the  force
+    distribution evenly between all the candidate contact points and avoiding
+    jerky contact forces over time (high-frequency vibrations),  phenomena to
+    which the L1-norm is completely insensitive.
+    """
+    def __init__(self,
+                 env: InterfaceJiminyEnv,
+                 cutoff: float) -> None:
+        """
+        :param env: Base or wrapped jiminy environment.
+        :param cutoff: Cutoff threshold for the RBF kernel transform.
+        """
+        # Backup some user argument(s)
+        self.cutoff = cutoff
+
+        # Call base implementation
+        super().__init__(
+            env,
+            "reward_friction",
+            (MultiContactRelativeForceTangential, dict()),
+            partial(radial_basis_function, cutoff=self.cutoff, order=2),
+            is_normalized=True,
+            is_terminal=False)
diff --git a/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py b/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py
index ce0a90d19..80bb0dab8 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py
@@ -46,11 +46,22 @@ def radial_basis_function(error: ArrayOrScalar,
     :param cutoff: Cut-off threshold to consider.
     :param order: Order of Lp-Norm that will be used as distance metric.
     """
-    error_ = np.asarray(error).reshape((-1,))
-    if order == 2:
-        squared_dist_rel = np.dot(error_, error_) / math.pow(cutoff, 2)
+    error_ = np.asarray(error)
+    is_contiguous = error_.flags.f_contiguous or error_.flags.c_contiguous
+    if is_contiguous or order != 2:
+        if error_.ndim > 1 and not is_contiguous:
+            error_ = np.ascontiguousarray(error_)
+        if error_.flags.c_contiguous:
+            error1d = np.asarray(error_).ravel()
+        else:
+            error1d = np.asarray(error_.T).ravel()
+        if order == 2:
+            squared_dist_rel = np.dot(error1d, error1d) / math.pow(cutoff, 2)
+        else:
+            squared_dist_rel = math.pow(
+                np.linalg.norm(error1d, order) / cutoff, 2)
     else:
-        squared_dist_rel = math.pow(np.linalg.norm(error_, order) / cutoff, 2)
+        squared_dist_rel = np.sum(np.square(error_)) / math.pow(cutoff, 2)
     return math.pow(CUTOFF_ESP, squared_dist_rel)
 
 
diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py
index 09be1f41b..d22168ed0 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py
@@ -27,6 +27,7 @@
                          CenterOfMass,
                          CapturePoint,
                          ZeroMomentPoint,
+                         MultiContactRelativeForceTangential,
                          translate_position_odom)
 
 
@@ -58,5 +59,6 @@
     'CenterOfMass',
     'CapturePoint',
     'ZeroMomentPoint',
+    'MultiContactRelativeForceTangential',
     'translate_position_odom'
 ]
diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py
index 8d9c911d8..c84052403 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py
@@ -8,7 +8,8 @@
 import numba as nb
 
 import jiminy_py.core as jiminy
-from jiminy_py.core import array_copyto  # pylint: disable=no-name-in-module
+from jiminy_py.core import (  # pylint: disable=no-name-in-module
+    array_copyto, multi_array_copyto)
 from jiminy_py.dynamics import update_quantities
 import pinocchio as pin
 
@@ -902,3 +903,92 @@ def refresh(self) -> np.ndarray:
             translate_position_odom(self._dcm, self.odom_pose, self._dcm)
 
         return self._dcm
+
+
+@dataclass(unsafe_hash=True)
+class MultiContactRelativeForceTangential(InterfaceQuantity[np.ndarray]):
+    """Standardized tangential forces apply on all contact points and collision
+    bodies.
+
+    The tangential force is rescaled by the weight of the robot rather than the
+    actual vertical force. It has the advantage to guarantee that the resulting
+    quantity is never poorly conditioned, which would be the case otherwise.
+    Moreover, the effect of the vertical force is not canceled out, which is
+    interesting for deriving a reward, as it allows for indirectly penalize
+    jerky contact states and violent impacts. The side effect is not being able
+    to guarantee that this quantity is bounded. Indeed, only the ratio of the
+    norm of the tangential force at every contact point (or the resulting one)
+    is bounded by the product of the friction coefficient by the vertical
+    force, not the tangential force itself. This issue is a minor inconvenience
+    as all it requires is normalization using RBF kernel to make it finite.
+    """
+
+    def __init__(self,
+                 env: InterfaceJiminyEnv,
+                 parent: Optional[InterfaceQuantity]) -> None:
+        """
+        :param env: Base or wrapped jiminy environment.
+        :param parent: Higher-level quantity from which this quantity is a
+                       requirement if any, `None` otherwise.
+        """
+        # Call base implementation
+        super().__init__(
+            env,
+            parent,
+            requirements={},
+            auto_refresh=False)
+
+        # Weight of the robot
+        self._robot_weight: float = -1
+
+        # Stacked tangential forces on all contact points and collision bodies
+        self._force_tangential_batch = np.array([])
+
+        # Define proxy for views of individual tangential forces in the stack
+        self._force_tangential_views: Tuple[np.ndarray, ...] = ()
+
+        # Define proxy for the current tangential forces
+        self._force_tangential_refs: Tuple[np.ndarray, ...] = ()
+
+    def initialize(self) -> None:
+        # Call base implementation
+        super().initialize()
+
+        # Compute the weight of the robot
+        gravity = abs(self.env.robot.pinocchio_model.gravity.linear[2])
+        robot_mass = self.env.robot.pinocchio_data.mass[0]
+        self._robot_weight = robot_mass * gravity
+
+        # Get all frame constraints associated with contacts and collisions
+        frame_constraints: Sequence[jiminy.FrameConstraint] = []
+        for constraint in self.env.robot.constraints.contact_frames.values():
+            assert isinstance(constraint, jiminy.FrameConstraint)
+            frame_constraints.append(constraint)
+        for constraints_body in self.env.robot.constraints.collision_bodies:
+            for constraint in constraints_body:
+                assert isinstance(constraint, jiminy.FrameConstraint)
+                frame_constraints.append(constraint)
+
+        # Extract references to all the tangential forces
+        force_tangential_refs = []
+        for constraint in frame_constraints:
+            # f_lin, f_ang = lambda[:3], np.array([0.0, 0.0, lambda[3]])
+            force_tangential_refs.append(constraint.lambda_c[:2])
+        self._force_tangential_refs = tuple(force_tangential_refs)
+
+        # Pre-allocated memory for stacked tangential forces
+        self._force_tangential_batch = np.zeros(
+            (2, len(self._force_tangential_refs)), order='F')
+
+        # Refresh proxies
+        self._force_tangential_views = tuple(self._force_tangential_batch.T)
+
+    def refresh(self) -> np.ndarray:
+        # Copy all tangential forces in contiguous buffer
+        multi_array_copyto(self._force_tangential_views,
+                           self._force_tangential_refs)
+
+        # Scale the tangential forces by the weight of the robot
+        self._force_tangential_batch /= self._robot_weight
+
+        return self._force_tangential_batch
diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/transform.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/transform.py
index f879e7c87..f471af826 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/quantities/transform.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/transform.py
@@ -116,7 +116,8 @@ class MaskedQuantity(InterfaceQuantity[np.ndarray]):
     Elements will be extract by copy unless the indices of the elements to
     extract to be written equivalently by a slice (ie they are evenly spaced),
     and the array can be flattened while preserving memory contiguity if 'axis'
-    is `None`.
+    is `None`, which means that the result will be different between C- and F-
+    contiguous arrays.
     """
 
     quantity: InterfaceQuantity
@@ -204,9 +205,9 @@ def refresh(self) -> np.ndarray:
             # Notably, `operator[]` supports boolean mask but `take` does not.
             return self.data.take(self.indices, self.axis)
         if self.axis is None:
-            # `reshape` must be used instead of `flat` to get a view that can
+            # `ravel` must be used instead of `flat` to get a view that can
             # be sliced without copy.
-            return self.data.reshape((-1,))[self._slices]
+            return self.data.ravel(order="K")[self._slices]
         return self.data[self._slices]
 
 
diff --git a/python/gym_jiminy/unit_py/test_misc.py b/python/gym_jiminy/unit_py/test_misc.py
index 63a7a41be..8baac24de 100644
--- a/python/gym_jiminy/unit_py/test_misc.py
+++ b/python/gym_jiminy/unit_py/test_misc.py
@@ -190,4 +190,5 @@ def test_math(self):
                 np.testing.assert_allclose(
                     rpy_i, pin.rpy.matrixToRpy(rot_mat_i.reshape((3, 3))))
             np.testing.assert_allclose(math.rpy_to_quat(rpy), quat)
-            np.testing.assert_allclose(math.matrix_to_quat(rot_mat), quat)
+            np.testing.assert_allclose(
+                math.matrix_to_quat(rot_mat), quat, atol=1e-9)
diff --git a/python/gym_jiminy/unit_py/test_quantities.py b/python/gym_jiminy/unit_py/test_quantities.py
index 1290b1045..b57175047 100644
--- a/python/gym_jiminy/unit_py/test_quantities.py
+++ b/python/gym_jiminy/unit_py/test_quantities.py
@@ -30,7 +30,8 @@
     ActuatedJointsPosition,
     CenterOfMass,
     CapturePoint,
-    ZeroMomentPoint)
+    ZeroMomentPoint,
+    MultiContactRelativeForceTangential)
 
 
 class Quantities(unittest.TestCase):
@@ -523,3 +524,24 @@ def test_foot_relative_pose(self):
 
         np.testing.assert_allclose(value[:3], pos_rel[:, :-1])
         np.testing.assert_allclose(value[-4:], quat_rel[:, :-1])
+
+    def test_tangential_forces(self):
+        """ TODO: Write documentation
+        """
+        env = gym.make("gym_jiminy.envs:atlas")
+
+        env.quantities["force_tangential_rel"] = (
+            MultiContactRelativeForceTangential, {})
+
+        env.reset(seed=0)
+        for _ in range(10):
+            env.step(env.action)
+
+        gravity = abs(env.robot.pinocchio_model.gravity.linear[2])
+        robot_mass = env.robot.pinocchio_data.mass[0]
+        force_tangential_rel = np.stack(tuple(
+            constraint.lambda_c[:2]
+            for constraint in env.robot.constraints.contact_frames.values()),
+            axis=-1) / (robot_mass * gravity)
+        np.testing.assert_allclose(
+            force_tangential_rel, env.quantities["force_tangential_rel"])
diff --git a/python/gym_jiminy/unit_py/test_rewards.py b/python/gym_jiminy/unit_py/test_rewards.py
index acaa43124..096311b1f 100644
--- a/python/gym_jiminy/unit_py/test_rewards.py
+++ b/python/gym_jiminy/unit_py/test_rewards.py
@@ -16,6 +16,7 @@
     TrackingCapturePointReward,
     TrackingFootPositionsReward,
     TrackingFootOrientationsReward,
+    MinimizeFrictionReward,
     SurviveReward,
     AdditiveMixtureReward)
 from gym_jiminy.toolbox.compositions import (
@@ -74,9 +75,10 @@ def test_tracking(self):
                 np.testing.assert_allclose(
                     quantity_true.get(), self.env.robot_state.q[2])
 
-            gamma = - np.log(0.01) / cutoff ** 2
+            gamma = - np.log(CUTOFF_ESP) / cutoff ** 2
             value = np.exp(- gamma * np.sum((reward.quantity.op(
                 quantity_true.get(), quantity_ref.get())) ** 2))
+            assert value > 0.01
             np.testing.assert_allclose(reward(terminated, {}), value)
 
             del reward
@@ -125,3 +127,19 @@ def test_stability(self):
         np.testing.assert_allclose(tanh_normalization(
             CUTOFF_OUTER, -CUTOFF_INNER, CUTOFF_OUTER), CUTOFF_ESP)
         np.testing.assert_allclose(reward_stability(terminated, {}), value)
+
+    def test_friction(self):
+        CUTOFF = 0.5
+        env = gym.make("gym_jiminy.envs:atlas-pid", debug=True)
+        reward_friction = MinimizeFrictionReward(env, cutoff=CUTOFF)
+        quantity = reward_friction.quantity
+
+        env.reset(seed=0)
+        _, _, terminated, _, _ = env.step(env.action)
+        force_tangential_rel = quantity.get()
+        force_tangential_rel_norm = np.sum(np.square(force_tangential_rel))
+
+        gamma = - np.log(CUTOFF_ESP) / CUTOFF ** 2
+        value = np.exp(- gamma * force_tangential_rel_norm)
+        assert value > 0.01
+        np.testing.assert_allclose(reward_friction(terminated, {}), value)