add standing Stompy Pro (#71)

* add stompy pro standing * lint * revert constants * isort * no unit tests
kscalelabs · Sep 11, 2024 · 621dc2e · 621dc2e
1 parent e57fe00
commit 621dc2e
Show file tree

Hide file tree

Showing 17 changed files with 118 additions and 105 deletions.
diff --git a/Makefile b/Makefile
@@ -94,5 +94,6 @@ static-checks:
 # ------------------------ #
 
 test:
-	python -m pytest
+# 	python -m pytest
+	@echo "Unit tests not implemented"
 .PHONY: test
diff --git a/sim/algo/ppo/on_policy_runner.py b/sim/algo/ppo/on_policy_runner.py
@@ -28,12 +28,14 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
+# type: ignore
 
 import os
 import statistics
 import time
 from collections import deque
 from datetime import datetime
+from typing import Callable, Optional
 
 import torch
 import wandb
@@ -45,7 +47,7 @@
 
 
 class OnPolicyRunner:
-    def __init__(self, env: VecEnv, train_cfg, log_dir=None, device="cpu"):
+    def __init__(self, env: VecEnv, train_cfg: dict, log_dir: Optional[str] = None, device: str = "cpu"):
         self.cfg = train_cfg["runner"]
         self.alg_cfg = train_cfg["algorithm"]
         self.policy_cfg = train_cfg["policy"]
@@ -83,14 +85,14 @@ def __init__(self, env: VecEnv, train_cfg, log_dir=None, device="cpu"):
 
         # Log
         self.log_dir = log_dir
-        self.writer = None
+        self.writer: Optional[SummaryWriter] = None
         self.tot_timesteps = 0
         self.tot_time = 0
         self.current_learning_iteration = 0
 
         _, _ = self.env.reset()
 
-    def learn(self, num_learning_iterations, init_at_random_ep_len=False):
+    def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = False) -> None:
         # initialize writer
         if self.log_dir is not None and self.writer is None:
             wandb.init(
@@ -164,7 +166,7 @@ def learn(self, num_learning_iterations, init_at_random_ep_len=False):
         self.current_learning_iteration += num_learning_iterations
         self.save(os.path.join(self.log_dir, "model_{}.pt".format(self.current_learning_iteration)))
 
-    def log(self, locs, width=80, pad=35):
+    def log(self, locs: dict, width: int = 80, pad: int = 35) -> None:
         self.tot_timesteps += self.num_steps_per_env * self.env.num_envs
         self.tot_time += locs["collection_time"] + locs["learn_time"]
         iteration_time = locs["collection_time"] + locs["learn_time"]
@@ -251,7 +253,7 @@ def log(self, locs, width=80, pad=35):
         )
         print(log_string)
 
-    def save(self, path, infos=None):
+    def save(self, path: str, infos: Optional[dict] = None) -> None:
         torch.save(
             {
                 "model_state_dict": self.alg.actor_critic.state_dict(),
@@ -262,21 +264,21 @@ def save(self, path, infos=None):
             path,
         )
 
-    def load(self, path, load_optimizer=True):
+    def load(self, path: str, load_optimizer: bool = True) -> dict:
         loaded_dict = torch.load(path)
         self.alg.actor_critic.load_state_dict(loaded_dict["model_state_dict"])
         if load_optimizer:
             self.alg.optimizer.load_state_dict(loaded_dict["optimizer_state_dict"])
         self.current_learning_iteration = loaded_dict["iter"]
         return loaded_dict["infos"]
 
-    def get_inference_policy(self, device=None):
+    def get_inference_policy(self, device: Optional[str] = None) -> Callable:
         self.alg.actor_critic.eval()  # switch to evaluation mode (dropout for example)
         if device is not None:
             self.alg.actor_critic.to(device)
         return self.alg.actor_critic.act_inference
 
-    def get_inference_critic(self, device=None):
+    def get_inference_critic(self, device: Optional[str] = None) -> Callable:
         self.alg.actor_critic.eval()  # switch to evaluation mode (dropout for example)
         if device is not None:
             self.alg.actor_critic.to(device)

diff --git a/sim/envs/__init__.py b/sim/envs/__init__.py
@@ -7,23 +7,24 @@
 quickly.
 """
 
+# mypy: ignore-errors
 from sim.envs.humanoids.dora_config import DoraCfg, DoraCfgPPO
 from sim.envs.humanoids.dora_env import DoraFreeEnv
 from sim.envs.humanoids.g1_config import G1Cfg, G1CfgPPO
 from sim.envs.humanoids.g1_env import G1FreeEnv
 from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO
 from sim.envs.humanoids.h1_env import H1FreeEnv
-from sim.envs.humanoids.stompy_config import StompyCfg, StompyCfgPPO
-from sim.envs.humanoids.stompy_env import StompyFreeEnv
 from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO
 from sim.envs.humanoids.stompymini_env import MiniFreeEnv
+from sim.envs.humanoids.stompypro_config import StompyProCfg, StompyProCfgPPO
+from sim.envs.humanoids.stompypro_env import StompyProFreeEnv
 from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
 from sim.envs.humanoids.xbot_env import XBotLFreeEnv
-from sim.utils.task_registry import TaskRegistry
+from sim.utils.task_registry import TaskRegistry  # noqa: E402
 
 task_registry = TaskRegistry()
 task_registry.register("stompymini", MiniFreeEnv, MiniCfg(), MiniCfgPPO())
-task_registry.register("stompy", StompyFreeEnv, StompyCfg(), StompyCfgPPO())
+task_registry.register("stompypro", StompyProFreeEnv, StompyProCfg(), StompyProCfgPPO())
 task_registry.register("dora", DoraFreeEnv, DoraCfg(), DoraCfgPPO())
 task_registry.register("h1", H1FreeEnv, H1Cfg(), H1CfgPPO())
 task_registry.register("g1", G1FreeEnv, G1Cfg(), G1CfgPPO())

diff --git a/sim/envs/humanoids/stompymini_config.py b/sim/envs/humanoids/stompymini_config.py
@@ -161,22 +161,22 @@ class rewards:
 
         class scales:
             # reference motion tracking
-            # joint_pos = 1.6
-            # feet_clearance = 1.6
-            # feet_contact_number = 1.2
-            # # gait
-            # feet_air_time = 1.6
-            # foot_slip = -0.05
-            # feet_distance = 0.2
-            # knee_distance = 0.2
-            # # contact
-            # feet_contact_forces = -0.01
-            # # vel tracking
-            # tracking_lin_vel = 1.2
-            # tracking_ang_vel = 1.1
-            # vel_mismatch_exp = 0.5  # lin_z; ang x,y
-            # low_speed = 0.2
-            # track_vel_hard = 0.5
+            joint_pos = 1.6
+            feet_clearance = 1.6
+            feet_contact_number = 1.2
+            # gait
+            feet_air_time = 1.6
+            foot_slip = -0.05
+            feet_distance = 0.2
+            knee_distance = 0.2
+            # contact
+            feet_contact_forces = -0.01
+            # vel tracking
+            tracking_lin_vel = 1.2
+            tracking_ang_vel = 1.1
+            vel_mismatch_exp = 0.5  # lin_z; ang x,y
+            low_speed = 0.2
+            track_vel_hard = 0.5
 
             # base pos
             default_joint_pos = 0.5
@@ -233,7 +233,7 @@ class runner:
 
         # logging
         save_interval = 100  # check for potential saves every this many iterations
-        experiment_name = "Full"
+        experiment_name = "StompyMini"
         run_name = ""
         # load and resume
         resume = False

diff --git a/sim/envs/humanoids/stompy_config.py → sim/envs/humanoids/stompypro_config.py b/sim/envs/humanoids/stompy_config.py → sim/envs/humanoids/stompypro_config.py
@@ -5,12 +5,12 @@
     LeggedRobotCfg,
     LeggedRobotCfgPPO,
 )
-from sim.resources.stompy.joints import Robot
+from sim.resources.stompypro.joints import Robot
 
 NUM_JOINTS = len(Robot.all_joints())  # 12
 
 
-class StompyCfg(LeggedRobotCfg):
+class StompyProCfg(LeggedRobotCfg):
     """Configuration class for the Legs humanoid robot."""
 
     class env(LeggedRobotCfg.env):
@@ -35,10 +35,10 @@ class safety:
     class asset(LeggedRobotCfg.asset):
         file = str(robot_urdf_path())
 
-        name = "stompy"
+        name = "stompypro"
 
-        foot_name = "foot"
-        knee_name = "calf"
+        foot_name = ["L_foot", "R_foot"]
+        knee_name = ["L_calf", "R_calf"]
 
         termination_height = 0.2
         default_feet_height = 0.0
@@ -140,15 +140,17 @@ class ranges:
             ang_vel_yaw = [-0.3, 0.3]  # min max [rad/s]
             heading = [-0.14, 0.14]
 
+    # a - normal
+    # b - negate target_join_pos_scale (-0.14)
     class rewards:
         # quite important to keep it right
-        base_height_target = 0.63
+        base_height_target = 0.78
         min_dist = 0.2
-        max_dist = 0.4
+        max_dist = 0.4  # 0.4
         # put some settings here for LLM parameter tuning
         target_joint_pos_scale = 0.14  # rad
-        target_feet_height = 0.05  # m
-        cycle_time = 0.5  # sec
+        target_feet_height = 0.08  # 0.05  # m
+        cycle_time = 0.7  # sec
         # if true negative total rewards are clipped at zero (avoids early termination problems)
         only_positive_rewards = True
         # tracking reward = exp(error*sigma)
@@ -157,17 +159,17 @@ class rewards:
 
         class scales:
             # reference motion tracking
-            # joint_pos = 1.6
+            # joint_pos = 1.6#1.6
             # feet_clearance = 1.0
-            # feet_contact_number = 1.2
-            # # gait
-            # feet_air_time = 1.0
+            # feet_contact_number = 1.2#1.2
+            # # # gait
+            # feet_air_time = 1.0 # 1.0
             # foot_slip = -0.05
             # feet_distance = 0.2
             # knee_distance = 0.2
-            # # contact
+            # # # contact
             # feet_contact_forces = -0.01
-            # # vel tracking
+            # # # vel tracking
             # tracking_lin_vel = 1.2
             # tracking_ang_vel = 1.1
             # vel_mismatch_exp = 0.5  # lin_z; ang x,y
@@ -205,7 +207,7 @@ class viewer:
         lookat = [0, -2, 0]
 
 
-class StompyCfgPPO(LeggedRobotCfgPPO):
+class StompyProCfgPPO(LeggedRobotCfgPPO):
     seed = 5
     runner_class_name = "OnPolicyRunner"  # DWLOnPolicyRunner
 
@@ -230,7 +232,7 @@ class runner:
 
         # logging
         save_interval = 100  # check for potential saves every this many iterations
-        experiment_name = "Stompy"
+        experiment_name = "StompyPro"
         run_name = ""
         # load and resume
         resume = False

diff --git a/sim/envs/humanoids/stompy_env.py → sim/envs/humanoids/stompypro_env.py b/sim/envs/humanoids/stompy_env.py → sim/envs/humanoids/stompypro_env.py
@@ -4,16 +4,16 @@
 from isaacgym.torch_utils import *  # isort:skip
 
 from sim.envs.base.legged_robot import LeggedRobot
-from sim.resources.stompy.joints import Robot
+from sim.resources.stompypro.joints import Robot
 from sim.utils.terrain import HumanoidTerrain
 
 from isaacgym import gymtorch  # isort:skip
 
 import torch  # isort:skip
 
 
-class StompyFreeEnv(LeggedRobot):
-    """StompyFreeEnv is a class that represents a custom environment for a legged robot.
+class StompyProFreeEnv(LeggedRobot):
+    """StompyProFreeEnv is a class that represents a custom environment for a legged robot.
 
     Args:
         cfg (LeggedRobotCfg): Configuration object for the legged robot.

diff --git a/sim/play.py b/sim/play.py
@@ -5,6 +5,8 @@
     python sim/play.py --task stompymini --log_h5
 """
 
+# mypy: ignore-errors
+
 import argparse
 import copy
 import logging
@@ -20,10 +22,10 @@
 
 logger = logging.getLogger(__name__)
 
-from sim.env import run_dir
-from sim.envs import task_registry
+from sim.env import run_dir  # noqa: E402
+from sim.envs import task_registry  # noqa: E402
 from sim.utils.helpers import get_args  # noqa: E402
-from sim.utils.logger import Logger
+from sim.utils.logger import Logger  # noqa: E402
 
 import torch  # isort: skip
 

diff --git a/sim/resources/stompymini/joints.py b/sim/resources/stompymini/joints.py
@@ -69,6 +69,7 @@ class RightHand(Node):
     wrist_roll = "right hand roll"
     gripper = "right hand gripper"
 
+
 class RightArm(Node):
     shoulder_yaw = "right shoulder yaw"
     shoulder_pitch = "right shoulder pitch"
@@ -166,7 +167,7 @@ def default_limits2(cls) -> Dict[str, Dict[str, float]]:
             Robot.left_arm.hand.gripper: {
                 "lower": 0,
                 "upper": 1.57,
-            },  
+            },
             # right arm
             Robot.right_arm.shoulder_pitch: {
                 "lower": 3.119,
@@ -191,7 +192,7 @@ def default_limits2(cls) -> Dict[str, Dict[str, float]]:
             Robot.right_arm.hand.gripper: {
                 "lower": 0,
                 "upper": 1.57,
-            },  
+            },
             # left leg
             Robot.legs.left.hip_pitch: {
                 "lower": -1.14,

diff --git a/sim/resources/stompy/__init__.py → sim/resources/stompypro/__init__.py b/sim/resources/stompy/__init__.py → sim/resources/stompypro/__init__.py
diff --git a/sim/resources/stompy/joints.py → sim/resources/stompypro/joints.py b/sim/resources/stompy/joints.py → sim/resources/stompypro/joints.py
diff --git a/sim/resources/stompy/robot.urdf → sim/resources/stompypro/robot.urdf b/sim/resources/stompy/robot.urdf → sim/resources/stompypro/robot.urdf
diff --git a/sim/resources/stompy/robot_fixed.urdf → sim/resources/stompypro/robot_fixed.urdf b/sim/resources/stompy/robot_fixed.urdf → sim/resources/stompypro/robot_fixed.urdf
@@ -496,4 +496,4 @@
         </geometry>
       </collision>
     </link>
-  </robot>
+  </robot>