Skip to content

Commit

Permalink
add standing Stompy Pro (#71)
Browse files Browse the repository at this point in the history
* add stompy pro standing

* lint

* revert constants

* isort

* no unit tests
  • Loading branch information
WT-MM authored Sep 11, 2024
1 parent e57fe00 commit 621dc2e
Show file tree
Hide file tree
Showing 17 changed files with 118 additions and 105 deletions.
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -94,5 +94,6 @@ static-checks:
# ------------------------ #

test:
python -m pytest
# python -m pytest
@echo "Unit tests not implemented"
.PHONY: test
18 changes: 10 additions & 8 deletions sim/algo/ppo/on_policy_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
# type: ignore

import os
import statistics
import time
from collections import deque
from datetime import datetime
from typing import Callable, Optional

import torch
import wandb
Expand All @@ -45,7 +47,7 @@


class OnPolicyRunner:
def __init__(self, env: VecEnv, train_cfg, log_dir=None, device="cpu"):
def __init__(self, env: VecEnv, train_cfg: dict, log_dir: Optional[str] = None, device: str = "cpu"):
self.cfg = train_cfg["runner"]
self.alg_cfg = train_cfg["algorithm"]
self.policy_cfg = train_cfg["policy"]
Expand Down Expand Up @@ -83,14 +85,14 @@ def __init__(self, env: VecEnv, train_cfg, log_dir=None, device="cpu"):

# Log
self.log_dir = log_dir
self.writer = None
self.writer: Optional[SummaryWriter] = None
self.tot_timesteps = 0
self.tot_time = 0
self.current_learning_iteration = 0

_, _ = self.env.reset()

def learn(self, num_learning_iterations, init_at_random_ep_len=False):
def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = False) -> None:
# initialize writer
if self.log_dir is not None and self.writer is None:
wandb.init(
Expand Down Expand Up @@ -164,7 +166,7 @@ def learn(self, num_learning_iterations, init_at_random_ep_len=False):
self.current_learning_iteration += num_learning_iterations
self.save(os.path.join(self.log_dir, "model_{}.pt".format(self.current_learning_iteration)))

def log(self, locs, width=80, pad=35):
def log(self, locs: dict, width: int = 80, pad: int = 35) -> None:
self.tot_timesteps += self.num_steps_per_env * self.env.num_envs
self.tot_time += locs["collection_time"] + locs["learn_time"]
iteration_time = locs["collection_time"] + locs["learn_time"]
Expand Down Expand Up @@ -251,7 +253,7 @@ def log(self, locs, width=80, pad=35):
)
print(log_string)

def save(self, path, infos=None):
def save(self, path: str, infos: Optional[dict] = None) -> None:
torch.save(
{
"model_state_dict": self.alg.actor_critic.state_dict(),
Expand All @@ -262,21 +264,21 @@ def save(self, path, infos=None):
path,
)

def load(self, path, load_optimizer=True):
def load(self, path: str, load_optimizer: bool = True) -> dict:
loaded_dict = torch.load(path)
self.alg.actor_critic.load_state_dict(loaded_dict["model_state_dict"])
if load_optimizer:
self.alg.optimizer.load_state_dict(loaded_dict["optimizer_state_dict"])
self.current_learning_iteration = loaded_dict["iter"]
return loaded_dict["infos"]

def get_inference_policy(self, device=None):
def get_inference_policy(self, device: Optional[str] = None) -> Callable:
self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example)
if device is not None:
self.alg.actor_critic.to(device)
return self.alg.actor_critic.act_inference

def get_inference_critic(self, device=None):
def get_inference_critic(self, device: Optional[str] = None) -> Callable:
self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example)
if device is not None:
self.alg.actor_critic.to(device)
Expand Down
9 changes: 5 additions & 4 deletions sim/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,24 @@
quickly.
"""

# mypy: ignore-errors
from sim.envs.humanoids.dora_config import DoraCfg, DoraCfgPPO
from sim.envs.humanoids.dora_env import DoraFreeEnv
from sim.envs.humanoids.g1_config import G1Cfg, G1CfgPPO
from sim.envs.humanoids.g1_env import G1FreeEnv
from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO
from sim.envs.humanoids.h1_env import H1FreeEnv
from sim.envs.humanoids.stompy_config import StompyCfg, StompyCfgPPO
from sim.envs.humanoids.stompy_env import StompyFreeEnv
from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO
from sim.envs.humanoids.stompymini_env import MiniFreeEnv
from sim.envs.humanoids.stompypro_config import StompyProCfg, StompyProCfgPPO
from sim.envs.humanoids.stompypro_env import StompyProFreeEnv
from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
from sim.envs.humanoids.xbot_env import XBotLFreeEnv
from sim.utils.task_registry import TaskRegistry
from sim.utils.task_registry import TaskRegistry # noqa: E402

task_registry = TaskRegistry()
task_registry.register("stompymini", MiniFreeEnv, MiniCfg(), MiniCfgPPO())
task_registry.register("stompy", StompyFreeEnv, StompyCfg(), StompyCfgPPO())
task_registry.register("stompypro", StompyProFreeEnv, StompyProCfg(), StompyProCfgPPO())
task_registry.register("dora", DoraFreeEnv, DoraCfg(), DoraCfgPPO())
task_registry.register("h1", H1FreeEnv, H1Cfg(), H1CfgPPO())
task_registry.register("g1", G1FreeEnv, G1Cfg(), G1CfgPPO())
Expand Down
34 changes: 17 additions & 17 deletions sim/envs/humanoids/stompymini_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,22 +161,22 @@ class rewards:

class scales:
# reference motion tracking
# joint_pos = 1.6
# feet_clearance = 1.6
# feet_contact_number = 1.2
# # gait
# feet_air_time = 1.6
# foot_slip = -0.05
# feet_distance = 0.2
# knee_distance = 0.2
# # contact
# feet_contact_forces = -0.01
# # vel tracking
# tracking_lin_vel = 1.2
# tracking_ang_vel = 1.1
# vel_mismatch_exp = 0.5 # lin_z; ang x,y
# low_speed = 0.2
# track_vel_hard = 0.5
joint_pos = 1.6
feet_clearance = 1.6
feet_contact_number = 1.2
# gait
feet_air_time = 1.6
foot_slip = -0.05
feet_distance = 0.2
knee_distance = 0.2
# contact
feet_contact_forces = -0.01
# vel tracking
tracking_lin_vel = 1.2
tracking_ang_vel = 1.1
vel_mismatch_exp = 0.5 # lin_z; ang x,y
low_speed = 0.2
track_vel_hard = 0.5

# base pos
default_joint_pos = 0.5
Expand Down Expand Up @@ -233,7 +233,7 @@ class runner:

# logging
save_interval = 100 # check for potential saves every this many iterations
experiment_name = "Full"
experiment_name = "StompyMini"
run_name = ""
# load and resume
resume = False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
LeggedRobotCfg,
LeggedRobotCfgPPO,
)
from sim.resources.stompy.joints import Robot
from sim.resources.stompypro.joints import Robot

NUM_JOINTS = len(Robot.all_joints()) # 12


class StompyCfg(LeggedRobotCfg):
class StompyProCfg(LeggedRobotCfg):
"""Configuration class for the Legs humanoid robot."""

class env(LeggedRobotCfg.env):
Expand All @@ -35,10 +35,10 @@ class safety:
class asset(LeggedRobotCfg.asset):
file = str(robot_urdf_path())

name = "stompy"
name = "stompypro"

foot_name = "foot"
knee_name = "calf"
foot_name = ["L_foot", "R_foot"]
knee_name = ["L_calf", "R_calf"]

termination_height = 0.2
default_feet_height = 0.0
Expand Down Expand Up @@ -140,15 +140,17 @@ class ranges:
ang_vel_yaw = [-0.3, 0.3] # min max [rad/s]
heading = [-0.14, 0.14]

# a - normal
# b - negate target_join_pos_scale (-0.14)
class rewards:
# quite important to keep it right
base_height_target = 0.63
base_height_target = 0.78
min_dist = 0.2
max_dist = 0.4
max_dist = 0.4 # 0.4
# put some settings here for LLM parameter tuning
target_joint_pos_scale = 0.14 # rad
target_feet_height = 0.05 # m
cycle_time = 0.5 # sec
target_feet_height = 0.08 # 0.05 # m
cycle_time = 0.7 # sec
# if true negative total rewards are clipped at zero (avoids early termination problems)
only_positive_rewards = True
# tracking reward = exp(error*sigma)
Expand All @@ -157,17 +159,17 @@ class rewards:

class scales:
# reference motion tracking
# joint_pos = 1.6
# joint_pos = 1.6#1.6
# feet_clearance = 1.0
# feet_contact_number = 1.2
# # gait
# feet_air_time = 1.0
# feet_contact_number = 1.2#1.2
# # # gait
# feet_air_time = 1.0 # 1.0
# foot_slip = -0.05
# feet_distance = 0.2
# knee_distance = 0.2
# # contact
# # # contact
# feet_contact_forces = -0.01
# # vel tracking
# # # vel tracking
# tracking_lin_vel = 1.2
# tracking_ang_vel = 1.1
# vel_mismatch_exp = 0.5 # lin_z; ang x,y
Expand Down Expand Up @@ -205,7 +207,7 @@ class viewer:
lookat = [0, -2, 0]


class StompyCfgPPO(LeggedRobotCfgPPO):
class StompyProCfgPPO(LeggedRobotCfgPPO):
seed = 5
runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner

Expand All @@ -230,7 +232,7 @@ class runner:

# logging
save_interval = 100 # check for potential saves every this many iterations
experiment_name = "Stompy"
experiment_name = "StompyPro"
run_name = ""
# load and resume
resume = False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@
from isaacgym.torch_utils import * # isort:skip

from sim.envs.base.legged_robot import LeggedRobot
from sim.resources.stompy.joints import Robot
from sim.resources.stompypro.joints import Robot
from sim.utils.terrain import HumanoidTerrain

from isaacgym import gymtorch # isort:skip

import torch # isort:skip


class StompyFreeEnv(LeggedRobot):
"""StompyFreeEnv is a class that represents a custom environment for a legged robot.
class StompyProFreeEnv(LeggedRobot):
"""StompyProFreeEnv is a class that represents a custom environment for a legged robot.
Args:
cfg (LeggedRobotCfg): Configuration object for the legged robot.
Expand Down
8 changes: 5 additions & 3 deletions sim/play.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
python sim/play.py --task stompymini --log_h5
"""

# mypy: ignore-errors

import argparse
import copy
import logging
Expand All @@ -20,10 +22,10 @@

logger = logging.getLogger(__name__)

from sim.env import run_dir
from sim.envs import task_registry
from sim.env import run_dir # noqa: E402
from sim.envs import task_registry # noqa: E402
from sim.utils.helpers import get_args # noqa: E402
from sim.utils.logger import Logger
from sim.utils.logger import Logger # noqa: E402

import torch # isort: skip

Expand Down
5 changes: 3 additions & 2 deletions sim/resources/stompymini/joints.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class RightHand(Node):
wrist_roll = "right hand roll"
gripper = "right hand gripper"


class RightArm(Node):
shoulder_yaw = "right shoulder yaw"
shoulder_pitch = "right shoulder pitch"
Expand Down Expand Up @@ -166,7 +167,7 @@ def default_limits2(cls) -> Dict[str, Dict[str, float]]:
Robot.left_arm.hand.gripper: {
"lower": 0,
"upper": 1.57,
},
},
# right arm
Robot.right_arm.shoulder_pitch: {
"lower": 3.119,
Expand All @@ -191,7 +192,7 @@ def default_limits2(cls) -> Dict[str, Dict[str, float]]:
Robot.right_arm.hand.gripper: {
"lower": 0,
"upper": 1.57,
},
},
# left leg
Robot.legs.left.hip_pitch: {
"lower": -1.14,
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -496,4 +496,4 @@
</geometry>
</collision>
</link>
</robot>
</robot>
Loading

0 comments on commit 621dc2e

Please sign in to comment.