rl_测试液体相关以及离心机放置试管v1

This commit is contained in:
2026-03-04 10:09:44 +08:00
parent 9325184f4d
commit a9cef174f5
23 changed files with 2654 additions and 33 deletions

View File

@@ -8,5 +8,6 @@
"ms-iot.vscode-ros",
"ms-python.black-formatter",
"ms-python.flake8",
"saoudrizwan.claude-dev",
]
}

129
create_liquid.py Normal file
View File

@@ -0,0 +1,129 @@
import omni.isaac.core.utils.prims as prim_utils
from omni.isaac.core.simulation_context import SimulationContext
from omni.isaac.core.utils.stage import get_current_stage
from pxr import UsdGeom, UsdPhysics, PhysxSchema, Gf, Sdf
# 引入 PhysX 粒子工具 (Omniverse 内置库)
import omni.physx.scripts.utils as physxUtils
import omni.physx.scripts.particleUtils as particleUtils
def create_fluid_scene():
    """Build a demo PhysX-particle fluid scene.

    Creates (in order): a particle system, a viscous PBD fluid material, a
    block of fluid particles above the scene, and two cylinder containers
    (a beaker below, a test tube above).

    Returns:
        tuple: ``(sim, tube_path)`` -- the ``SimulationContext`` and the prim
        path of the test tube that will pour the liquid.
    """
    stage = get_current_stage()
    # 1. Initialise the simulation context.
    sim = SimulationContext()
    # Make sure the physics rate suits fluids (fluids usually need a smaller
    # step or more substeps); 60 Hz here.
    sim.set_simulation_dt(physics_dt=1.0 / 60.0, rendering_dt=1.0 / 60.0)
    # 2. Create the particle system -- the container managing all fluid particles.
    particle_system_path = "/World/FluidSystem"
    particleUtils.add_physx_particle_system(
        stage=stage,
        particle_system_path=particle_system_path,
        simulation_owner=None,
        radius=0.02,  # particle radius (sets the fluid resolution)
        contact_offset=0.03,
        solid_rest_offset=0.02,
        fluid_rest_offset=0.02,
        mode="fluid"  # fluid (as opposed to granular) mode
    )
    # 3. Create the fluid material (defines viscosity).
    # A "flowing but thick" liquid such as honey or shampoo needs high viscosity.
    material_path = "/World/FluidMaterial"
    particleUtils.add_pbd_particle_material(
        stage=stage,
        path=material_path,
        cohesion=0.02,  # cohesion (keeps the liquid together)
        viscosity=0.5,  # viscosity (key parameter: larger = thicker, e.g. honey)
        surface_tension=0.01,  # surface tension
        friction=0.1,  # inter-particle friction
        damping=0.1  # damping
    )
    # 4. Create the fluid geometry (spawn a blob of particles at the tube's
    # initial position, assumed around (0, 0, 0.5)).
    fluid_path = "/World/Liquid"
    # Create the particle point set via a point instancer.
    particleUtils.add_physx_particleset_pointinstancer(
        stage=stage,
        path=fluid_path,
        particle_system_path=particle_system_path,
        self_collision=True,
        fluid=True,
        particle_group=0,
        particle_mass=0.001,
        density=1000.0  # density of water
    )
    # Key step: bind the material to the fluid prim.
    physxUtils.add_physics_material_to_prim(stage, stage.GetPrimAtPath(fluid_path), material_path)
    # Normally a small generator fills a region of space with particles.
    # For demo simplicity we lay out a 5x5x10 grid by hand; in real Isaac Lab
    # use you would load a pre-authored USD with particles or use SamplingAPI.
    points = []
    for x in range(5):
        for y in range(5):
            for z in range(10):
                points.append(Gf.Vec3f(x*0.04, y*0.04, 0.5 + z*0.04))
    # Write the coordinates onto the particle prim.
    # NOTE(review): assumes the point-instancer prim exposes a "positions"
    # attribute -- confirm against the particleUtils version in use.
    points_attr = stage.GetPrimAtPath(fluid_path).GetAttribute("positions")
    points_attr.Set(points)
    # 5. Create the containers (beaker and test tube).
    # Simple cylinders for demonstration; a real setup loads .usd models.
    # Beaker (receives the liquid at the bottom).
    beaker_path = "/World/Beaker"
    # NOTE(review): omni.isaac.core prim_utils documents create_prim, not
    # create_cyl -- verify this helper exists in the target environment.
    prim_utils.create_cyl(
        prim_path=beaker_path,
        radius=0.15,
        height=0.2,
        position=Gf.Vec3d(0, 0, 0.1)
    )
    # Caution: prim utils create a solid convex hull, so fluid cannot enter.
    # Key step: the container collision must be a mesh (SDF) or a hollow cup
    # assembled from several faces. Usually a custom USD asset is loaded;
    # shown here only as a placeholder:
    # setup_concave_collision(beaker_path)
    # Test tube (pours from above).
    tube_path = "/World/TestTube"
    tube = prim_utils.create_cyl(
        prim_path=tube_path,
        radius=0.05,
        height=0.2,
        position=Gf.Vec3d(0.2, 0, 0.6)  # above and to the side of the beaker
    )
    # Give the tube rigid-body properties so it can be rotated.
    physxUtils.set_rigid_body_properties(stage, tube_path)
    return sim, tube_path
def run_simulation(sim, tube_path):
    """Step the simulation and (schematically) tip the test tube to pour.

    Args:
        sim: ``SimulationContext`` returned by ``create_fluid_scene``.
        tube_path: prim path of the test-tube rigid body.
    """
    # 6. Start the loop and perform the "pouring" motion.
    sim.initialize_physics()
    sim.play()
    stage = get_current_stage()
    tube_prim = stage.GetPrimAtPath(tube_path)
    xform = UsdGeom.Xformable(tube_prim)
    # Simulate 500 frames.
    for i in range(500):
        # Simple kinematic control: slowly rotate the test tube.
        if i < 200:
            # Rotation to pour out the liquid (Euler angle / quaternion).
            rotation = Gf.Rotation(Gf.Vec3d(0, 1, 0), i * 0.5)  # rotate about the Y axis
            # NOTE(review): 'rotation' and 'current_xform' are computed but
            # never written back to the prim, so the tube does not actually
            # move. A complete implementation must apply the transform via
            # the prim's xform ops.
            current_xform = xform.GetLocalTransformation()
            # Simplified here: the pose-update logic must be written per scene.
        sim.step()
# Entry point: build the fluid scene, then run the pouring simulation.
if __name__ == "__main__":
    sim, tube = create_fluid_scene()
    run_simulation(sim, tube)

View File

@@ -0,0 +1,70 @@
import math

import isaaclab.sim as sim_utils
from isaaclab.actuators import ImplicitActuatorCfg
from isaaclab.assets import ArticulationCfg
from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
from isaaclab.assets import RigidObjectCfg, UsdFileCfg, RigidBodyPropertiesCfg, CollisionPropertiesCfg
# Articulation config for the benchtop centrifuge (rotor joint "r1", lid joint "r2").
CENTRIFUGE_CFG = ArticulationCfg(
    spawn=sim_utils.UsdFileCfg(
        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/centrifuge.usd",
        rigid_props=sim_utils.RigidBodyPropertiesCfg(
            disable_gravity=False,
            max_depenetration_velocity=1.0,
            linear_damping=0.5,
            angular_damping=0.5,
        ),
        articulation_props=sim_utils.ArticulationRootPropertiesCfg(
            enabled_self_collisions=False,
            solver_position_iteration_count=32,
            solver_velocity_iteration_count=16,
            stabilization_threshold=1e-6,
            # IMPORTANT: the base must be fixed, otherwise the centrifuge tips
            # over when the robot presses the lid.
            # fix_root_link=True,
        ),
        collision_props=CollisionPropertiesCfg(
            collision_enabled=True,
            contact_offset=0.0005,  # original 0.02
            rest_offset=0
        ),
    ),
    init_state=ArticulationCfg.InitialStateCfg(
        # 1. Initial joint angles, following the robot-config convention.
        joint_pos={
            "r1": math.radians(0.0),  # rotor position (irrelevant here)
            # KEY CHANGE: start with the lid open.
            # The USD joint limits are (-100, 0); -100 degrees is fully open.
            "r2": math.radians(-100.0),
        },
        pos=(0.80, -0.25, 0.8085),  # (0.95, -0.3, 0.8085)
        rot=[-0.7071, 0.0, 0.0, 0.7071],
        # rot=[0.0, 0.0, 0.0, 1.0],
    ),
    actuators={
        "lid_passive_mechanism": ImplicitActuatorCfg(
            joint_names_expr=["r2"],
            # KEY DIFFERENCE:
            # The robot arm uses stiffness 10000.0 because it needs precise,
            # rigid positioning.
            #
            # But if the centrifuge lid used 10000 it would be welded in
            # mid-air and the robot could never press it down. ~200.0 means:
            # 1. strong enough to hold the lid open against gravity;
            # 2. soft enough that the lid yields when the robot pushes down.
            stiffness=200.0,
            # Damping: a little resistance to mimic a gas strut and stop the
            # lid from bouncing like a spring.
            damping=20.0,
            # Keep the effort limit generous, or the lid cannot be held up.
            effort_limit_sim=1000.0,
            velocity_limit_sim=100.0,
        ),
        # The rotor can be stiffer -- the robot never needs to push it.
        "rotor_control": ImplicitActuatorCfg(
            joint_names_expr=["r1"],
            stiffness=0.0,
            damping=10.0,
        ),
    }
)

View File

@@ -0,0 +1,29 @@
import isaaclab.sim as sim_utils
from isaaclab.actuators import ImplicitActuatorCfg
from isaaclab.assets import ArticulationCfg
from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
from isaaclab.assets import RigidObjectCfg, UsdFileCfg, RigidBodyPropertiesCfg, CollisionPropertiesCfg
# Rigid-object config for the lab table the equipment sits on.
TABLE_CFG=RigidObjectCfg(
    prim_path="{ENV_REGEX_NS}/Table",
    init_state=RigidObjectCfg.InitialStateCfg(
        pos=[0.95, -0.3, 0.005],
        rot=[0.7071, 0.0, 0.0, 0.7071],
        lin_vel=[0.0, 0.0, 0.0],
        ang_vel=[0.0, 0.0, 0.0],
    ),
    spawn=UsdFileCfg(
        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
        rigid_props=RigidBodyPropertiesCfg(
            rigid_body_enabled= True,
            solver_position_iteration_count=32,
            solver_velocity_iteration_count=16,
            disable_gravity=False,
        ),
        collision_props=CollisionPropertiesCfg(
            collision_enabled=True,
            contact_offset=0.0005,  # original 0.02
            rest_offset=0
        ),
    ),
)

View File

@@ -0,0 +1,47 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Configuration for the test-tube rigid object."""
import isaaclab.sim as sim_utils
from isaaclab.actuators import ImplicitActuatorCfg
from isaaclab.assets import ArticulationCfg
from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
from isaaclab.assets import RigidObjectCfg, UsdFileCfg, RigidBodyPropertiesCfg, CollisionPropertiesCfg
##
# Configuration
##
# Rigid-object config for the test tube the robot places into the centrifuge.
Tube_CFG = RigidObjectCfg(
    prim_path="{ENV_REGEX_NS}/Tube",
    init_state=RigidObjectCfg.InitialStateCfg(
        # pos=[0.9523,-0.2512,1.0923],
        pos=[0.803 , -0.25, 1.0282],  # (0.9988, -0.2977, 1.0498633)
        # rot=[-0.7071, 0.0, 0.0, 0.7071],
        rot=[0.0, 0.0, 0.0, 1.0],
        lin_vel=[0.0, 0.0, 0.0],
        ang_vel=[0.0, 0.0, 0.0],
    ),
    spawn=UsdFileCfg(
        usd_path="C:/Users/PC/workpalce/maic_usd_assets/assets/Collected_equipment001/Equipment/tube1.usd",
        # scale=(0.2, 0.2, 0.2),
        rigid_props=RigidBodyPropertiesCfg(
            rigid_body_enabled= True,
            solver_position_iteration_count=32,
            solver_velocity_iteration_count=16,
            max_angular_velocity=1000.0,
            max_linear_velocity=1000.0,
            # High damping / low depenetration keep the small tube stable in grasps.
            max_depenetration_velocity=0.5,  # original 5.0
            linear_damping=5.0,  # original 0.5
            angular_damping=5.0,  # original 0.5
            disable_gravity=False,
        ),
        collision_props=CollisionPropertiesCfg(
            collision_enabled=True,
            contact_offset=0.0005,  # original 0.02
            rest_offset=0
        ),
    ),
)

View File

@@ -0,0 +1,51 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Configuration for the test-tube rack rigid object."""
import isaaclab.sim as sim_utils
from isaaclab.actuators import ImplicitActuatorCfg
from isaaclab.assets import ArticulationCfg
from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
# from isaaclab.assets import RigidObjectCfg, UsdFileCfg, RigidBodyPropertiesCfg, CollisionPropertiesCfg
from isaaclab.assets import RigidObjectCfg
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
# Configuration
##
# Rigid-object config for the test-tube rack.
TubeRack_CFG = RigidObjectCfg(
    prim_path="{ENV_REGEX_NS}/TubeRack",
    init_state=RigidObjectCfg.InitialStateCfg(
        # pos=[0.9523,-0.2512,1.0923],
        pos=[0.803 , 0.65, 0.9],  # (0.9988, -0.2977, 1.0498633)
        # rot=[-0.7071, 0.0, 0.0, 0.7071],
        rot=[1.0, 0.0, 0.0, 0.0],
        lin_vel=[0.0, 0.0, 0.0],
        ang_vel=[0.0, 0.0, 0.0],
    ),
    spawn=UsdFileCfg(
        usd_path="C:/Users/PC/workpalce/maic_usd_assets/assets/Collected_equipment001/Equipment/Test_Tube_Rack_AA_01.usdc",
        # scale=(0.2, 0.2, 0.2),
        rigid_props=RigidBodyPropertiesCfg(
            rigid_body_enabled= True,
            solver_position_iteration_count=32,
            solver_velocity_iteration_count=16,
            max_angular_velocity=1000.0,
            max_linear_velocity=1000.0,
            # High damping / low depenetration keep the rack stable on contact.
            max_depenetration_velocity=0.5,  # original 5.0
            linear_damping=5.0,  # original 0.5
            angular_damping=5.0,  # original 0.5
            disable_gravity=False,
        ),
        collision_props=CollisionPropertiesCfg(
            collision_enabled=True,
            contact_offset=0.0005,  # original 0.02
            rest_offset=0
        ),
    ),
)

View File

@@ -101,7 +101,7 @@ MINDBOT_CFG = ArticulationCfg(
"right_hand_joint_left": 0.0,
"right_hand_joint_right": 0.0,
# trunk
"PrismaticJoint": 0.23, # 0.30
"PrismaticJoint": 0.26, # 0.30
# head
"head_revoluteJoint": 0.0 #0.5236
},

View File

@@ -24,6 +24,9 @@ from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
from isaaclab.sensors import CameraCfg
from mindbot.assets.tube_rack import TubeRack_CFG
from . import mdp
##
@@ -61,8 +64,8 @@ TABLE_CFG=RigidObjectCfg(
),
)
LID_CFG = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Lid",
Tube_CFG = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Tube",
init_state=RigidObjectCfg.InitialStateCfg(
# pos=[0.9523,-0.2512,1.0923],
pos=[0.803 , -0.25, 1.0282],#(0.9988, -0.2977, 1.0498633)
@@ -218,7 +221,10 @@ class MindbotSceneCfg(InteractiveSceneCfg):
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
centrifuge: ArticulationCfg = CENTRIFUGE_CFG.replace(prim_path="{ENV_REGEX_NS}/centrifuge")
lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
# lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
tube: RigidObjectCfg = Tube_CFG.replace(prim_path="{ENV_REGEX_NS}/Tube")
# tube rack
# tube_rack: RigidObjectCfg = TubeRack_CFG.replace(prim_path="{ENV_REGEX_NS}/TubeRack")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
@@ -283,21 +289,21 @@ class ActionsCfg:
"""Action specifications for the MDP."""
# 使用任务空间控制器:策略输出末端执行器位置+姿态增量6维
# left_arm_ee = DifferentialInverseKinematicsActionCfg(
# asset_name="Mindbot",
# joint_names=["r_joint[1-6]"], # 左臂的6个关节
# body_name="right_hand_body", # 末端执行器body名称
# controller=DifferentialIKControllerCfg(
# command_type="pose", # 控制位置+姿态
# use_relative_mode=True, # 相对模式:动作是增量
# ik_method="dls", # Damped Least Squares方法
# ),
# scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
# )
right_arm_fixed = mdp.JointPositionActionCfg(
right_arm_ee = DifferentialInverseKinematicsActionCfg(
asset_name="Mindbot",
joint_names=["r_joint[1-6]"], # 对应 l_joint1 到 l_joint6
joint_names=["r_joint[1-6]"], # 左臂的6个关节
body_name="right_hand_body", # 末端执行器body名称
controller=DifferentialIKControllerCfg(
command_type="pose", # 控制位置+姿态
use_relative_mode=True, # 相对模式:动作是增量
ik_method="dls", # Damped Least Squares方法
),
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
)
left_arm_fixed = mdp.JointPositionActionCfg(
asset_name="Mindbot",
joint_names=["l_joint[1-6]"], # 对应 l_joint1 到 l_joint6
# 1. 缩放设为 0这让 RL 策略无法控制它,它不会动
scale=0.0,
@@ -305,12 +311,12 @@ class ActionsCfg:
# 2. 偏移设为你的目标角度(弧度):这告诉电机“永远停在这里”
# 对应 (135, -70, -90, 90, 90, -70)
offset={
"r_joint1": 2.3562,
"r_joint2": -1.2217,
"r_joint3": -1.5708,
"r_joint4": 1.5708,
"r_joint5": 1.5708,
"r_joint6": -1.2217,
"l_joint1": 2.3562,
"l_joint2": -1.2217,
"l_joint3": -1.5708,
"l_joint4": 1.5708,
"l_joint5": 1.5708,
"l_joint6": -1.2217,
},
)
@@ -331,7 +337,7 @@ class ActionsCfg:
asset_name="Mindbot",
joint_names=["PrismaticJoint"],
scale=0.00,
offset=0.24, # 0.3
offset=0.26, # 0.3
clip=None
)
@@ -359,7 +365,7 @@ class ObservationsCfg:
left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("tube")})
centrifuge_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("centrifuge")})
def __post_init__(self) -> None:
@@ -422,11 +428,11 @@ class EventCfg:
"velocity_range": {"x": (0.0, 0.0)}
}
)
reset_lid = EventTerm(
reset_tube = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("lid"),
"asset_cfg": SceneEntityCfg("tube"),
# "pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)},
"pose_range": {"x": (0.0, 0.0), "y": (0.0, 0.0), "z": (0.0, 0.0)},
"velocity_range": {"x": (0.0, 0.0)}
@@ -441,7 +447,7 @@ class RewardsCfg:
gripper_lid_orientation_alignment = RewTerm(
func=mdp.gripper_lid_orientation_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"lid_cfg": SceneEntityCfg("tube"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"gripper_body_name": "right_hand_body",
"gripper_forward_axis": (0.0, 0.0, 1.0),
@@ -457,7 +463,7 @@ class RewardsCfg:
gripper_lid_position_alignment = RewTerm(
func=mdp.gripper_lid_position_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"lid_cfg": SceneEntityCfg("tube"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "right_hand_l",
"right_gripper_name": "right_hand__r",
@@ -469,7 +475,7 @@ class RewardsCfg:
approach_lid_penalty = RewTerm(
func=mdp.gripper_lid_distance_penalty,
params={
"lid_cfg": SceneEntityCfg("lid"),
"lid_cfg": SceneEntityCfg("tube"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "right_hand_l",
"right_gripper_name": "right_hand__r",
@@ -546,7 +552,7 @@ class TerminationsCfg:
lid_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
params={"asset_cfg": SceneEntityCfg("tube"), "maximum_height": 2.0},
)
# 新增:盖子掉落判定
@@ -554,7 +560,7 @@ class TerminationsCfg:
lid_dropped = DoneTerm(
func=mdp.base_height_failure, # 复用高度判定函数
params={
"asset_cfg": SceneEntityCfg("lid"),
"asset_cfg": SceneEntityCfg("tube"),
"minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了
},
)

View File

@@ -0,0 +1,30 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
# Register the centrifuge tube-placement task with Gymnasium. The kwargs map
# each supported RL framework to its agent-configuration entry point.
gym.register(
    id="Template-centrifuge-tube-put-v0",
    entry_point="isaaclab.envs:ManagerBasedRLEnv",
    disable_env_checker=True,
    kwargs={
        "env_cfg_entry_point": f"{__name__}.centrifuge_env_cfg:MindbotEnvCfg",
        # "env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
        "skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
        "skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
        "skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
        "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
    },
)

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,72 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
    """RSL-RL PPO runner settings for the Mindbot centrifuge-lid task."""
    num_steps_per_env = 48  # slightly more steps per rollout
    max_iterations = 5000  # more iterations, giving it enough time to learn
    save_interval = 100
    # experiment_name = "mindbot_grasp"  # suggested rename -- no longer a cartpole
    experiment_name = "mindbot_centrifuge_LidUp"
    policy = RslRlPpoActorCriticCfg(
        init_noise_std=0.7,
        actor_obs_normalization=True,  # enable observation normalization
        critic_obs_normalization=True,  # enable observation normalization
        # Increased network capacity: three-layer MLP, wide enough for the task.
        actor_hidden_dims=[128, 64, 32],
        critic_hidden_dims=[128, 64, 32],
        activation="elu",
    )
    algorithm = RslRlPpoAlgorithmCfg(
        value_loss_coef=1.0,
        use_clipped_value_loss=True,
        clip_param=0.2,
        entropy_coef=0.005,  # keep a moderate amount of exploration
        num_learning_epochs=5,
        num_mini_batches=8,
        learning_rate=3e-4,  # if unstable with the larger net, try 5.0e-4
        schedule="adaptive",
        gamma=0.99,
        lam=0.95,
        desired_kl=0.01,
        max_grad_norm=1.0,
    )
# @configclass
# class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
# num_steps_per_env = 16
# max_iterations = 150
# save_interval = 50
# experiment_name = "cartpole_direct"
# policy = RslRlPpoActorCriticCfg(
# init_noise_std=1.0,
# actor_obs_normalization=False,
# critic_obs_normalization=False,
# actor_hidden_dims=[32, 32],
# critic_hidden_dims=[32, 32],
# activation="elu",
# )
# algorithm = RslRlPpoAlgorithmCfg(
# value_loss_coef=1.0,
# use_clipped_value_loss=True,
# clip_param=0.2,
# entropy_coef=0.005,
# num_learning_epochs=5,
# num_mini_batches=4,
# learning_rate=1.0e-3,
# schedule="adaptive",
# gamma=0.99,
# lam=0.95,
# desired_kl=0.01,
# max_grad_norm=1.0,
# )

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,636 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
from re import A, T
from tkinter import N
import torch
from isaaclab.actuators import ImplicitActuatorCfg
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
from isaaclab.sensors import CameraCfg
from mindbot.assets.tube_rack import TubeRack_CFG
from . import mdp
##
# Pre-defined configs
##
from mindbot.robot.mindbot import MINDBOT_CFG
# ====== 其他物体配置 ======
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
# Lab table the manipulation task takes place on. Spawned from a USD asset as
# a gravity-affected rigid body so the tube and centrifuge can rest on it.
TABLE_CFG=RigidObjectCfg(
    prim_path="{ENV_REGEX_NS}/Table",
    init_state=RigidObjectCfg.InitialStateCfg(
        pos=[0.95, -0.3, 0.005],
        # (w, x, y, z) quaternion: 90-degree rotation about the x-axis.
        rot=[0.7071, 0.0, 0.0, 0.7071],
        lin_vel=[0.0, 0.0, 0.0],
        ang_vel=[0.0, 0.0, 0.0],
    ),
    spawn=UsdFileCfg(
        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
        rigid_props=RigidBodyPropertiesCfg(
            rigid_body_enabled= True,
            # High solver iteration counts for stable resting contacts.
            solver_position_iteration_count=32,
            solver_velocity_iteration_count=16,
            disable_gravity=False,
        ),
        collision_props=CollisionPropertiesCfg(
            collision_enabled=True,
            contact_offset=0.0005,#original 0.02
            rest_offset=0
        ),
    ),
)
# Test tube to be grasped and placed into the centrifuge. Heavily damped
# (linear/angular damping 5.0, low depenetration velocity) to keep this small
# body stable while in contact with the gripper.
Tube_CFG = RigidObjectCfg(
    prim_path="{ENV_REGEX_NS}/Tube",
    init_state=RigidObjectCfg.InitialStateCfg(
        # pos=[0.9523,-0.2512,1.0923],
        pos=[0.803 , -0.25, 1.0282],#(0.9988, -0.2977, 1.0498633)
        # rot=[-0.7071, 0.0, 0.0, 0.7071],
        rot=[0.0, 0.0, 0.0, 1.0],
        lin_vel=[0.0, 0.0, 0.0],
        ang_vel=[0.0, 0.0, 0.0],
    ),
    spawn=UsdFileCfg(
        usd_path="C:/Users/PC/workpalce/maic_usd_assets/assets/Collected_equipment001/Equipment/tube1.usd",
        # scale=(0.2, 0.2, 0.2),
        rigid_props=RigidBodyPropertiesCfg(
            rigid_body_enabled= True,
            solver_position_iteration_count=32,
            solver_velocity_iteration_count=16,
            max_angular_velocity=1000.0,
            max_linear_velocity=1000.0,
            max_depenetration_velocity=0.5,#original 5.0
            linear_damping=5.0, #original 0.5
            angular_damping=5.0, #original 0.5
            disable_gravity=False,
        ),
        collision_props=CollisionPropertiesCfg(
            collision_enabled=True,
            contact_offset=0.0005,#original 0.02
            rest_offset=0
        ),
    ),
)
# LID_CFG = RigidObjectCfg(
# prim_path="{ENV_REGEX_NS}/Lid",
# init_state=RigidObjectCfg.InitialStateCfg(
# # pos=[0.9523,-0.2512,1.0923],
# pos=[0.8488, -0.2477, 1.0498633],#(0.9988, -0.2977, 1.0498633)
# # rot=[-0.7071, 0.0, 0.0, 0.7071],
# rot=[0.0, 0.0, 0.0, 1.0],
# lin_vel=[0.0, 0.0, 0.0],
# ang_vel=[0.0, 0.0, 0.0],
# ),
# spawn=UsdFileCfg(
# usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/Lid_B.usd",
# # scale=(0.2, 0.2, 0.2),
# rigid_props=RigidBodyPropertiesCfg(
# rigid_body_enabled= True,
# solver_position_iteration_count=32,
# solver_velocity_iteration_count=16,
# max_angular_velocity=1000.0,
# max_linear_velocity=1000.0,
# max_depenetration_velocity=0.5,#original 5.0
# linear_damping=5.0, #original 0.5
# angular_damping=5.0, #original 0.5
# disable_gravity=False,
# ),
# collision_props=CollisionPropertiesCfg(
# collision_enabled=True,
# contact_offset=0.0005,#original 0.02
# rest_offset=0
# ),
# ),
# )
# Centrifuge articulation: joint "r1" is the rotor, joint "r2" is the lid
# hinge. The lid is held open by a soft implicit actuator so the robot can
# physically push it closed.
CENTRIFUGE_CFG = ArticulationCfg(
    spawn=sim_utils.UsdFileCfg(
        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/centrifuge.usd",
        rigid_props=sim_utils.RigidBodyPropertiesCfg(
            disable_gravity=False,
            max_depenetration_velocity=1.0,
            linear_damping=0.5,
            angular_damping=0.5,
        ),
        articulation_props=sim_utils.ArticulationRootPropertiesCfg(
            enabled_self_collisions=False,
            solver_position_iteration_count=32,
            solver_velocity_iteration_count=16,
            stabilization_threshold=1e-6,
            # IMPORTANT: the base should be fixed, otherwise the centrifuge
            # tips over when the robot presses on the lid.
            # fix_root_link=True,
        ),
        collision_props=CollisionPropertiesCfg(
            collision_enabled=True,
            contact_offset=0.0005,#original 0.02
            rest_offset=0
        ),
    ),
    init_state=ArticulationCfg.InitialStateCfg(
        # 1. Initial joint angles, defined the same way as the robot config.
        joint_pos={
            "r1": math.radians(0.0),  # rotor position (not important)
            # KEY: start with the lid open. The USD joint limits are
            # (-100, 0) degrees, where -100 is fully open.
            "r2": math.radians(-100.0),
        },
        pos=(0.80, -0.25, 0.8085),#(0.95, -0.3, 0.8085)
        rot=[-0.7071, 0.0, 0.0, 0.7071],
        # rot=[0.0, 0.0, 0.0, 1.0],
    ),
    actuators={
        "lid_passive_mechanism": ImplicitActuatorCfg(
            joint_names_expr=["r2"],
            # Key difference from the robot arm (stiffness 10000.0 for rigid,
            # precise positioning): at 10000 the lid would be welded in place
            # and the robot could never push it shut. Around 200.0:
            # 1. strong enough to hold the lid open against gravity, and
            # 2. soft enough that the lid yields when the robot presses down.
            stiffness=200.0,
            # Damping adds resistance (gas-strut feel) and prevents bouncing.
            damping=20.0,
            # Effort limit must not be too small or the lid cannot be held up.
            effort_limit_sim=1000.0,
            velocity_limit_sim=100.0,
        ),
        # The rotor can be stiff; the robot never needs to push it.
        "rotor_control": ImplicitActuatorCfg(
            joint_names_expr=["r1"],
            stiffness=0.0,
            damping=10.0,
        ),
    }
)
# Static room asset (background scenery; currently unused in the scene class).
ROOM_CFG = AssetBaseCfg(
    prim_path="{ENV_REGEX_NS}/Room",
    spawn=UsdFileCfg(
        usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
    ),
)
##
# Scene definition
##
@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
    """Scene with the Mindbot robot, a table, a centrifuge, a tube and a
    tube rack, plus a ground plane and dome light.

    Note: the original docstring said "cart-pole scene" — leftover template
    text; this scene has nothing to do with cart-pole.
    """

    # ground plane
    ground = AssetBaseCfg(
        prim_path="/World/ground",
        spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
    )
    # robot and task assets (replace() stamps the per-env prim path)
    # Room: AssetBaseCfg = ROOM_CFG.replace(prim_path="{ENV_REGEX_NS}/Room")
    Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
    table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
    centrifuge: ArticulationCfg = CENTRIFUGE_CFG.replace(prim_path="{ENV_REGEX_NS}/centrifuge")
    # lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
    tube: RigidObjectCfg = Tube_CFG.replace(prim_path="{ENV_REGEX_NS}/Tube")
    # tube rack
    tube_rack: RigidObjectCfg = TubeRack_CFG.replace(prim_path="{ENV_REGEX_NS}/TubeRack")
    # lights
    dome_light = AssetBaseCfg(
        prim_path="/World/DomeLight",
        spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
    )
    """
    添加相机定义
    # head_camera=CameraCfg(
    #     prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
    #     update_period=1/120,
    #     height=480,
    #     width=640,
    #     data_type=["rgb"],
    #     spawn=None,
    # )
    """
    # Disabled camera sensors, kept for later use:
    # left_hand_camera=CameraCfg(
    #     prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
    #     update_period=1/120,
    #     height=480,
    #     width=640,
    #     data_types=["rgb"],
    #     spawn=None,
    # )
    # right_hand_camera=CameraCfg(
    #     prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
    #     update_period=1/120,
    #     height=480,
    #     width=640,
    #     data_types=["rgb"],
    #     spawn=None,
    # )
    # head_camera=CameraCfg(
    #     prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
    #     update_period=1/120,
    #     height=480,
    #     width=640,
    #     data_types=["rgb"],
    #     spawn=None,
    # )
    # chest_camera=CameraCfg(
    #     prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
    #     update_period=1/120,
    #     height=480,
    #     width=640,
    #     data_types=["rgb"],
    #     spawn=None,
    # )
##
# MDP settings
##
def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
    """Observation: world-frame position of the left gripper body, expressed
    relative to each environment's origin.

    Args:
        env: Running environment; provides the scene and per-env origins.
        asset_cfg: Scene entity holding the robot articulation.

    Returns:
        Tensor of shape (num_envs, 3) with the per-env gripper position.
    """
    robot = env.scene[asset_cfg.name]
    ids, _ = robot.find_bodies(["left_hand_body"], preserve_order=True)
    world_pos = robot.data.body_pos_w[:, ids[0]]
    # Subtract the per-env origin so each clone observes a local position.
    return world_pos - env.scene.env_origins
@configclass
class ActionsCfg:
    """Action specifications for the MDP."""

    # Task-space controller: the policy outputs a 6-D end-effector delta
    # (position + orientation increments) for the right arm.
    right_arm_ee = DifferentialInverseKinematicsActionCfg(
        asset_name="Mindbot",
        joint_names=["r_joint[1-6]"],  # the six r_joint* joints (right arm)
        body_name="right_hand_body",  # end-effector body name
        controller=DifferentialIKControllerCfg(
            command_type="pose",  # control position + orientation
            use_relative_mode=True,  # relative mode: actions are increments
            ik_method="dls",  # damped least-squares IK
        ),
        scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
    )
    left_arm_fixed = mdp.JointPositionActionCfg(
        asset_name="Mindbot",
        joint_names=["l_joint[1-6]"],  # l_joint1 through l_joint6
        # 1. scale=0: the RL policy cannot move this arm at all.
        scale=0.0,
        # 2. offset = target angles (radians): the motors hold this pose.
        #    Corresponds to (135, -70, -90, 90, 90, -70) degrees.
        offset={
            "l_joint1": 2.3562,
            "l_joint2": -1.2217,
            "l_joint3": -1.5708,
            "l_joint4": 1.5708,
            "l_joint5": 1.5708,
            "l_joint6": -1.2217,
        },
    )
    grippers_position = mdp.BinaryJointPositionActionCfg(
        asset_name="Mindbot",
        joint_names=["left_hand_joint_left", "left_hand_joint_right"],
        # Dict format required: {joint-name regex: target value}.
        open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
        close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
    )
    # Torso lift held at a fixed height (scale 0 disables policy control).
    trunk_position = mdp.JointPositionActionCfg(
        asset_name="Mindbot",
        joint_names=["PrismaticJoint"],
        scale=0.00,
        offset=0.26,  # 0.3
        clip=None
    )
    centrifuge_lid_passive = mdp.JointPositionActionCfg(
        asset_name="centrifuge",  # matches the scene entity name
        joint_names=["r2"],
        # scale=0: anything the RL policy outputs is multiplied by zero, so
        # it cannot disturb the lid.
        scale=0.00,
        # offset = target angle; this becomes the PD controller's constant
        # set-point (about -100 degrees, i.e. lid open).
        offset= -1.7453,
        clip=None
    )
@configclass
class ObservationsCfg:
    """Observation specifications for the MDP."""

    @configclass
    class PolicyCfg(ObsGroup):
        """Observations for policy group."""

        mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
        mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
        mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
        left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
                                   params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
        # NOTE(review): despite the "lid" name, this term observes the tube's
        # position (the entity is "tube") — consider renaming.
        lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("tube")})
        centrifuge_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("centrifuge")})

        def __post_init__(self) -> None:
            # No observation noise; concatenate all terms into one flat vector.
            self.enable_corruption = False
            self.concatenate_terms = True

    # observation groups
    policy: PolicyCfg = PolicyCfg()
@configclass
class EventCfg:
    """Configuration for reset-time events (domain randomization)."""

    # Reset all joints to init_state with zero offset (required).
    reset_mindbot_all_joints = EventTerm(
        func=mdp.reset_joints_by_offset,
        mode="reset",
        params={
            "asset_cfg": SceneEntityCfg("Mindbot"),
            "position_range": (0.0, 0.0),
            "velocity_range": (0.0, 0.0),
        },
    )
    # 2. Base mounting error (original note: consider shrinking to 2 cm).
    reset_mindbot_base = EventTerm(
        func=mdp.reset_root_state_uniform,
        mode="reset",
        params={
            "asset_cfg": SceneEntityCfg("Mindbot"),
            "pose_range": {
                "x": (-0.05, 0.05),
                "y": (-0.05, 0.05),
                "z": (0.74, 0.75),
                "yaw": (-0.1, 0.1),
            },
            "velocity_range": {"x": (0.0, 0.0), "y": (0.0, 0.0)},
        },
    )
    # 1. Small arm-joint offsets (0.01 rad ≈ 0.6 deg, a suitable magnitude).
    # NOTE(review): the term is named "left_arm" but the joints are
    # r_joint[1-6] (right arm) — confirm which arm should be randomized.
    reset_mindbot_left_arm = EventTerm(
        func=mdp.reset_joints_by_offset,
        mode="reset",
        params={
            "asset_cfg": SceneEntityCfg("Mindbot", joint_names=["r_joint[1-6]"]),
            "position_range": (-0.01, 0.01),
            "velocity_range": (0.0, 0.0),
        },
    )
    # Table reset (zero ranges: pose restored, no randomization).
    reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
                          params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
    reset_centrifuge = EventTerm(
        func=mdp.reset_root_state_uniform,
        mode="reset",
        params={
            "asset_cfg": SceneEntityCfg("centrifuge"),
            # "pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)},
            "pose_range": {"x": (0.0, 0.0), "y": (0.0, 0.0), "z": (0.0, 0.0)},
            "velocity_range": {"x": (0.0, 0.0)}
        }
    )
    reset_tube = EventTerm(
        func=mdp.reset_root_state_uniform,
        mode="reset",
        params={
            "asset_cfg": SceneEntityCfg("tube"),
            # "pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)},
            "pose_range": {"x": (0.0, 0.0), "y": (0.0, 0.0), "z": (0.0, 0.0)},
            "velocity_range": {"x": (0.0, 0.0)}
        }
    )
@configclass
class RewardsCfg:
    """Reward terms for the MDP."""

    # Orientation shaping: keep the gripper pointing down with its width axis
    # parallel to the tube, within max_angle_deg of tolerance.
    gripper_lid_orientation_alignment = RewTerm(
        func=mdp.gripper_lid_orientation_alignment,
        params={
            "lid_cfg": SceneEntityCfg("tube"),
            "robot_cfg": SceneEntityCfg("Mindbot"),
            "gripper_body_name": "right_hand_body",
            "gripper_forward_axis": (0.0, 0.0, 1.0),
            "gripper_width_axis": (0.0, 1.0, 0.0),
            "lid_handle_axis": (0.0, 1.0, 0.0),
            "max_angle_deg": 85.0,  # tolerated angular deviation (degrees)
        },
        weight=5.0 #original 5.0
    )
    # stage 2
    # Position alignment reward: encourage the gripper fingers to straddle the
    # tube with their midpoint hovering height_offset above it.
    gripper_lid_position_alignment = RewTerm(
        func=mdp.gripper_lid_position_alignment,
        params={
            "lid_cfg": SceneEntityCfg("tube"),
            "robot_cfg": SceneEntityCfg("Mindbot"),
            "left_gripper_name": "right_hand_l",
            "right_gripper_name": "right_hand__r",
            "height_offset": 0.115,  # target point 0.115 m above the tube
            "std": 0.3,  # tolerance of the alignment kernel
        },
        weight=3.0 #original 3.0
    )
    # Linear distance penalty toward the same grasp target point.
    approach_lid_penalty = RewTerm(
        func=mdp.gripper_lid_distance_penalty,
        params={
            "lid_cfg": SceneEntityCfg("tube"),
            "robot_cfg": SceneEntityCfg("Mindbot"),
            "left_gripper_name": "right_hand_l",
            "right_gripper_name": "right_hand__r",
            "height_offset": 0.115,
        },
        # Negative weight turns the returned distance into a penalty, e.g. at
        # a distance of 0.5 m the contribution is 0.5 * -1.0 = -0.5.
        weight=-1.0
    )
    # # [New] Fine alignment (guides entry into the 2 cm zone)
    # gripper_lid_fine_alignment = RewTerm(
    #     func=mdp.gripper_lid_position_alignment,
    #     params={
    #         "lid_cfg": SceneEntityCfg("lid"),
    #         "robot_cfg": SceneEntityCfg("Mindbot"),
    #         "left_gripper_name": "left_hand__l",
    #         "right_gripper_name": "left_hand_r",
    #         "height_offset": 0.07,  # [Note] must match the interaction check height exactly
    #         "std": 0.05,  # very steep gradient, effective only within 5-10 cm
    #     },
    #     weight=10.0  # high weight
    # )
    # gripper_close_interaction = RewTerm(
    #     func=mdp.gripper_close_when_near,
    #     params={
    #         "lid_cfg": SceneEntityCfg("lid"),
    #         "robot_cfg": SceneEntityCfg("Mindbot"),
    #         "left_gripper_name": "left_hand__l",
    #         "right_gripper_name": "left_hand_r",
    #         "joint_names": ["left_hand_joint_left", "left_hand_joint_right"],
    #         "target_close_pos": 0.03,
    #         "height_offset": 0.04,
    #         "grasp_radius": 0.03,
    #     },
    #     weight=10.0
    # )
    # lid_lifted_reward = RewTerm(
    #     func=mdp.lid_is_lifted,  # existing function in rewards.py
    #     params={
    #         "lid_cfg": SceneEntityCfg("lid"),
    #         "minimal_height": 1.0,  # tune per scene, ~5 cm above the initial height
    #     },
    #     weight=10.0  # large enough weight
    # )
    # lid_lifting_reward = RewTerm(
    #     func=mdp.lid_lift_progress_reward,
    #     params={
    #         "lid_cfg": SceneEntityCfg("lid"),
    #         "initial_height": 0.8,  # verify this value! print the lid z-coordinate
    #         "target_height_lift": 0.2,
    #         "height_offset": 0.07,  # keep consistent with the other rewards
    #         "std": 0.1
    #     },
    #     # larger weight marks this as the final goal
    #     weight=10.0
    # )
@configclass
class TerminationsCfg:
    """Termination terms for the MDP."""

    time_out = DoneTerm(func=mdp.time_out, time_out=True)
    # Safety guard: end the episode if the robot base is launched above 2 m.
    mindbot_fly_away = DoneTerm(
        func=mdp.base_height_failure,
        params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
    )
    # Same guard for the tube.
    lid_fly_away = DoneTerm(
        func=mdp.base_height_failure,
        params={"asset_cfg": SceneEntityCfg("tube"), "maximum_height": 2.0},
    )
    # Drop detection: terminate when the tube falls back near table height
    # (it dropped, or was knocked/pressed down).
    lid_dropped = DoneTerm(
        func=mdp.base_height_failure,  # reuse the height-check function
        params={
            "asset_cfg": SceneEntityCfg("tube"),
            # Below 0.79 the tube is considered dropped (its spawn height is
            # ~1.03; the table surface sits lower).
            "minimum_height": 0.79,
        },
    )
##
# Environment configuration
##
@configclass
class CurriculumCfg:
    """Curriculum terms (all disabled for now; kept for reference)."""

    pass
    # action_rate = CurrTerm(func=mdp.modify_reward_weight,
    #                        params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
    # joint_vel = CurrTerm(func=mdp.modify_reward_weight,
    #                      params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
    # position_std_scheduler = CurrTerm(
    #     func=mdp.modify_term_cfg,  # built-in Isaac Lab term
    #     params={
    #         # path: rewards manager -> reward term name -> params dict -> std key
    #         "address": "rewards.gripper_lid_position_alignment.params.std",
    #         # modification logic: the annealing function defined in mdp
    #         "modify_fn": mdp.annealing_std,
    #         # arguments forwarded to the function
    #         "modify_params": {
    #             "start_step": 00,     # ~600 iterations
    #             "end_step": 5_000,    # ~1200 iterations
    #             "start_std": 0.3,
    #             "end_std": 0.05,
    #         }
    #     }
    # )
@configclass
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
    """Manager-based RL environment configuration for the Mindbot task."""

    # Scene settings
    scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=5.0)
    # Basic settings
    observations: ObservationsCfg = ObservationsCfg()
    actions: ActionsCfg = ActionsCfg()
    events: EventCfg = EventCfg()
    # MDP settings
    rewards: RewardsCfg = RewardsCfg()
    terminations: TerminationsCfg = TerminationsCfg()
    # curriculum: CurriculumCfg = CurriculumCfg()

    # Post initialization
    def __post_init__(self) -> None:
        """Post initialization."""
        # general settings
        # original 2: raising decimation halves the control frequency
        # (original note: 25 Hz -> 12.5 Hz).
        self.decimation = 4
        self.episode_length_s = 10
        # viewer settings
        self.viewer.eye = (3.5, 0.0, 13.2)#(3.5, 0.0, 3.2)
        # simulation settings
        self.sim.dt = 1 / 120 #original 1 / 60
        self.sim.render_interval = self.decimation
        # 1. PhysX GPU buffer sizing (enlarged to avoid buffer overflows).
        self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256MB
        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
        self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # rigid contact capacity
        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # contact patch capacity
        # 5. aggregate pair capacities (for complex articulations)
        self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
        self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
        self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024

View File

@@ -0,0 +1,13 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the environment."""
from isaaclab.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403
from .gripper import * # noqa: F401, F403
from .curriculums import * # noqa: F401, F403

View File

@@ -0,0 +1,50 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
from isaaclab.envs.mdp import modify_term_cfg
def annealing_std(
    env: ManagerBasedRLEnv,
    env_ids: torch.Tensor,
    current_value: float,
    start_step: int,
    end_step: int,
    start_std: float,
    end_std: float
):
    """Linearly anneal a reward ``std`` parameter over training steps.

    Args:
        env: Environment; its ``common_step_counter`` drives the schedule.
        env_ids: Unused; required by the curriculum-term call signature.
        current_value: Current parameter value (injected by the manager).
        start_step: Step at which annealing begins.
        end_step: Step at which annealing ends.
        start_std: Value held before ``start_step``.
        end_std: Value held from ``end_step`` onward.

    Returns:
        The scheduled std value for the current step.
    """
    step = env.common_step_counter
    if step >= end_step:
        # Schedule finished: hold the final value.
        return end_std
    if step < start_step:
        # Not started yet: pin to the initial value.
        return start_std
    # In between: linear interpolation between start_std and end_std.
    frac = (step - start_step) / (end_step - start_step)
    return start_std + (end_std - start_std) * frac

View File

@@ -0,0 +1,54 @@
# 假设这是在你的 mdp.py 文件中
from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
from isaaclab.utils import configclass
import torch
class CoupledJointPositionAction(JointPositionAction):
    """Runtime action term that drives two mirrored gripper joints from a
    single scalar command.

    The policy outputs one value in [-1, 1]; it is mapped to symmetric joint
    targets (+command for one joint, -command for the other) so the two
    fingers always move together.

    Fix: this docstring was originally a stray string literal placed between
    methods, so it was never attached as the class ``__doc__``.
    """

    def __init__(self, cfg: 'CoupledJointPositionActionCfg', env):
        super().__init__(cfg, env)

    @property
    def action_dim(self) -> int:
        """Report a 1-D action so the ActionManager allocates a single input."""
        return 1

    def process_actions(self, actions: torch.Tensor):
        """Map the scalar policy action onto the two coupled joint targets."""
        scale = self.cfg.scale
        offset = self.cfg.offset
        # store the raw actions (clamped to the valid command range)
        self._raw_actions[:] = torch.clamp(actions, -1, 1)
        # apply the affine transformation to get the joint-space command
        target_position_interval = self._raw_actions * scale + offset
        right_cmd = target_position_interval
        left_cmd = -target_position_interval
        # Expand the scalar command into the two mirrored joint targets
        # (order follows cfg.joint_names: first joint gets the negated command).
        self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)
        # clip actions if limits were configured
        if self.cfg.clip is not None:
            self._processed_actions = torch.clamp(
                self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
            )
@configclass
class CoupledJointPositionActionCfg(JointPositionActionCfg):
    """Configuration class for the coupled gripper action.

    The key is ``class_type``, which points at the runtime implementation.
    Requires exactly two joint names (the mirrored finger joints).
    """

    # !!! Key point !!!
    # Tells the ActionManager: "when you see this config, instantiate the
    # CoupledJointPositionAction class".
    class_type = CoupledJointPositionAction

    def __post_init__(self) -> None:
        # This check is safe here because the ActionManager invokes
        # __post_init__ during setup.
        if len(self.joint_names) != 2:
            raise ValueError("Requires exactly two joint names.")
        super().__post_init__()

View File

@@ -0,0 +1,881 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
    """Penalize squared deviation of the selected joint positions from ``target``.

    Returns:
        Tensor of shape (num_envs,) with the summed squared error per env.
    """
    articulation: Articulation = env.scene[asset_cfg.name]
    # Wrap the angles into (-pi, pi) before comparing against the target.
    wrapped = wrap_to_pi(articulation.data.joint_pos[:, asset_cfg.joint_ids])
    error = wrapped - target
    return torch.sum(error * error, dim=1)
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
                  lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
    """Binary reward: 1.0 for every env whose lid is above ``minimal_height``."""
    lid: RigidObject = env.scene[lid_cfg.name]
    # z-coordinate of the lid root, shape (num_envs,)
    lid_height = lid.data.root_pos_w[:, 2]
    # Boolean mask cast to float: 1.0 when lifted, 0.0 otherwise.
    return (lid_height > minimal_height).float()
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
                         lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
                         robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                         gripper_body_name: str = "left_hand_body") -> torch.Tensor:
    """Tanh-kernel reaching reward: approaches 1 as the gripper nears the lid.

    Args:
        std: Kernel width; larger values give a flatter, more forgiving reward.
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]
    # Coerce std to a plain float so it cannot broadcast as a (3,) vector.
    if not isinstance(std, float):
        std = std.item() if torch.is_tensor(std) else float(std)
    # Lid root position: (num_envs, 3)
    lid_pos = lid.data.root_pos_w[:, :3]
    # Gripper body position: (num_envs, 3)
    ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_pos = robot.data.body_pos_w[:, ids[0], :]
    # Per-env Euclidean distance (epsilon keeps the kernel input positive).
    dist = torch.norm(lid_pos - gripper_pos, dim=1) + 1e-6
    return 1 - torch.tanh(dist / std)
def lid_grasped(env: ManagerBasedRLEnv,
                distance_threshold: float = 0.05,
                lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
                robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"),
                gripper_body_name: str = "left_hand_body") -> torch.Tensor:
    """Binary reward: 1.0 when the gripper is within ``distance_threshold`` of
    the lid AND the lid is lifted above 0.1 m.

    Returns:
        Tensor of shape (num_envs,) with 0.0/1.0 per environment.
    """
    # extract the used quantities (to enable type-hinting)
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]
    # Target lid position: (num_envs, 3)
    lid_pos_w = lid.data.root_pos_w[:, :3]
    # BUGFIX: find_bodies() returns (ids, names); indexing body_pos_w with the
    # whole id list kept an extra dimension of size 1, so the subtraction below
    # broadcast to (num_envs, num_envs, 3) and the reward shape was wrong.
    # Select the single body index instead (consistent with the sibling
    # gripper_lid_distance function above).
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
    # Distance of the gripper to the lid: (num_envs,)
    distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
    # Check if close and lifted
    is_close = distance < distance_threshold
    is_lifted = lid.data.root_pos_w[:, 2] > 0.1
    return torch.where(is_close & is_lifted, 1.0, 0.0)
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
                                  lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
                                  robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                                  gripper_body_name: str = "left_hand_body") -> torch.Tensor:
    """Product of the reaching and lifting rewards: nonzero only when the
    gripper is near the lid while the lid is above ``minimal_height``."""
    reaching = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
    lifted = lid_is_lifted(env, minimal_height, lid_cfg)
    # Multiplicative gating: both conditions must hold to earn reward.
    return reaching * lifted
# def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand__l",
# right_gripper_name: str = "left_hand_r",
# height_offset: float = 0.1,
# std: float = 0.1) -> torch.Tensor:
# """奖励函数夹爪相对于lid的位置对齐。
# Args:
# env: 环境实例
# lid_cfg: lid的配置
# robot_cfg: 机器人的配置
# left_gripper_name: 左侧夹爪body名称
# right_gripper_name: 右侧夹爪body名称
# side_distance: Y方向的期望距离
# height_offset: Z方向的期望高度偏移
# std: tanh核函数的标准差
# Returns:
# 位置对齐奖励 (num_envs,)
# """
# lid: RigidObject = env.scene[lid_cfg.name]
# robot: Articulation = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3]
# # 获取两个夹爪的位置
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
# # 计算夹爪中心位置
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
# # 计算相对位置(夹爪中心相对于 lid 中心)
# rel_pos = gripper_center - lid_pos_w # (num_envs, 3)
# # X 方向位置:应该对齐(接近 0
# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6
# x_reward = 1.0 - torch.tanh(x_dist / std)
# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance
# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6
# # y_error = torch.abs(y_dist - side_distance)
# # y_reward = 1.0 - torch.tanh(y_error / std)
# y_reward = 1.0 - torch.tanh(y_dist / std)
# # Z 方向位置:应该在 lid 上方 height_offset 处
# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6
# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6
# z_reward = 1.0 - torch.tanh(z_error / std)
# # 组合位置奖励
# position_reward = x_reward * y_reward * z_reward
# return position_reward
# NOTE(review): this function is defined twice in this module; the later
# definition shadows this one at import time. Consider removing one of them.
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
                                   lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
                                   robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                                   left_gripper_name: str = "left_hand__l",
                                   right_gripper_name: str = "left_hand_r",
                                   height_offset: float = 0.02,
                                   std: float = 0.1) -> torch.Tensor:
    """Reward for aligning the gripper midpoint with the lid (simplified).

    Computes a tanh-kernel reward on the distance between the gripper centre
    and a target point located ``height_offset`` above the lid.

    Args:
        env: Environment instance.
        lid_cfg: Scene entity config for the lid object.
        robot_cfg: Scene entity config for the robot.
        left_gripper_name: Body name of the left finger.
        right_gripper_name: Body name of the right finger.
        height_offset: Desired z-offset of the target point above the lid.
        std: Width of the tanh kernel.

    Returns:
        Position-alignment reward of shape (num_envs,).
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]
    lid_pos_w = lid.data.root_pos_w[:, :3]
    # Target point: lid position raised by height_offset along z.
    target_pos = lid_pos_w.clone()
    # print(target_pos[0])
    target_pos[:, 2] += height_offset
    # print(target_pos[0])
    # Positions of the two finger bodies.
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]  # (num_envs, 3)
    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]  # (num_envs, 3)
    # Gripper centre (midpoint of the fingers).
    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0  # (num_envs, 3)
    # print(gripper_center[0])
    # 3D distance from the gripper centre to the target point.
    distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
    # Smaller distance -> larger reward.
    position_reward = 1.0 - torch.tanh(distance / std)
    return position_reward
# ... (保留原有的 import)
# ==============================================================================
# 新增:直接距离惩罚 (线性引导)
# ==============================================================================
def gripper_lid_distance_penalty(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    height_offset: float = 0.07,
) -> torch.Tensor:
    """Euclidean distance from the gripper midpoint to the grasp target point
    (lid centre raised by ``height_offset``).

    Returns a positive distance; configure a negative weight in the reward
    config so larger distances yield larger penalties.
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]
    # 1. Grasp target: lid centre shifted up by height_offset.
    target = lid.data.root_pos_w[:, :3].clone()
    target[:, 2] += height_offset
    # 2. Midpoint between the two finger bodies.
    l_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    r_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
    left_pos = robot.data.body_pos_w[:, l_ids[0], :]
    right_pos = robot.data.body_pos_w[:, r_ids[0], :]
    midpoint = (left_pos + right_pos) / 2.0
    # 3. Positive distance; the sign is applied via the configured weight.
    return torch.norm(midpoint - target, dim=1)
# ==============================================================================
# 优化:位置对齐 (保持原函数,但确保逻辑清晰)
# ==============================================================================
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
                                   lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
                                   robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                                   left_gripper_name: str = "left_hand__l",
                                   right_gripper_name: str = "left_hand_r",
                                   height_offset: float = 0.02,
                                   std: float = 0.1) -> torch.Tensor:
    """Tanh-kernel reward for keeping the gripper midpoint at the grasp point
    (lid centre + ``height_offset`` in z). The reward rises steeply as the
    distance shrinks; the slope is governed by ``std``."""
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]
    # Target point above the lid.
    goal = lid.data.root_pos_w[:, :3].clone()
    goal[:, 2] += height_offset
    # Midpoint of the two finger bodies.
    l_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    r_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
    left_pos = robot.data.body_pos_w[:, l_ids[0], :]
    right_pos = robot.data.body_pos_w[:, r_ids[0], :]
    center = (left_pos + right_pos) / 2.0
    # Epsilon keeps the kernel input strictly positive.
    gap = torch.norm(center - goal, dim=1) + 1e-6
    # Shaping behaviour is tuned via std and the configured weight.
    return 1.0 - torch.tanh(gap / std)
# def gripper_lid_orientation_alignment(
# env,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# gripper_body_name: str = "left_hand_body",
# gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z
# gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行)
# lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y
# ) -> torch.Tensor:
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 获取姿态
# lid_quat_w = lid.data.root_quat_w
# body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
# gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
# # 3. 垂直对齐 (Vertical Alignment)
# # 目标:夹爪 +Y 指向 World -Z
# grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
# grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
# # print(grip_fwd_world[0])
# world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
# dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
# dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) #add
# # 映射 [-1, 1] -> [0, 1],且全程有梯度
# # 即使是朝天 (+1),只要稍微往下转一点,分数就会增加
# # rew_vertical = (dot_vertical + 1.0) / 2.0
# val_vertical = torch.clamp((dot_vertical + 1.0) / 2.0, 0.0, 1.0)
# rew_vertical = torch.pow(val_vertical, 2) + 1e-4
# # nan -> 0
# rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
# # 4. 偏航对齐 (Yaw Alignment)
# # 目标:夹爪 +Z 平行于 Lid +Y
# grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
# grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
# lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
# lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
# dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
# rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
# # nan -> 0
# rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
# # 5. 组合
# total_reward = rew_vertical * rew_yaw
# total_reward=torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
# # debug
# if not torch.isfinite(total_reward).all():
# print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
# print(f"rew_vertical: {rew_vertical}")
# print(f"rew_yaw: {rew_yaw}")
# return total_reward
def gripper_lid_orientation_alignment(
    env,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    gripper_body_name: str = "left_hand_body",
    gripper_forward_axis: tuple = (0.0, 0.0, 1.0),  # gripper-local axis that should point down
    gripper_width_axis: tuple = (0.0, 1.0, 0.0),  # gripper-local axis that should parallel the handle
    lid_handle_axis: tuple = (0.0, 1.0, 0.0),  # lid-local handle direction
    max_angle_deg: float = 60.0,  # maximum tolerated tilt from vertical, in degrees
) -> torch.Tensor:
    """Reward orienting the gripper above the lid handle.

    The reward is the product of two terms:

    * **vertical term** -- 1.0 when the gripper forward axis points straight
      down (world -Z), decaying linearly to 0.0 at ``max_angle_deg`` and
      clamped to 0.0 beyond it;
    * **yaw term** -- squared |cos| between the world-frame gripper width axis
      and lid handle axis, so parallel and anti-parallel both score highly
      (a ``1e-4`` floor keeps a nonzero gradient).

    Returns:
        Tensor of shape ``(num_envs,)`` with finite, non-negative rewards.
    """
    # Resolve scene entities.
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    # World-frame orientations of the lid root and the gripper body.
    lid_quat_w = lid.data.root_quat_w
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
    # --- Vertical alignment: gripper forward axis vs. world down ---
    grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
    grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
    world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
    # Dot product of unit vectors equals cos(angle); clamp once for acos stability.
    dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
    angle_rad = torch.acos(torch.clamp(dot_vertical, -1.0, 1.0))
    # Plain Python float is enough here (was a hardcoded 3.14159... tensor).
    max_angle_rad = max_angle_deg * torch.pi / 180.0
    # Linear decay from 1.0 (perfectly down) to 0.0 at the tolerance angle.
    angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
    rew_vertical = 1.0 - angle_normalized
    # Hard cutoff: no vertical reward beyond the tolerance angle.
    rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical))
    rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
    # --- Yaw alignment: gripper width axis parallel to lid handle axis ---
    grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
    grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
    lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
    lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
    dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
    # Squared |cos| treats parallel and anti-parallel the same; epsilon keeps gradient alive.
    rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
    rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
    # --- Combine and sanitize ---
    total_reward = rew_vertical * rew_yaw
    total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
    # debug: report non-finite values that slipped past the sanitization above
    if not torch.isfinite(total_reward).all():
        print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
        print(f"rew_vertical: {rew_vertical}")
        print(f"rew_yaw: {rew_yaw}")
    return total_reward
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body", # 用于计算距离的 body
# # 注意:确保 joint_names 只包含那两个夹爪关节
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05, # 5cm 以内视为“近”,需要闭合
# target_open_pos: float = 0.0, # 【修正】张开是 0.0
# target_close_pos: float = 0.03 # 【修正】闭合是 0.03
# ) -> torch.Tensor:
# """
# 逻辑很简单:
# 1. 如果 距离 < close_threshold目标关节位置 = 0.03 (闭合)
# 2. 如果 距离 >= close_threshold目标关节位置 = 0.0 (张开)
# 3. 返回 -abs(当前关节 - 目标关节),即惩罚误差。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算距离 (末端 vs Lid中心)
# lid_pos_w = lid.data.root_pos_w[:, :3]
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # distance: (num_envs,)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 3. 动态确定目标关节值
# # 如果距离小于阈值,目标设为 close_pos否则设为 open_pos
# # target_val: (num_envs, 1)
# # print(distance)
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 4. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# # joint_pos: (num_envs, 2)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# # 取绝对值防止左指是负数的情况虽然你的配置里范围看起来都是正的加上abs更保险
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 5. 计算误差
# # error: (num_envs,) -> 两个手指误差的平均值
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # # 6. 返回负奖励 (惩罚)
# # # 误差越大,惩罚越大。完全符合要求时奖励为 0。
# # return -error
# # =====================================================
# # 🚀 核心修复:安全输出
# # =====================================================
# # 1. 使用 tanh 限制数值范围在 [-1, 0]
# reward = -torch.tanh(error / 0.01)
# # 2. 过滤 NaN这一步能救命
# # 如果物理引擎算出 NaN这里会把它变成 0.0,防止训练崩溃
# reward = torch.nan_to_num(reward, nan=0.0, posinf=0.0, neginf=-1.0)
# return reward
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body",
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05,
# target_open_pos: float = 0.0,
# target_close_pos: float = 0.03,
# height_offset: float = 0.06, # 新增:与位置奖励保持一致
# ) -> torch.Tensor:
# """
# 逻辑:
# 1. 计算夹爪到【目标抓取点】(Lid上方 height_offset 处) 的距离。
# 2. 如果距离 < close_threshold目标关节位置 = 闭合。
# 3. 否则:目标关节位置 = 张开。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算目标抓取点位置 (Lid位置 + Z轴偏移)
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 修正为抓取点位置
# # 获取夹爪位置
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # 3. 计算距离 (夹爪 vs 目标抓取点)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 4. 动态确定目标关节值
# # 靠近抓取点 -> 闭合;远离 -> 张开
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 5. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 6. 计算误差并返回奖励
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # 使用 tanh 限制数值范围
# reward = 1.0 - torch.tanh(error / 0.01) # 误差越小奖励越大
# return torch.nan_to_num(reward, nan=0.0)
def _is_grasped(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg,
    robot_cfg: SceneEntityCfg,
    left_gripper_name: str,
    right_gripper_name: str,
    joint_names: list,
    height_offset: float,
    grasp_radius: float,
    target_close_pos: float
) -> torch.Tensor:
    """Internal helper: per-env boolean check that the lid handle is grasped.

    A grasp is declared only when all four conditions hold:
      1. the fingertip midpoint lies within ``grasp_radius`` of the handle target;
      2. the midpoint height matches the handle height within 3 cm
         (prevents counting a gripper pressing down on top of the lid);
      3. both finger joints are displaced past 0.005 (actively closing;
         no upper bound so a very tight grip is not rejected);
      4. the fingertips are more than 5 mm apart, i.e. an object is
         keeping them from closing onto each other (not an empty pinch).
    """
    # Resolve scene entities.
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    # Target grasp point: lid root shifted up to the handle.
    handle_pos_w = lid.data.root_pos_w[:, :3].clone()
    handle_pos_w[:, 2] += height_offset
    # Fingertip world positions and their midpoint.
    left_ids, _ = robot.find_bodies([left_gripper_name])
    right_ids, _ = robot.find_bodies([right_gripper_name])
    tip_left = robot.data.body_pos_w[:, left_ids[0], :3]
    tip_right = robot.data.body_pos_w[:, right_ids[0], :3]
    midpoint = (tip_left + tip_right) / 2.0
    # Condition 1: midpoint inside the grasp sphere around the handle.
    is_near = torch.norm(midpoint - handle_pos_w, dim=-1) < grasp_radius
    # Condition 2: vertical offset within a 3 cm tolerance band.
    is_z_valid = torch.abs(midpoint[:, 2] - handle_pos_w[:, 2]) < 0.03
    # Condition 3: both finger joints show closing displacement.
    joint_ids, _ = robot.find_joints(joint_names)
    finger_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
    is_effort_closing = torch.all(finger_pos > 0.005, dim=1)
    # Condition 4: fingertips held apart by the handle (< 5 mm means empty pinch).
    is_not_empty = torch.norm(tip_left - tip_right, dim=-1) > 0.005
    # All four must hold simultaneously to count as a grasp.
    return is_near & is_z_valid & is_effort_closing & is_not_empty
def gripper_close_when_near(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    target_close_pos: float = 0.03,
    height_offset: float = 0.02,
    grasp_radius: float = 0.05
) -> torch.Tensor:
    """Reward squeezing the gripper, paid only while the handle is grasped.

    Returns a per-env value in [0, 1]: the mean absolute finger-joint position
    divided by ``target_close_pos`` (clamped at 1.0) when :func:`_is_grasped`
    holds, and 0.0 otherwise.
    """
    # Gate: only a verified grasp state earns the squeeze reward.
    grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )
    # Mean absolute finger displacement measures how tightly we are squeezing.
    robot = env.scene[robot_cfg.name]
    joint_ids, _ = robot.find_joints(joint_names)
    finger_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
    squeeze = torch.mean(finger_pos, dim=1)
    # Normalize against full closure so the factor approaches 1.0 as the
    # fingers press home -- this pushes the agent toward a firm grip.
    clamping_factor = torch.clamp(squeeze / target_close_pos, max=1.0)
    # Misaligned or empty grasps earn nothing.
    return torch.where(grasped, clamping_factor, 0.0)
# def gripper_close_when_near(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# target_close_pos: float = 0.03,
# height_offset: float = 0.03,
# grasp_radius: float = 0.02 # 球面半径 2cm
# ) -> torch.Tensor:
# # 1. 获取位置
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 把手中心
# # 获取左右指尖位置
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# # 计算夹爪中心
# pos_center = (pos_L + pos_R) / 2.0
# # 2. 距离判定 (Is Inside Sphere?)
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
# is_in_sphere = (dist_center < grasp_radius).float()
# # 3. "在中间"判定 (Is Between?)
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
# vec_LR = pos_R - pos_L # 左指指向右指
# vec_LO = lid_pos_w - pos_L # 左指指向物体
# # 计算投影比例 t
# # P_proj = P_L + t * (P_R - P_L)
# # t = (vec_LO . vec_LR) / |vec_LR|^2
# # 如果 0 < t < 1说明投影在两个指尖之间
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
# t = dot / len_sq
# is_between = (t > 0.0) & (t < 1.0)
# is_between = is_between.float()
# # 4. 闭合判定
# joint_ids, _ = robot.find_joints(joint_names)
# # 注意:如果是 Binary Action可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
# # 只有当非常接近闭合目标时才给分
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
# # 5. 最终奖励
# # 只有三者同时满足才给 1.0 分
# reward = is_in_sphere * is_between * is_closing
# return torch.nan_to_num(reward, nan=0.0)
# def lid_is_lifted(
# env:ManagerBasedRLEnv,
# minimal_height:float,
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
# ) -> torch.Tensor:
# lid = env.scene[lid_cfg.name]
# lid_height = lid.data.root_pos_w[:, 2]
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
# reward=torch.nan_to_num(reward, nan=0.0)
# return reward
def lid_is_lifted(
    env: ManagerBasedRLEnv,
    minimal_height: float = 0.05,  # relative lift threshold for the success bonus (m)
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    height_offset: float = 0.03,
    grasp_radius: float = 0.1,
    target_close_pos: float = 0.03,
) -> torch.Tensor:
    """Reward lifting the lid while it is grasped and moving slowly.

    Shaped term: 2x the lift above the lid's default (spawn) height.
    Bonus term: +1.0 once the lift exceeds ``minimal_height``.
    Both terms are zeroed unless the lid is grasped AND slower than 1 m/s;
    the speed gate filters out lids that were knocked flying.
    """
    grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )
    lid = env.scene[lid_cfg.name]
    # Lift is measured relative to the lid's default (spawn) height.
    height_now = lid.data.root_pos_w[:, 2]
    height_start = lid.data.default_root_state[:, 2]
    lift_height = torch.clamp(height_now - height_start, min=0.0)
    # Speed gate: a lid moving faster than 1 m/s was likely struck, not lifted.
    is_stable = torch.norm(lid.data.root_lin_vel_w, dim=1) < 1.0
    # Dense shaping plus a discrete success bonus.
    shaping_reward = lift_height * 2.0
    success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
    # Reward is only paid for a stable, grasped lift.
    reward = torch.where(is_stable & grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
    return torch.nan_to_num(reward, nan=0.0)
def lid_lift_success_reward(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand_body",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    # key parameters
    settled_height: float = 0.75,  # resting height of the lid on the device (tune to the sim)
    target_lift_height: float = 0.05,  # desired relative lift (m)
    grasp_dist_threshold: float = 0.05,  # max gripper-to-lid distance to count as holding
    closed_pos: float = 0.03  # nominal fully-closed finger joint position
) -> torch.Tensor:
    """Pay lift progress (plus a success bonus) only while the lid is held.

    Progress is ``clamp((lid_z - settled_height) / target_lift_height, 0, 1)``;
    a +2.0 bonus is added once the relative lift reaches the target. Both are
    zeroed unless the gripper is close to the lid AND the fingers are past
    80% of full closure.
    """
    # Resolve scene entities.
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    lid_z = lid.data.root_pos_w[:, 2]
    lid_pos = lid.data.root_pos_w[:, :3]
    body_ids, _ = robot.find_bodies([left_gripper_name])
    gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
    # Holding gate #1: gripper body close to the lid.
    is_close = torch.norm(gripper_pos - lid_pos, dim=-1) < grasp_dist_threshold
    # Holding gate #2: both fingers past 80% of full closure.
    joint_ids, _ = robot.find_joints(joint_names)
    finger_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
    is_closed = torch.all(finger_pos > (closed_pos * 0.8), dim=-1)
    held = is_close & is_closed
    # Relative lift above the settled height, mapped to [0, 1] progress.
    current_lift = lid_z - settled_height
    lift_reward = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
    # Large bonus once the target lift is reached.
    success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
    # Lift reward is paid only while holding; otherwise zero.
    total_reward = torch.where(held, lift_reward + success_bonus, torch.zeros_like(lift_reward))
    # Safe output: physics NaNs must not reach the optimizer.
    return torch.nan_to_num(total_reward, nan=0.0)
def lid_lift_progress_reward(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    initial_height: float = 0.8,
    target_height_lift: float = 0.15,
    height_offset: float = 0.02,  # must match the grasp reward's offset
    grasp_radius: float = 0.1,  # slightly relaxed radius so jitter does not drop the reward
    target_close_pos: float = 0.03,
    std: float = 0.05  # tanh scale for the progress shaping
) -> torch.Tensor:
    """Smooth lift-progress reward, gated on a confirmed grasp.

    Progress is ``tanh(lift / std)`` where lift is the lid height above
    ``initial_height``, clamped to ``[0, target_height_lift]``. The grasp gate
    ensures a knocked-up lid earns nothing.

    Returns:
        Tensor of shape ``(num_envs,)`` with finite rewards in [0, 1).
    """
    # 1. Gate on a verified grasp.
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )
    # 2. Lift above the initial height, capped at the target.
    lid = env.scene[lid_cfg.name]
    current_height = lid.data.root_pos_w[:, 2]
    lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
    # 3. Smooth, saturating shaping; paid only while grasped.
    progress = torch.tanh(lift_height / std)
    reward = torch.where(is_grasped, progress, 0.0)
    # Sanitize like the sibling rewards: physics NaNs must not reach the optimizer.
    return torch.nan_to_num(reward, nan=0.0)

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def lid_dropped(env: ManagerBasedRLEnv,
                minimum_height: float = -0.05,
                lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
    """Termination: true for envs where the lid root fell below ``minimum_height`` (world Z)."""
    lid: RigidObject = env.scene[lid_cfg.name]
    lid_height = lid.data.root_pos_w[:, 2]
    return lid_height < minimum_height
def lid_successfully_grasped(env: ManagerBasedRLEnv,
                             distance_threshold: float = 0.03,
                             height_threshold: float = 0.2,
                             lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
                             robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                             gripper_body_name: str = "left_hand_body") -> torch.Tensor:
    """Termination: gripper is on the lid AND the lid has been lifted.

    Per-env conditions (both required):
      * gripper body within ``distance_threshold`` of the lid root;
      * lid root height above ``height_threshold``.
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]
    # find_bodies returns (ids, names); take the scalar id so body_pos_w indexing
    # yields (num_envs, 3). The previous list indexing produced (num_envs, 1, 3),
    # which broadcast against (num_envs, 3) into (num_envs, num_envs, 3) and gave
    # a wrongly-shaped "distance" tensor.
    body_ids, _ = robot.find_bodies([gripper_body_name])
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
    distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
    is_close = distance < distance_threshold
    is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
    return is_close & is_lifted
def gripper_at_lid_side(env: ManagerBasedRLEnv,
                        lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
                        robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                        left_gripper_name: str = "left_hand__l",  # note the double underscore
                        right_gripper_name: str = "left_hand_r",
                        side_distance: float = 0.05,
                        side_tolerance: float = 0.01,
                        alignment_tolerance: float = 0.02,
                        height_offset: float = 0.1,
                        height_tolerance: float = 0.02) -> torch.Tensor:
    """Terminate when the gripper midpoint sits beside the lid at the target height.

    Frame convention (lid-relative, world-aligned axes):
      * X -- direction in which the two fingers close toward each other
      * Y -- the finger-gap direction, parallel to the lid handle
      * Z -- vertical
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]
    lid_pos_w = lid.data.root_pos_w[:, :3]
    # Midpoint of the two fingertips, expressed relative to the lid root.
    left_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
    tip_left = robot.data.body_pos_w[:, left_ids[0], :]
    tip_right = robot.data.body_pos_w[:, right_ids[0], :]
    rel_pos = (tip_left + tip_right) / 2.0 - lid_pos_w
    # Y: |offset| must fall in a band of +/- side_tolerance around side_distance.
    y_dist = torch.abs(rel_pos[:, 1])
    y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
    # X: laterally centered on the lid.
    x_ok = torch.abs(rel_pos[:, 0]) < alignment_tolerance
    # Z: hovering height_offset above the lid root.
    z_ok = torch.abs(rel_pos[:, 2] - height_offset) < height_tolerance
    # All three axis checks must pass simultaneously.
    return x_ok & y_ok & z_ok
def base_height_failure(env: ManagerBasedRLEnv,
                        minimum_height: float | None = None,
                        maximum_height: float | None = None,
                        asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Terminate envs whose base height leaves the [minimum_height, maximum_height] band.

    Either bound may be ``None`` to disable that side of the check; with both
    ``None`` the result is all-False.
    """
    # Resolve the articulation and read its root height (world Z).
    asset: Articulation = env.scene[asset_cfg.name]
    base_z = asset.data.root_pos_w[:, 2]
    # Start from "all fine" and OR-in each enabled bound violation.
    failed = torch.zeros_like(base_z, dtype=torch.bool)
    if minimum_height is not None:
        failed = failed | (base_z < minimum_height)
    if maximum_height is not None:
        failed = failed | (base_z > maximum_height)
    return failed