diff --git a/.vscode/extensions.json b/.vscode/extensions.json index 6306e43..4bd2d15 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -8,5 +8,6 @@ "ms-iot.vscode-ros", "ms-python.black-formatter", "ms-python.flake8", + "saoudrizwan.claude-dev", ] } diff --git a/create_liquid.py b/create_liquid.py new file mode 100644 index 0000000..eb4c28b --- /dev/null +++ b/create_liquid.py @@ -0,0 +1,129 @@ +import omni.isaac.core.utils.prims as prim_utils +from omni.isaac.core.simulation_context import SimulationContext +from omni.isaac.core.utils.stage import get_current_stage +from pxr import UsdGeom, UsdPhysics, PhysxSchema, Gf, Sdf + +# 引入 PhysX 粒子工具 (Omniverse 内置库) +import omni.physx.scripts.utils as physxUtils +import omni.physx.scripts.particleUtils as particleUtils + +def create_fluid_scene(): + stage = get_current_stage() + + # 1. 初始化仿真环境 + sim = SimulationContext() + # 确保物理频率适合流体 (流体通常需要更小的步长或更多的子步) + sim.set_simulation_dt(physics_dt=1.0 / 60.0, rendering_dt=1.0 / 60.0) + + # 2. 创建粒子系统 (Particle System) + # 这是管理所有流体粒子的容器 + particle_system_path = "/World/FluidSystem" + particleUtils.add_physx_particle_system( + stage=stage, + particle_system_path=particle_system_path, + simulation_owner=None, + radius=0.02, # 粒子半径(决定流体分辨率) + contact_offset=0.03, + solid_rest_offset=0.02, + fluid_rest_offset=0.02, + mode="fluid" # 设置为流体模式 + ) + + # 3. 创建流体材质 (定义粘度) + # 对于像蜂蜜或洗发水这样的"带流性"液体,需要高粘度 + material_path = "/World/FluidMaterial" + particleUtils.add_pbd_particle_material( + stage=stage, + path=material_path, + cohesion=0.02, # 内聚力 (让液体聚在一起) + viscosity=0.5, # 粘度 (关键参数:数值越大越粘稠,如蜂蜜) + surface_tension=0.01, # 表面张力 + friction=0.1, # 粒子间摩擦 + damping=0.1 # 阻尼 + ) + + # 4. 
创建流体几何体 (在试管初始位置生成一堆粒子) + # 假设我们在 (0, 0, 0.5) 的位置生成一团液体 + fluid_path = "/World/Liquid" + # 使用网格采样创建粒子点集 + particleUtils.add_physx_particleset_pointinstancer( + stage=stage, + path=fluid_path, + particle_system_path=particle_system_path, + self_collision=True, + fluid=True, + particle_group=0, + particle_mass=0.001, + density=1000.0 # 水的密度 + ) + + # 这里的关键是将材质绑定到几何体 + physxUtils.add_physics_material_to_prim(stage, stage.GetPrimAtPath(fluid_path), material_path) + + # 这里的逻辑通常需要编写一个简单的生成器,在空间中填满粒子 + # 为了演示简单,我们假设通过 create_grid_particles 生成位置 + # 在实际 Isaac Lab 中,你通常会加载一个预先保存好的带粒子的 USD,或者使用 SamplingAPI + points = [] + for x in range(5): + for y in range(5): + for z in range(10): + points.append(Gf.Vec3f(x*0.04, y*0.04, 0.5 + z*0.04)) + + # 将坐标赋予粒子系统 + points_attr = stage.GetPrimAtPath(fluid_path).GetAttribute("positions") + points_attr.Set(points) + + # 5. 创建容器 (烧杯和试管) + # 这里使用简单的圆柱管演示,实际应用需加载 .usd 模型 + + # 烧杯 (底部接收) + beaker_path = "/World/Beaker" + prim_utils.create_cyl( + prim_path=beaker_path, + radius=0.15, + height=0.2, + position=Gf.Vec3d(0, 0, 0.1) + ) + # 注意:PrimUtils创建的是实心凸包,流体进不去。 + # 关键步骤:必须将容器碰撞改为 Mesh (SDF) 或手动用多个面拼成空心杯子。 + # 在代码中通常加载自定义 USD 资产,这里仅做说明: + # setup_concave_collision(beaker_path) + + # 试管 (上方倒水) + tube_path = "/World/TestTube" + tube = prim_utils.create_cyl( + prim_path=tube_path, + radius=0.05, + height=0.2, + position=Gf.Vec3d(0.2, 0, 0.6) # 位于烧杯上方侧面 + ) + # 为试管添加刚体属性以便旋转 + physxUtils.set_rigid_body_properties(stage, tube_path) + + return sim, tube_path + +def run_simulation(sim, tube_path): + # 6. 
开始循环并执行“倒水”动作 + sim.initialize_physics() + sim.play() + + stage = get_current_stage() + tube_prim = stage.GetPrimAtPath(tube_path) + xform = UsdGeom.Xformable(tube_prim) + + # 模拟 500 帧 + for i in range(500): + # 简单的运动学控制:慢慢旋转试管 + if i < 200: + # 旋转以倒出液体 (欧拉角或四元数) + rotation = Gf.Rotation(Gf.Vec3d(0, 1, 0), i * 0.5) # 绕Y轴旋转 + # 注意:实际代码需处理完整的 Transform 设置 + current_xform = xform.GetLocalTransformation() + # 这里简化处理,直接更新姿态逻辑需根据具体场景编写 + + sim.step() + +# 执行 +if __name__ == "__main__": + sim, tube = create_fluid_scene() + run_simulation(sim, tube) diff --git a/source/mindbot/mindbot/assets/centrifuge.py b/source/mindbot/mindbot/assets/centrifuge.py new file mode 100644 index 0000000..368f436 --- /dev/null +++ b/source/mindbot/mindbot/assets/centrifuge.py @@ -0,0 +1,70 @@ +import isaaclab.sim as sim_utils +from isaaclab.actuators import ImplicitActuatorCfg +from isaaclab.assets import ArticulationCfg +from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR +from isaaclab.assets import RigidObjectCfg, UsdFileCfg, RigidBodyPropertiesCfg, CollisionPropertiesCfg + +CENTRIFUGE_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/centrifuge.usd", + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=1.0, + linear_damping=0.5, + angular_damping=0.5, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + stabilization_threshold=1e-6, + # 【重要】必须要固定底座,否则机器人按盖子时,离心机会翻倒 + # fix_root_link=True, + ), + collision_props=CollisionPropertiesCfg( + collision_enabled=True, + contact_offset=0.0005,#original 0.02 + rest_offset=0 + ), + ), + init_state=ArticulationCfg.InitialStateCfg( + # 1. 
参照机器人配置,在这里定义初始关节角度
+        joint_pos={
+            "r1": 0.0,  # 转子位置(无关紧要) — was math.radians(0.0); `math` is never imported in this file
+
+            # 【关键修改】:初始让盖子处于打开状态
+            # 您的 USD 限位是 (-100, 0),-100度为最大开启
+            "r2": -1.7453293,  # -100 deg — was math.radians(-100.0); `math` is never imported in this file
+        },
+        pos=(0.80, -0.25, 0.8085),#(0.95, -0.3, 0.8085)
+        rot=[-0.7071, 0.0, 0.0, 0.7071],
+        # rot=[0.0, 0.0, 0.0, 1.0],
+    ),
+    actuators={
+        "lid_passive_mechanism": ImplicitActuatorCfg(
+            joint_names_expr=["r2"],
+
+            # 【关键差异说明】
+            # 机器人的手臂 stiffness 是 10000.0,因为机器人需要精准定位且不晃动。
+            #
+            # 但是!离心机盖子如果设为 10000,它就会像焊死在空中一样,机器人根本按不动。
+            # 这里设为 200.0 左右:
+            # 1. 力度足以克服重力,让盖子初始保持打开。
+            # 2. 力度又足够软,当机器人用力下压时,盖子会顺从地闭合。
+            stiffness=200.0,
+
+            # 阻尼:给一点阻力,模拟液压杆手感,防止像弹簧一样乱晃
+            damping=20.0,
+
+            # 确保力矩上限不要太小,否则托不住盖子
+            effort_limit_sim=1000.0,
+            velocity_limit_sim=100.0,
+        ),
+        # 转子可以硬一点,不需要被机器人按动
+        "rotor_control": ImplicitActuatorCfg(
+            joint_names_expr=["r1"],
+            stiffness=0.0,
+            damping=10.0,
+        ),
+    }
+)
\ No newline at end of file
diff --git a/source/mindbot/mindbot/assets/table.py b/source/mindbot/mindbot/assets/table.py
new file mode 100644
index 0000000..942705c
--- /dev/null
+++ b/source/mindbot/mindbot/assets/table.py
@@ -0,0 +1,29 @@
+import isaaclab.sim as sim_utils
+from isaaclab.actuators import ImplicitActuatorCfg
+from isaaclab.assets import ArticulationCfg
+from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
+from isaaclab.assets import RigidObjectCfg, UsdFileCfg, RigidBodyPropertiesCfg, CollisionPropertiesCfg
+
+TABLE_CFG=RigidObjectCfg(
+    prim_path="{ENV_REGEX_NS}/Table",
+    init_state=RigidObjectCfg.InitialStateCfg(
+        pos=[0.95, -0.3, 0.005],
+        rot=[0.7071, 0.0, 0.0, 0.7071],
+        lin_vel=[0.0, 0.0, 0.0],
+        ang_vel=[0.0, 0.0, 0.0],
+    ),
+    spawn=UsdFileCfg(
+        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
+        rigid_props=RigidBodyPropertiesCfg(
+            rigid_body_enabled= True,
+            solver_position_iteration_count=32,
+            solver_velocity_iteration_count=16,
+            disable_gravity=False,
+        ),
+        collision_props=CollisionPropertiesCfg(
+            collision_enabled=True,
+            contact_offset=0.0005,#original 0.02
+ rest_offset=0 + ), + ), +) \ No newline at end of file diff --git a/source/mindbot/mindbot/assets/tube.py b/source/mindbot/mindbot/assets/tube.py new file mode 100644 index 0000000..2778c14 --- /dev/null +++ b/source/mindbot/mindbot/assets/tube.py @@ -0,0 +1,47 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configuration for a simple Cartpole robot.""" + + +import isaaclab.sim as sim_utils +from isaaclab.actuators import ImplicitActuatorCfg +from isaaclab.assets import ArticulationCfg +from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR +from isaaclab.assets import RigidObjectCfg, UsdFileCfg, RigidBodyPropertiesCfg, CollisionPropertiesCfg +## +# Configuration +## +Tube_CFG = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Tube", + init_state=RigidObjectCfg.InitialStateCfg( + # pos=[0.9523,-0.2512,1.0923], + pos=[0.803 , -0.25, 1.0282],#(0.9988, -0.2977, 1.0498633) + # rot=[-0.7071, 0.0, 0.0, 0.7071], + rot=[0.0, 0.0, 0.0, 1.0], + lin_vel=[0.0, 0.0, 0.0], + ang_vel=[0.0, 0.0, 0.0], + ), + spawn=UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/assets/Collected_equipment001/Equipment/tube1.usd", + # scale=(0.2, 0.2, 0.2), + rigid_props=RigidBodyPropertiesCfg( + rigid_body_enabled= True, + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + max_angular_velocity=1000.0, + max_linear_velocity=1000.0, + max_depenetration_velocity=0.5,#original 5.0 + linear_damping=5.0, #original 0.5 + angular_damping=5.0, #original 0.5 + disable_gravity=False, + ), + collision_props=CollisionPropertiesCfg( + collision_enabled=True, + contact_offset=0.0005,#original 0.02 + rest_offset=0 + ), + ), +) \ No newline at end of file diff --git a/source/mindbot/mindbot/assets/tube_rack.py b/source/mindbot/mindbot/assets/tube_rack.py new file mode 100644 index 0000000..b9a586c --- /dev/null +++ 
b/source/mindbot/mindbot/assets/tube_rack.py @@ -0,0 +1,51 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configuration for a simple Cartpole robot.""" + + +import isaaclab.sim as sim_utils +from isaaclab.actuators import ImplicitActuatorCfg +from isaaclab.assets import ArticulationCfg +from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR +# from isaaclab.assets import RigidObjectCfg, UsdFileCfg, RigidBodyPropertiesCfg, CollisionPropertiesCfg +from isaaclab.assets import RigidObjectCfg +from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg +from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg + +# Configuration +## +TubeRack_CFG = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/TubeRack", + init_state=RigidObjectCfg.InitialStateCfg( + # pos=[0.9523,-0.2512,1.0923], + pos=[0.803 , 0.65, 0.9],#(0.9988, -0.2977, 1.0498633) + # rot=[-0.7071, 0.0, 0.0, 0.7071], + rot=[1.0, 0.0, 0.0, 0.0], + lin_vel=[0.0, 0.0, 0.0], + ang_vel=[0.0, 0.0, 0.0], + ), + spawn=UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/assets/Collected_equipment001/Equipment/Test_Tube_Rack_AA_01.usdc", + # scale=(0.2, 0.2, 0.2), + rigid_props=RigidBodyPropertiesCfg( + rigid_body_enabled= True, + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + max_angular_velocity=1000.0, + max_linear_velocity=1000.0, + max_depenetration_velocity=0.5,#original 5.0 + linear_damping=5.0, #original 0.5 + angular_damping=5.0, #original 0.5 + disable_gravity=False, + ), + collision_props=CollisionPropertiesCfg( + collision_enabled=True, + contact_offset=0.0005,#original 0.02 + rest_offset=0 + ), + ), +) + diff --git a/source/mindbot/mindbot/robot/mindbot.py b/source/mindbot/mindbot/robot/mindbot.py index 821630a..3083486 100644 --- a/source/mindbot/mindbot/robot/mindbot.py +++ 
b/source/mindbot/mindbot/robot/mindbot.py @@ -101,7 +101,7 @@ MINDBOT_CFG = ArticulationCfg( "right_hand_joint_left": 0.0, "right_hand_joint_right": 0.0, # trunk - "PrismaticJoint": 0.23, # 0.30 + "PrismaticJoint": 0.26, # 0.30 # head "head_revoluteJoint": 0.0 #0.5236 }, diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/centrifuge_env_cfg.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge/centrifuge_env_cfg.py index 6e24fe5..512ba07 100644 --- a/source/mindbot/mindbot/tasks/manager_based/centrifuge/centrifuge_env_cfg.py +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/centrifuge_env_cfg.py @@ -24,6 +24,9 @@ from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg from isaaclab.sensors import CameraCfg +from mindbot.assets.tube_rack import TubeRack_CFG + + from . import mdp ## @@ -61,8 +64,8 @@ TABLE_CFG=RigidObjectCfg( ), ) -LID_CFG = RigidObjectCfg( - prim_path="{ENV_REGEX_NS}/Lid", +Tube_CFG = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Tube", init_state=RigidObjectCfg.InitialStateCfg( # pos=[0.9523,-0.2512,1.0923], pos=[0.803 , -0.25, 1.0282],#(0.9988, -0.2977, 1.0498633) @@ -218,7 +221,10 @@ class MindbotSceneCfg(InteractiveSceneCfg): Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot") table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table") centrifuge: ArticulationCfg = CENTRIFUGE_CFG.replace(prim_path="{ENV_REGEX_NS}/centrifuge") - lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid") + # lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid") + tube: RigidObjectCfg = Tube_CFG.replace(prim_path="{ENV_REGEX_NS}/Tube") + # tube rack + # tube_rack: RigidObjectCfg = TubeRack_CFG.replace(prim_path="{ENV_REGEX_NS}/TubeRack") # lights dome_light = AssetBaseCfg( prim_path="/World/DomeLight", @@ -283,21 +289,21 @@ 
class ActionsCfg: """Action specifications for the MDP.""" # 使用任务空间控制器:策略输出末端执行器位置+姿态增量(6维) - # left_arm_ee = DifferentialInverseKinematicsActionCfg( - # asset_name="Mindbot", - # joint_names=["r_joint[1-6]"], # 左臂的6个关节 - # body_name="right_hand_body", # 末端执行器body名称 - # controller=DifferentialIKControllerCfg( - # command_type="pose", # 控制位置+姿态 - # use_relative_mode=True, # 相对模式:动作是增量 - # ik_method="dls", # Damped Least Squares方法 - # ), - # scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025), - # ) - - right_arm_fixed = mdp.JointPositionActionCfg( + right_arm_ee = DifferentialInverseKinematicsActionCfg( asset_name="Mindbot", - joint_names=["r_joint[1-6]"], # 对应 l_joint1 到 l_joint6 + joint_names=["r_joint[1-6]"], # 左臂的6个关节 + body_name="right_hand_body", # 末端执行器body名称 + controller=DifferentialIKControllerCfg( + command_type="pose", # 控制位置+姿态 + use_relative_mode=True, # 相对模式:动作是增量 + ik_method="dls", # Damped Least Squares方法 + ), + scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025), + ) + + left_arm_fixed = mdp.JointPositionActionCfg( + asset_name="Mindbot", + joint_names=["l_joint[1-6]"], # 对应 l_joint1 到 l_joint6 # 1. 缩放设为 0:这让 RL 策略无法控制它,它不会动 scale=0.0, @@ -305,12 +311,12 @@ class ActionsCfg: # 2. 
偏移设为你的目标角度(弧度):这告诉电机“永远停在这里” # 对应 (135, -70, -90, 90, 90, -70) offset={ - "r_joint1": 2.3562, - "r_joint2": -1.2217, - "r_joint3": -1.5708, - "r_joint4": 1.5708, - "r_joint5": 1.5708, - "r_joint6": -1.2217, + "l_joint1": 2.3562, + "l_joint2": -1.2217, + "l_joint3": -1.5708, + "l_joint4": 1.5708, + "l_joint5": 1.5708, + "l_joint6": -1.2217, }, ) @@ -331,7 +337,7 @@ class ActionsCfg: asset_name="Mindbot", joint_names=["PrismaticJoint"], scale=0.00, - offset=0.24, # 0.3 + offset=0.26, # 0.3 clip=None ) @@ -359,7 +365,7 @@ class ObservationsCfg: left_gripper_pos = ObsTerm(func=left_gripper_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])}) - lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")}) + lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("tube")}) centrifuge_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("centrifuge")}) def __post_init__(self) -> None: @@ -422,11 +428,11 @@ class EventCfg: "velocity_range": {"x": (0.0, 0.0)} } ) - reset_lid = EventTerm( + reset_tube = EventTerm( func=mdp.reset_root_state_uniform, mode="reset", params={ - "asset_cfg": SceneEntityCfg("lid"), + "asset_cfg": SceneEntityCfg("tube"), # "pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)}, "pose_range": {"x": (0.0, 0.0), "y": (0.0, 0.0), "z": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)} @@ -441,7 +447,7 @@ class RewardsCfg: gripper_lid_orientation_alignment = RewTerm( func=mdp.gripper_lid_orientation_alignment, params={ - "lid_cfg": SceneEntityCfg("lid"), + "lid_cfg": SceneEntityCfg("tube"), "robot_cfg": SceneEntityCfg("Mindbot"), "gripper_body_name": "right_hand_body", "gripper_forward_axis": (0.0, 0.0, 1.0), @@ -457,7 +463,7 @@ class RewardsCfg: gripper_lid_position_alignment = RewTerm( func=mdp.gripper_lid_position_alignment, params={ - "lid_cfg": SceneEntityCfg("lid"), + "lid_cfg": SceneEntityCfg("tube"), "robot_cfg": 
SceneEntityCfg("Mindbot"), "left_gripper_name": "right_hand_l", "right_gripper_name": "right_hand__r", @@ -469,7 +475,7 @@ class RewardsCfg: approach_lid_penalty = RewTerm( func=mdp.gripper_lid_distance_penalty, params={ - "lid_cfg": SceneEntityCfg("lid"), + "lid_cfg": SceneEntityCfg("tube"), "robot_cfg": SceneEntityCfg("Mindbot"), "left_gripper_name": "right_hand_l", "right_gripper_name": "right_hand__r", @@ -546,7 +552,7 @@ class TerminationsCfg: lid_fly_away = DoneTerm( func=mdp.base_height_failure, - params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0}, + params={"asset_cfg": SceneEntityCfg("tube"), "maximum_height": 2.0}, ) # 新增:盖子掉落判定 @@ -554,7 +560,7 @@ class TerminationsCfg: lid_dropped = DoneTerm( func=mdp.base_height_failure, # 复用高度判定函数 params={ - "asset_cfg": SceneEntityCfg("lid"), + "asset_cfg": SceneEntityCfg("tube"), "minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了 }, ) diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/__init__.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/__init__.py new file mode 100644 index 0000000..2f647a7 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/__init__.py @@ -0,0 +1,30 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. 
+## + + +gym.register( + id="Template-centrifuge-tube-put-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.centrifuge_env_cfg:MindbotEnvCfg", + # "env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg", + "skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml", + "skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml", + "skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", + }, +) \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/__init__.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/__init__.py new file mode 100644 index 0000000..a597dfa --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/rl_games_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/rl_games_ppo_cfg.yaml new file mode 100644 index 0000000..71216e6 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/rl_games_ppo_cfg.yaml @@ -0,0 +1,78 @@ +params: + seed: 42 + + # environment wrapper clipping + env: + # added to the wrapper + clip_observations: 5.0 + # can make custom wrapper? 
+ clip_actions: 1.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + # doesn't have this fine grained control but made it close + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [32, 32] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: cartpole_direct + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + num_actors: -1 # configured from the script (based on num_envs) + reward_shaper: + scale_value: 0.1 + normalize_advantage: True + gamma: 0.99 + tau : 0.95 + learning_rate: 5e-4 + lr_schedule: adaptive + kl_threshold: 0.008 + score_to_win: 20000 + max_epochs: 150 + save_best_after: 50 + save_frequency: 25 + grad_norm: 1.0 + entropy_coef: 0.0 + truncate_grads: True + e_clip: 0.2 + horizon_length: 32 + minibatch_size: 16384 + mini_epochs: 8 + critic_coef: 4 + clip_value: True + seq_length: 4 + bounds_loss_coef: 0.0001 \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/rsl_rl_ppo_cfg.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 0000000..dec1d54 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,72 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class PPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 48 # 稍微增加每轮步数 + max_iterations = 5000 # 增加迭代次数,给它足够的时间学习 + save_interval = 100 + # experiment_name = "mindbot_grasp" # 建议改个名,不要叫 cartpole 了 + experiment_name = "mindbot_centrifuge_LidUp" + + policy = RslRlPpoActorCriticCfg( + init_noise_std=0.7, + actor_obs_normalization=True, # 开启归一化 + critic_obs_normalization=True, # 开启归一化 + # 增加网络容量:三层 MLP,宽度足够 + actor_hidden_dims=[128, 64, 32], + critic_hidden_dims=[128, 64, 32], + activation="elu", + ) + + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, # 保持适度的探索 + num_learning_epochs=5, + num_mini_batches=8, + learning_rate=3e-4, # 如果网络变大后不稳定,可以尝试降低到 5.0e-4 + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) + + +# @configclass +# class PPORunnerCfg(RslRlOnPolicyRunnerCfg): +# num_steps_per_env = 16 +# max_iterations = 150 +# save_interval = 50 +# experiment_name = "cartpole_direct" +# policy = RslRlPpoActorCriticCfg( +# init_noise_std=1.0, +# actor_obs_normalization=False, +# critic_obs_normalization=False, +# actor_hidden_dims=[32, 32], +# critic_hidden_dims=[32, 32], +# activation="elu", +# ) +# algorithm = RslRlPpoAlgorithmCfg( +# value_loss_coef=1.0, +# use_clipped_value_loss=True, +# clip_param=0.2, +# entropy_coef=0.005, +# num_learning_epochs=5, +# num_mini_batches=4, +# learning_rate=1.0e-3, +# schedule="adaptive", +# gamma=0.99, +# lam=0.95, +# desired_kl=0.01, +# max_grad_norm=1.0, +# ) \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/sb3_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/sb3_ppo_cfg.yaml new file mode 100644 index 
0000000..23ed0c0 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/sb3_ppo_cfg.yaml @@ -0,0 +1,20 @@ +# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32 +seed: 42 + +n_timesteps: !!float 1e6 +policy: 'MlpPolicy' +n_steps: 16 +batch_size: 4096 +gae_lambda: 0.95 +gamma: 0.99 +n_epochs: 20 +ent_coef: 0.01 +learning_rate: !!float 3e-4 +clip_range: !!float 0.2 +policy_kwargs: + activation_fn: nn.ELU + net_arch: [32, 32] + squash_output: False +vf_coef: 1.0 +max_grad_norm: 1.0 +device: "cuda:0" \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/skrl_amp_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/skrl_amp_cfg.yaml new file mode 100644 index 0000000..3a1fd21 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/skrl_amp_cfg.yaml @@ -0,0 +1,111 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: -2.9 + fixed_log_std: True + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ONE + discriminator: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # 
automatically determined (same as agent:rollouts) + +# AMP memory (reference motion dataset) +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +motion_dataset: + class: RandomMemory + memory_size: 200000 + +# AMP memory (preventing discriminator overfitting) +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +reply_buffer: + class: RandomMemory + memory_size: 1000000 + + +# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/amp.html +agent: + class: AMP + rollouts: 16 + learning_epochs: 6 + mini_batches: 2 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-05 + learning_rate_scheduler: null + learning_rate_scheduler_kwargs: null + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + amp_state_preprocessor: RunningStandardScaler + amp_state_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 0.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.5 + discriminator_loss_scale: 5.0 + amp_batch_size: 512 + task_reward_weight: 0.0 + style_reward_weight: 1.0 + discriminator_batch_size: 4096 + discriminator_reward_scale: 2.0 + discriminator_logit_regularization_scale: 0.05 + discriminator_gradient_penalty_scale: 5.0 + discriminator_weight_decay_scale: 1.0e-04 + # rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "humanoid_amp_run" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 80000 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/skrl_ippo_cfg.yaml 
b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/skrl_ippo_cfg.yaml new file mode 100644 index 0000000..2f46b1c --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/skrl_ippo_cfg.yaml @@ -0,0 +1,80 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html +agent: + class: IPPO + rollouts: 16 + learning_epochs: 8 + mini_batches: 1 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 3.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cart_double_pendulum_direct" + experiment_name: 
"" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/skrl_mappo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/skrl_mappo_cfg.yaml new file mode 100644 index 0000000..720c927 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/skrl_mappo_cfg.yaml @@ -0,0 +1,82 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html +agent: + class: MAPPO + rollouts: 16 + learning_epochs: 8 + mini_batches: 1 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 3.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + shared_state_preprocessor: RunningStandardScaler + 
shared_state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cart_double_pendulum_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/skrl_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/skrl_ppo_cfg.yaml new file mode 100644 index 0000000..ab6674d --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/agents/skrl_ppo_cfg.yaml @@ -0,0 +1,80 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# PPO agent configuration (field names are from 
PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html +agent: + class: PPO + rollouts: 32 + learning_epochs: 8 + mini_batches: 8 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 0.1 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cartpole_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/centrifuge_env_cfg.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/centrifuge_env_cfg.py new file mode 100644 index 0000000..83bbc41 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/centrifuge_env_cfg.py @@ -0,0 +1,636 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +import math +from re import A, T +from tkinter import N +import torch +from isaaclab.actuators import ImplicitActuatorCfg +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.managers import CurriculumTermCfg as CurrTerm +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.utils import configclass +from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg +from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg +from isaaclab.sensors import CameraCfg + +from mindbot.assets.tube_rack import TubeRack_CFG + + +from . 
import mdp + +## +# Pre-defined configs +## + +from mindbot.robot.mindbot import MINDBOT_CFG + +# ====== 其他物体配置 ====== +from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg +from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg + + +TABLE_CFG=RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Table", + init_state=RigidObjectCfg.InitialStateCfg( + pos=[0.95, -0.3, 0.005], + rot=[0.7071, 0.0, 0.0, 0.7071], + lin_vel=[0.0, 0.0, 0.0], + ang_vel=[0.0, 0.0, 0.0], + ), + spawn=UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd", + rigid_props=RigidBodyPropertiesCfg( + rigid_body_enabled= True, + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + disable_gravity=False, + ), + collision_props=CollisionPropertiesCfg( + collision_enabled=True, + contact_offset=0.0005,#original 0.02 + rest_offset=0 + ), + ), +) + +Tube_CFG = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Tube", + init_state=RigidObjectCfg.InitialStateCfg( + # pos=[0.9523,-0.2512,1.0923], + pos=[0.803 , -0.25, 1.0282],#(0.9988, -0.2977, 1.0498633) + # rot=[-0.7071, 0.0, 0.0, 0.7071], + rot=[0.0, 0.0, 0.0, 1.0], + lin_vel=[0.0, 0.0, 0.0], + ang_vel=[0.0, 0.0, 0.0], + ), + spawn=UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/assets/Collected_equipment001/Equipment/tube1.usd", + # scale=(0.2, 0.2, 0.2), + rigid_props=RigidBodyPropertiesCfg( + rigid_body_enabled= True, + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + max_angular_velocity=1000.0, + max_linear_velocity=1000.0, + max_depenetration_velocity=0.5,#original 5.0 + linear_damping=5.0, #original 0.5 + angular_damping=5.0, #original 0.5 + disable_gravity=False, + ), + collision_props=CollisionPropertiesCfg( + collision_enabled=True, + contact_offset=0.0005,#original 0.02 + rest_offset=0 + ), + ), +) + +# LID_CFG = RigidObjectCfg( +# prim_path="{ENV_REGEX_NS}/Lid", +# 
init_state=RigidObjectCfg.InitialStateCfg( +# # pos=[0.9523,-0.2512,1.0923], +# pos=[0.8488, -0.2477, 1.0498633],#(0.9988, -0.2977, 1.0498633) +# # rot=[-0.7071, 0.0, 0.0, 0.7071], +# rot=[0.0, 0.0, 0.0, 1.0], +# lin_vel=[0.0, 0.0, 0.0], +# ang_vel=[0.0, 0.0, 0.0], +# ), +# spawn=UsdFileCfg( +# usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/Lid_B.usd", +# # scale=(0.2, 0.2, 0.2), +# rigid_props=RigidBodyPropertiesCfg( +# rigid_body_enabled= True, +# solver_position_iteration_count=32, +# solver_velocity_iteration_count=16, +# max_angular_velocity=1000.0, +# max_linear_velocity=1000.0, +# max_depenetration_velocity=0.5,#original 5.0 +# linear_damping=5.0, #original 0.5 +# angular_damping=5.0, #original 0.5 +# disable_gravity=False, +# ), +# collision_props=CollisionPropertiesCfg( +# collision_enabled=True, +# contact_offset=0.0005,#original 0.02 +# rest_offset=0 +# ), +# ), +# ) + +CENTRIFUGE_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/centrifuge.usd", + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=1.0, + linear_damping=0.5, + angular_damping=0.5, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + stabilization_threshold=1e-6, + # 【重要】必须要固定底座,否则机器人按盖子时,离心机会翻倒 + # fix_root_link=True, + ), + collision_props=CollisionPropertiesCfg( + collision_enabled=True, + contact_offset=0.0005,#original 0.02 + rest_offset=0 + ), + ), + init_state=ArticulationCfg.InitialStateCfg( + # 1. 
参照机器人配置,在这里定义初始关节角度 + joint_pos={ + "r1": math.radians(0.0), # 转子位置(无关紧要) + + # 【关键修改】:初始让盖子处于打开状态 + # 您的 USD 限位是 (-100, 0),-100度为最大开启 + "r2": math.radians(-100.0), + }, + pos=(0.80, -0.25, 0.8085),#(0.95, -0.3, 0.8085) + rot=[-0.7071, 0.0, 0.0, 0.7071], + # rot=[0.0, 0.0, 0.0, 1.0], + ), + actuators={ + "lid_passive_mechanism": ImplicitActuatorCfg( + joint_names_expr=["r2"], + + # 【关键差异说明】 + # 机器人的手臂 stiffness 是 10000.0,因为机器人需要精准定位且不晃动。 + # + # 但是!离心机盖子如果设为 10000,它就会像焊死在空中一样,机器人根本按不动。 + # 这里设为 200.0 左右: + # 1. 力度足以克服重力,让盖子初始保持打开。 + # 2. 力度又足够软,当机器人用力下压时,盖子会顺从地闭合。 + stiffness=200.0, + + # 阻尼:给一点阻力,模拟液压杆手感,防止像弹簧一样乱晃 + damping=20.0, + + # 确保力矩上限不要太小,否则托不住盖子 + effort_limit_sim=1000.0, + velocity_limit_sim=100.0, + ), + # 转子可以硬一点,不需要被机器人按动 + "rotor_control": ImplicitActuatorCfg( + joint_names_expr=["r1"], + stiffness=0.0, + damping=10.0, + ), + } +) + + +ROOM_CFG = AssetBaseCfg( + prim_path="{ENV_REGEX_NS}/Room", + spawn=UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd", + ), +) + +## +# Scene definition +## + + +@configclass +class MindbotSceneCfg(InteractiveSceneCfg): + """Configuration for a cart-pole scene.""" + + # ground plane + ground = AssetBaseCfg( + prim_path="/World/ground", + spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)), + ) + + # robot + # Room: AssetBaseCfg = ROOM_CFG.replace(prim_path="{ENV_REGEX_NS}/Room") + Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot") + table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table") + centrifuge: ArticulationCfg = CENTRIFUGE_CFG.replace(prim_path="{ENV_REGEX_NS}/centrifuge") + # lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid") + tube: RigidObjectCfg = Tube_CFG.replace(prim_path="{ENV_REGEX_NS}/Tube") + # tube rack + tube_rack: RigidObjectCfg = TubeRack_CFG.replace(prim_path="{ENV_REGEX_NS}/TubeRack") + # lights + dome_light = AssetBaseCfg( + prim_path="/World/DomeLight", + 
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0), + ) + """ + 添加相机定义 + # head_camera=CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head", + # update_period=1/120, + # height=480, + # width=640, + # data_type=["rgb"], + # spawn=None, + # ) + """ + # left_hand_camera=CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand", + # update_period=1/120, + # height=480, + # width=640, + # data_types=["rgb"], + # spawn=None, + # ) + # right_hand_camera=CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand", + # update_period=1/120, + # height=480, + # width=640, + # data_types=["rgb"], + # spawn=None, + # ) + # head_camera=CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head", + # update_period=1/120, + # height=480, + # width=640, + # data_types=["rgb"], + # spawn=None, + # ) + # chest_camera=CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest", + # update_period=1/120, + # height=480, + # width=640, + # data_types=["rgb"], + # spawn=None, + # ) +## +# MDP settings +## + +def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor: + asset = env.scene[asset_cfg.name] + body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True) + pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins + return pos_w + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + # 使用任务空间控制器:策略输出末端执行器位置+姿态增量(6维) + right_arm_ee = DifferentialInverseKinematicsActionCfg( + asset_name="Mindbot", + joint_names=["r_joint[1-6]"], # 左臂的6个关节 + body_name="right_hand_body", # 末端执行器body名称 + controller=DifferentialIKControllerCfg( + command_type="pose", # 控制位置+姿态 + use_relative_mode=True, # 相对模式:动作是增量 + ik_method="dls", # Damped Least Squares方法 + ), + scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025), + ) + + left_arm_fixed = mdp.JointPositionActionCfg( + 
asset_name="Mindbot", + joint_names=["l_joint[1-6]"], # 对应 l_joint1 到 l_joint6 + + # 1. 缩放设为 0:这让 RL 策略无法控制它,它不会动 + scale=0.0, + + # 2. 偏移设为你的目标角度(弧度):这告诉电机“永远停在这里” + # 对应 (135, -70, -90, 90, 90, -70) + offset={ + "l_joint1": 2.3562, + "l_joint2": -1.2217, + "l_joint3": -1.5708, + "l_joint4": 1.5708, + "l_joint5": 1.5708, + "l_joint6": -1.2217, + }, + ) + + grippers_position = mdp.BinaryJointPositionActionCfg( + asset_name="Mindbot", + joint_names=["left_hand_joint_left", "left_hand_joint_right"], + + # 修正:使用字典格式 + # open_command_expr={"关节名正则": 值} + open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0}, + + # close_command_expr={"关节名正则": 值} + close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03}, + ) + + + trunk_position = mdp.JointPositionActionCfg( + asset_name="Mindbot", + joint_names=["PrismaticJoint"], + scale=0.00, + offset=0.26, # 0.3 + clip=None + ) + + centrifuge_lid_passive = mdp.JointPositionActionCfg( + asset_name="centrifuge", # 对应场景中的名称 + joint_names=["r2"], + # 将 scale 设为 0,意味着 RL 算法输出的任何值都会被乘 0,即无法干扰它 + scale=0.00, + # 将 offset 设为目标角度,这会成为 PD 控制器的恒定 Target + offset= -1.7453, + clip=None + ) + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")}) + mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}) + mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}) + + left_gripper_pos = ObsTerm(func=left_gripper_pos_w, + params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])}) + lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("tube")}) + centrifuge_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("centrifuge")}) + + def 
__post_init__(self) -> None: + self.enable_corruption = False + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + # 重置所有关节到 init_state(无偏移)must be + reset_mindbot_all_joints = EventTerm( + func=mdp.reset_joints_by_offset, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("Mindbot"), + "position_range": (0.0, 0.0), + "velocity_range": (0.0, 0.0), + }, + ) + # 2. 底座安装误差 (建议缩小到 2cm) + reset_mindbot_base = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("Mindbot"), + "pose_range": { + "x": (-0.05, 0.05), + "y": (-0.05, 0.05), + "z": (0.74, 0.75), + "yaw": (-0.1, 0.1), + }, + "velocity_range": {"x": (0.0, 0.0), "y": (0.0, 0.0)}, + }, + ) + # 1. 机械臂关节微小偏移 (0.01 弧度约 0.6 度,很合适) + reset_mindbot_left_arm = EventTerm( + func=mdp.reset_joints_by_offset, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("Mindbot", joint_names=["r_joint[1-6]"]), + "position_range": (-0.01, 0.01), + "velocity_range": (0.0, 0.0), + }, + ) + + reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset", + params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}}) + reset_centrifuge = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("centrifuge"), + # "pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)}, + "pose_range": {"x": (0.0, 0.0), "y": (0.0, 0.0), "z": (0.0, 0.0)}, + "velocity_range": {"x": (0.0, 0.0)} + } + ) + reset_tube = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("tube"), + # "pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)}, + "pose_range": {"x": (0.0, 0.0), "y": (0.0, 0.0), "z": (0.0, 0.0)}, + "velocity_range": {"x": (0.0, 0.0)} + } + ) + + +@configclass +class RewardsCfg: + 
"""Reward terms for the MDP.""" + + gripper_lid_orientation_alignment = RewTerm( + func=mdp.gripper_lid_orientation_alignment, + params={ + "lid_cfg": SceneEntityCfg("tube"), + "robot_cfg": SceneEntityCfg("Mindbot"), + "gripper_body_name": "right_hand_body", + "gripper_forward_axis": (0.0, 0.0, 1.0), + "gripper_width_axis": (0.0, 1.0, 0.0), + "lid_handle_axis": (0.0, 1.0, 0.0), + "max_angle_deg": 85.0, # 允许60度内的偏差 + }, + weight=5.0 #original 5.0 + ) + + # stage 2 + # 位置对齐奖励:鼓励夹爪位于 lid 两侧,且 Z 方向在 lid 上方 0.1m + gripper_lid_position_alignment = RewTerm( + func=mdp.gripper_lid_position_alignment, + params={ + "lid_cfg": SceneEntityCfg("tube"), + "robot_cfg": SceneEntityCfg("Mindbot"), + "left_gripper_name": "right_hand_l", + "right_gripper_name": "right_hand__r", + "height_offset": 0.115, # Z方向:lid 上方 0.1m + "std": 0.3, # 位置对齐的容忍度 + }, + weight=3.0 #original 3.0 + ) + approach_lid_penalty = RewTerm( + func=mdp.gripper_lid_distance_penalty, + params={ + "lid_cfg": SceneEntityCfg("tube"), + "robot_cfg": SceneEntityCfg("Mindbot"), + "left_gripper_name": "right_hand_l", + "right_gripper_name": "right_hand__r", + "height_offset": 0.115, + }, + # weight 为负数表示惩罚。 + # 假设距离 0.5m,惩罚就是 -0.5 * 2.0 = -1.0 分。 + weight=-1.0 + ) + # # 【新增】精细对齐 (引导进入 2cm 圈) + # gripper_lid_fine_alignment = RewTerm( + # func=mdp.gripper_lid_position_alignment, + # params={ + # "lid_cfg": SceneEntityCfg("lid"), + # "robot_cfg": SceneEntityCfg("Mindbot"), + # "left_gripper_name": "left_hand__l", + # "right_gripper_name": "left_hand_r", + # "height_offset": 0.07, # 【注意】这个高度必须非常准,和 interaction 判定高度一致 + # "std": 0.05, # 非常陡峭的梯度,只在 5-10cm 内有效 + # }, + # weight=10.0 # 高权重 + # ) + + # gripper_close_interaction = RewTerm( + # func=mdp.gripper_close_when_near, + # params={ + # "lid_cfg": SceneEntityCfg("lid"), + # "robot_cfg": SceneEntityCfg("Mindbot"), + # "left_gripper_name": "left_hand__l", + # "right_gripper_name": "left_hand_r", + # "joint_names": ["left_hand_joint_left", "left_hand_joint_right"], + # 
"target_close_pos": 0.03, + # "height_offset": 0.04, + # "grasp_radius": 0.03, + # }, + # weight=10.0 + # ) + + # lid_lifted_reward = RewTerm( + # func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数 + # params={ + # "lid_cfg": SceneEntityCfg("lid"), + # "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右 + # }, + # weight=10.0 # 给一个足够大的权重 + # ) + + # lid_lifting_reward = RewTerm( + # func=mdp.lid_lift_progress_reward, + # params={ + # "lid_cfg": SceneEntityCfg("lid"), + # "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标 + # "target_height_lift": 0.2, + # "height_offset": 0.07, # 与其他奖励保持一致 + # "std": 0.1 + # }, + # # 权重设大一点,告诉它这是最终目标 + # weight=10.0 + # ) + + + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + mindbot_fly_away = DoneTerm( + func=mdp.base_height_failure, + params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0}, + ) + + lid_fly_away = DoneTerm( + func=mdp.base_height_failure, + params={"asset_cfg": SceneEntityCfg("tube"), "maximum_height": 2.0}, + ) + + # 新增:盖子掉落判定 + # 如果盖子掉回桌面(高度接近初始高度),且已经运行了一段时间(避开刚开始重置的瞬间) + lid_dropped = DoneTerm( + func=mdp.base_height_failure, # 复用高度判定函数 + params={ + "asset_cfg": SceneEntityCfg("tube"), + "minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了 + }, + ) + + + +## +# Environment configuration +## + +@configclass +class CurriculumCfg: + pass + # action_rate = CurrTerm(func=mdp.modify_reward_weight, + # params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000}) + + # joint_vel = CurrTerm(func=mdp.modify_reward_weight, + # params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000}) + + # position_std_scheduler = CurrTerm( + # func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类 + # params={ + # # 路径:rewards管理器 -> 你的奖励项名 -> params字典 -> std键 + # "address": "rewards.gripper_lid_position_alignment.params.std", + + # # 修改逻辑:使用我们刚才写的函数 + # "modify_fn": mdp.annealing_std, + + # # 传给函数的参数 + # 
"modify_params": { + # "start_step": 00, # 约 600 轮 + # "end_step": 5_000, # 约 1200 轮 + # "start_std": 0.3, + # "end_std": 0.05, + # } + # } + # ) + +@configclass +class MindbotEnvCfg(ManagerBasedRLEnvCfg): + # Scene settings + scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=5.0) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + events: EventCfg = EventCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + # curriculum: CurriculumCfg = CurriculumCfg() + + # Post initialization + def __post_init__(self) -> None: + """Post initialization.""" + # general settings + self.decimation = 4 #original 2 # 从 2 增加到 4,控制频率从 25Hz 降到 12.5Hz + self.episode_length_s = 10 + # viewer settings + self.viewer.eye = (3.5, 0.0, 13.2)#(3.5, 0.0, 3.2) + # simulation settings + self.sim.dt = 1 / 120 #original 1 / 60 + self.sim.render_interval = self.decimation + # # 1. 基础堆内存 + self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256MB + self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024 + self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024 + self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024 # 200万个接触点 + self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024 # 32万个 patch + + # # 5. 
聚合对容量 (针对复杂的 Articulation) + self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024 + self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024 + self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024 diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/mdp/__init__.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/mdp/__init__.py new file mode 100644 index 0000000..4ab42b0 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/mdp/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""This sub-module contains the functions that are specific to the environment.""" + +from isaaclab.envs.mdp import * # noqa: F401, F403 + +from .rewards import * # noqa: F401, F403 +from .terminations import * # noqa: F401, F403 +from .gripper import * # noqa: F401, F403 +from .curriculums import * # noqa: F401, F403 \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/mdp/curriculums.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/mdp/curriculums.py new file mode 100644 index 0000000..4e5d833 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/mdp/curriculums.py @@ -0,0 +1,50 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

"""Curriculum utilities: schedules that modify reward-term parameters during training."""

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported only for annotations: `torch` was referenced in the original
    # signature without any import (masked by lazy annotations).
    import torch
    from isaaclab.envs import ManagerBasedRLEnv


def annealing_std(
    env: ManagerBasedRLEnv,
    env_ids: torch.Tensor,
    current_value: float,
    start_step: int,
    end_step: int,
    start_std: float,
    end_std: float,
) -> float:
    """Linearly anneal a reward-kernel ``std`` between two training steps.

    Args:
        env: The environment; only ``env.common_step_counter`` is read.
        env_ids: Unused; kept to satisfy the curriculum-callback signature.
        current_value: Current parameter value passed in by the manager. Unused:
            the schedule is fully determined by the step counter, so repeated
            calls are idempotent.
        start_step: Step at which annealing begins.
        end_step: Step at which annealing ends.
        start_std: Value held before ``start_step``.
        end_std: Value held after ``end_step``.

    Returns:
        The scheduled ``std`` for the current global step.
    """
    step = env.common_step_counter
    # Before the window opens, pin the value to its initial setting.
    if step < start_step:
        return start_std
    # After the window closes — or for a degenerate/empty window, which would
    # otherwise divide by zero below — hold the final value.
    if step >= end_step or end_step <= start_step:
        return end_std
    # Inside the window: straight linear interpolation.
    ratio = (step - start_step) / (end_step - start_step)
    return start_std + (end_std - start_std) * ratio
# Action term that maps a single scalar command onto a pair of mirrored gripper joints.

import torch

from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
from isaaclab.utils import configclass


class CoupledJointPositionAction(JointPositionAction):
    """Runtime action term instantiated by the ActionManager.

    Exposes a 1-D action to the policy; the scalar is affinely transformed
    (``scale``/``offset`` from the config) and applied with opposite signs to
    the two configured joints: first joint gets ``-target``, second ``+target``.
    """

    def __init__(self, cfg: "CoupledJointPositionActionCfg", env):
        super().__init__(cfg, env)

    @property
    def action_dim(self) -> int:
        """Make the ActionManager allocate exactly one scalar for this term."""
        return 1

    def process_actions(self, actions: torch.Tensor):
        # Keep the raw policy command inside [-1, 1].
        self._raw_actions[:] = torch.clamp(actions, -1, 1)
        # Affine transform to a joint-position target, then mirror it onto the
        # (left, right) joint pair: (N, 1) -> stack -> (N, 2, 1) -> (N, 2).
        target = self._raw_actions * self.cfg.scale + self.cfg.offset
        self._processed_actions = torch.stack((-target, target), dim=1).squeeze(-1)
        # Optional per-joint clipping, mirroring the base implementation.
        if self.cfg.clip is not None:
            self._processed_actions = torch.clamp(
                self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
            )


@configclass
class CoupledJointPositionActionCfg(JointPositionActionCfg):
    """Configuration for :class:`CoupledJointPositionAction`.

    ``class_type`` is the hook the ActionManager uses to pick the runtime class
    when it encounters this config.
    """

    class_type = CoupledJointPositionAction

    def __post_init__(self) -> None:
        # The mirroring scheme only makes sense for exactly one left/right pair.
        if len(self.joint_names) != 2:
            raise ValueError("Requires exactly two joint names.")
        super().__post_init__()
+# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor: + """Penalize joint position deviation from a target value.""" + # extract the used quantities (to enable type-hinting) + asset: Articulation = env.scene[asset_cfg.name] + # wrap the joint positions to (-pi, pi) + joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids]) + # compute the reward + return torch.sum(torch.square(joint_pos - target), dim=1) + + +def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor: + """Reward the agent for lifting the lid above the minimal height.""" + lid: RigidObject = env.scene[lid_cfg.name] + # root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,) + height = lid.data.root_pos_w[:, 2] + return torch.where(height > minimal_height, 1.0, 0.0) + +def gripper_lid_distance(env: ManagerBasedRLEnv, std: float, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + """Reward the agent for reaching the lid using tanh-kernel.""" + # extract the used quantities (to enable type-hinting) + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + # float 化,防止成为 (3,) 向量 + if not isinstance(std, float): + if torch.is_tensor(std): + std = std.item() + else: + std = float(std) + + # Target lid position: (num_envs, 3) + lid_pos_w = lid.data.root_pos_w[:, :3] + + # Gripper position: (num_envs, 3) 
+ # body_idx = robot.find_bodies(gripper_body_name)[0] + # gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1) + body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True) + gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :] + # Distance of the gripper to the lid: (num_envs,) + distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6 + + return 1 - torch.tanh(distance / std) + + +def lid_grasped(env: ManagerBasedRLEnv, + distance_threshold: float = 0.05, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + """Reward the agent for grasping the lid (close and lifted).""" + # extract the used quantities (to enable type-hinting) + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + # Target lid position: (num_envs, 3) + lid_pos_w = lid.data.root_pos_w[:, :3] + + # Gripper position: (num_envs, 3) + body_idx = robot.find_bodies(gripper_body_name)[0] + gripper_pos_w = robot.data.body_pos_w[:, body_idx, :] + + # Distance of the gripper to the lid: (num_envs,) + distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + + # Check if close and lifted + is_close = distance < distance_threshold + is_lifted = lid.data.root_pos_w[:, 2] > 0.1 + + return torch.where(is_close & is_lifted, 1.0, 0.0) + +def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + """Combined reward for reaching the lid AND lifting it.""" + # Get reaching reward + reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name) + # Get lifting reward + lift_reward = lid_is_lifted(env, minimal_height, lid_cfg) + # Combine rewards multiplicatively + return reach_reward * 
lift_reward + + + +# def gripper_lid_position_alignment(env: ManagerBasedRLEnv, +# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# left_gripper_name: str = "left_hand__l", +# right_gripper_name: str = "left_hand_r", +# height_offset: float = 0.1, +# std: float = 0.1) -> torch.Tensor: +# """奖励函数:夹爪相对于lid的位置对齐。 + +# Args: +# env: 环境实例 +# lid_cfg: lid的配置 +# robot_cfg: 机器人的配置 +# left_gripper_name: 左侧夹爪body名称 +# right_gripper_name: 右侧夹爪body名称 +# side_distance: Y方向的期望距离 +# height_offset: Z方向的期望高度偏移 +# std: tanh核函数的标准差 + +# Returns: +# 位置对齐奖励 (num_envs,) +# """ +# lid: RigidObject = env.scene[lid_cfg.name] +# robot: Articulation = env.scene[robot_cfg.name] + +# lid_pos_w = lid.data.root_pos_w[:, :3] + +# # 获取两个夹爪的位置 +# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) +# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + +# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3) +# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3) + +# # 计算夹爪中心位置 +# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3) + +# # 计算相对位置(夹爪中心相对于 lid 中心) +# rel_pos = gripper_center - lid_pos_w # (num_envs, 3) + +# # X 方向位置:应该对齐(接近 0) +# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6 +# x_reward = 1.0 - torch.tanh(x_dist / std) + +# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance) +# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6 +# # y_error = torch.abs(y_dist - side_distance) +# # y_reward = 1.0 - torch.tanh(y_error / std) +# y_reward = 1.0 - torch.tanh(y_dist / std) + +# # Z 方向位置:应该在 lid 上方 height_offset 处 +# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6 +# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6 +# z_reward = 1.0 - torch.tanh(z_error / std) + +# # 组合位置奖励 +# position_reward = x_reward * y_reward * z_reward + +# return position_reward + + +def 
gripper_lid_position_alignment(env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + height_offset: float = 0.02, + std: float = 0.1) -> torch.Tensor: + """奖励函数:夹爪相对于lid的位置对齐(简化版)。 + + 计算夹爪中心到目标点(lid上方height_offset处)的距离奖励。 + + Args: + env: 环境实例 + lid_cfg: lid的配置 + robot_cfg: 机器人的配置 + left_gripper_name: 左侧夹爪body名称 + right_gripper_name: 右侧夹爪body名称 + height_offset: Z方向的期望高度偏移(目标点在lid上方) + std: tanh核函数的标准差 + + Returns: + 位置对齐奖励 (num_envs,) + """ + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + lid_pos_w = lid.data.root_pos_w[:, :3] + + # 目标点:lid位置 + height_offset(Z方向) + target_pos = lid_pos_w.clone() + # print(target_pos[0]) + target_pos[:, 2] += height_offset # 在lid上方height_offset处 + # print(target_pos[0]) + # 获取两个夹爪的位置 + left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) + right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + + left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3) + right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3) + + # 计算夹爪中心位置 + gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3) + # print(gripper_center[0]) + # 计算夹爪中心到目标点的3D距离 + distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6 + + # 距离奖励:距离越小,奖励越大 + position_reward = 1.0 - torch.tanh(distance / std) + + return position_reward + + +# ... 
# ... (keep the original imports)

# ==============================================================================
# Direct distance penalty (linear shaping toward the grasp point)
# ==============================================================================
def gripper_lid_distance_penalty(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    height_offset: float = 0.07,
) -> torch.Tensor:
    """Return the Euclidean distance from the gripper midpoint to the grasp point.

    The returned value is a *positive distance*; assign it a negative weight in
    the reward config (e.g. ``weight=-1.0``) so a larger distance produces a
    larger penalty (lower total reward).

    Args:
        env: The RL environment instance.
        lid_cfg: Scene-entity config for the lid rigid object.
        robot_cfg: Scene-entity config for the robot articulation.
        left_gripper_name: Body name of the left finger.
        right_gripper_name: Body name of the right finger.
        height_offset: Z offset of the grasp point above the lid root (m).

    Returns:
        Per-environment distance tensor of shape (num_envs,).
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]

    # 1. Target point: lid center raised by height_offset.
    target_pos = lid.data.root_pos_w[:, :3].clone()
    target_pos[:, 2] += height_offset

    # 2. Gripper midpoint from the two finger bodies.
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
    left_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
    right_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
    gripper_center = (left_pos + right_pos) / 2.0

    # 3. Euclidean distance to the target point.
    distance = torch.norm(gripper_center - target_pos, dim=1)

    return distance


# NOTE(review): an exact duplicate redefinition of gripper_lid_position_alignment
# (same defaults, same body) was removed here; the earlier definition above is
# the single source of truth.

# NOTE(review): a commented-out draft of gripper_lid_orientation_alignment
# followed here; remaining comment fragments of it continue below.
# NOTE(review): the remainder of the commented-out draft of
# gripper_lid_orientation_alignment was removed here; see git history.


def gripper_lid_orientation_alignment(
    env,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    gripper_body_name: str = "left_hand_body",
    gripper_forward_axis: tuple = (0.0, 0.0, 1.0),  # gripper-local forward (Z)
    gripper_width_axis: tuple = (0.0, 1.0, 0.0),    # gripper-local gap axis (Y), should parallel the handle
    lid_handle_axis: tuple = (0.0, 1.0, 0.0),       # lid-local handle axis (Y)
    max_angle_deg: float = 60.0,                    # max tolerated tilt from vertical
) -> torch.Tensor:
    """Reward: orient the gripper so it points down with its gap along the handle.

    Two multiplicative terms:
      * vertical term: linear falloff from 1 (forward axis exactly world-down)
        to 0 at ``max_angle_deg``; zero beyond that angle;
      * yaw term: squared cosine between the gripper width axis and the lid
        handle axis (sign-insensitive, so either parallel direction counts).

    Returns:
        Per-environment reward tensor of shape (num_envs,).
    """
    # 1. Fetch scene entities.
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]

    # 2. Poses of the lid root and the gripper body.
    lid_quat_w = lid.data.root_quat_w
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]

    # 3. Vertical alignment: forward axis should point toward world -Z.
    grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
    grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
    world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)

    # cos(angle) between the forward axis and world-down; a single clamp
    # guards acos (the original clamped twice, which was redundant).
    dot_vertical = torch.clamp(torch.sum(grip_fwd_world * world_down, dim=1), -1.0, 1.0)
    angle_rad = torch.acos(dot_vertical)
    # Use torch.pi instead of a hand-typed 3.14159... constant.
    max_angle_rad = torch.tensor(max_angle_deg * torch.pi / 180.0, device=env.device)

    # Linear falloff: 1 at 0 deg down to 0 at max_angle_deg; 0 past the limit.
    angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
    rew_vertical = 1.0 - angle_normalized
    rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical))
    # nan -> 0 (defensive: physics glitches can propagate NaNs)
    rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)

    # 4. Yaw alignment: gripper width axis parallel to the lid handle axis.
    grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
    grip_width_world = quat_apply(gripper_quat_w, grip_width_local)

    lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
    lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)

    dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
    # square() already discards the sign; the abs() in the original was redundant
    rew_yaw = torch.square(dot_yaw) + 1e-4
    # nan -> 0
    rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)

    # 5. Combine the two terms.
    total_reward = rew_vertical * rew_yaw

    total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)

    # debug
    if not torch.isfinite(total_reward).all():
        print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
        print(f"rew_vertical: {rew_vertical}")
        print(f"rew_yaw: {rew_yaw}")

    return total_reward


# NOTE(review): two commented-out drafts of gripper_open_close_reward followed
# here; remaining comment fragments of them continue below.
# NOTE(review): the remainder of the commented-out gripper_open_close_reward
# drafts was removed here; see git history.


def _is_grasped(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg,
    robot_cfg: SceneEntityCfg,
    left_gripper_name: str,
    right_gripper_name: str,
    joint_names: list,
    height_offset: float,
    grasp_radius: float,
    target_close_pos: float
) -> torch.Tensor:
    """Heuristic grasp detector shared by the grasp/lift reward terms.

    All of the following must hold for an environment to count as grasped:
      1. the finger midpoint is within ``grasp_radius`` of the handle target
         point (lid root + ``height_offset`` on Z);
      2. the midpoint height is within 3 cm of that target (guards against
         pressing down on the lid top from above);
      3. both gripper joints are displaced past 0.005 (actively closing);
      4. the fingertips are more than 5 mm apart (an empty pinch would let
         the fingers touch; something between them keeps them spread).

    Returns:
        Boolean tensor of shape (num_envs,).

    NOTE(review): ``target_close_pos`` is accepted but currently unused — the
    closing check uses a fixed 0.005 threshold. Confirm whether the threshold
    should be derived from it before tuning.
    """
    # 1. Fetch scene entities.
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]

    # 2. Handle target point: lid root raised by height_offset.
    lid_pos_w = lid.data.root_pos_w[:, :3].clone()
    lid_pos_w[:, 2] += height_offset

    # 3. Finger world positions and their midpoint.
    l_ids, _ = robot.find_bodies([left_gripper_name])
    r_ids, _ = robot.find_bodies([right_gripper_name])
    pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
    pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
    gripper_center = (pos_L + pos_R) / 2.0

    # 4. Condition 1: proximity — midpoint inside the grasp sphere.
    dist_to_handle = torch.norm(gripper_center - lid_pos_w, dim=-1)
    is_near = dist_to_handle < grasp_radius

    # 5. Condition 2: height — within 3 cm of the target Z.
    z_diff = gripper_center[:, 2] - lid_pos_w[:, 2]
    is_z_valid = torch.abs(z_diff) < 0.03

    # 6. Condition 3: both joints displaced beyond a small closing threshold
    # (no upper bound, so a firm squeeze is not misclassified as open).
    joint_ids, _ = robot.find_joints(joint_names)
    joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
    is_effort_closing = torch.all(joint_pos > 0.005, dim=1)

    # 7. Condition 4: fingertips held apart — evidence something is gripped.
    dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
    is_not_empty = dist_fingertips > 0.005

    return is_near & is_z_valid & is_effort_closing & is_not_empty


def gripper_close_when_near(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    target_close_pos: float = 0.03,
    height_offset: float = 0.02,
    grasp_radius: float = 0.05
) -> torch.Tensor:
    """Reward clamping force, but only while a grasp is actually detected.

    When ``_is_grasped`` holds, the reward equals the mean joint closure
    normalized by ``target_close_pos`` (capped at 1.0), pushing the agent to
    squeeze fully; otherwise the reward is zero.

    Returns:
        Per-environment reward tensor of shape (num_envs,).
    """
    # 1. Gate on the shared grasp detector.
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )

    # 2. Clamping intensity: mean closure depth over the two finger joints.
    robot = env.scene[robot_cfg.name]
    joint_ids, _ = robot.find_joints(joint_names)
    current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])  # (num_envs, num_joints)
    mean_joint_pos = torch.mean(current_joint_pos, dim=1)
    # Normalize by the target closed position so full closure -> 1.0.
    clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)

    # 3. Pay the clamping reward only while the grasp conditions hold.
    return torch.where(is_grasped, clamping_factor, 0.0)


# NOTE(review): a commented-out sphere/between-fingers draft of
# gripper_close_when_near followed here; remaining fragments continue below.
# NOTE(review): the remainder of the commented-out gripper_close_when_near
# draft and a commented-out draft of lid_is_lifted were removed here.


def lid_is_lifted(
    env: ManagerBasedRLEnv,
    minimal_height: float = 0.05,  # relative lift threshold for the bonus
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    height_offset: float = 0.03,
    grasp_radius: float = 0.1,
    target_close_pos: float = 0.03,
) -> torch.Tensor:
    """Lift reward gated on a stable grasp.

    Shaping term (2 * relative lift) plus a 1.0 bonus once the lid has risen
    above ``minimal_height`` relative to its default root height. Paid only
    when ``_is_grasped`` holds AND the lid is moving slower than 1 m/s, so
    knocking the lid into the air scores nothing.

    Returns:
        Per-environment reward tensor of shape (num_envs,).
    """
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )

    lid = env.scene[lid_cfg.name]

    # 1. Heights: current root height vs. the default (initial) root height.
    current_height = lid.data.root_pos_w[:, 2]
    initial_height = lid.data.default_root_state[:, 2]

    # 2. Relative lift, floored at zero.
    lift_height = torch.clamp(current_height - initial_height, min=0.0)

    # 3. Stability check: faster than 1 m/s counts as "knocked away".
    lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
    is_stable = lid_vel < 1.0

    # 4. Combine: shaping + success bonus, gated on (stable AND grasped).
    shaping_reward = lift_height * 2.0
    success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
    reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus,
                         torch.zeros_like(lift_height))

    return torch.nan_to_num(reward, nan=0.0)


def lid_lift_success_reward(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand_body",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    # key parameters
    settled_height: float = 0.75,        # settled height of the lid on the device (tune to the sim)
    target_lift_height: float = 0.05,    # target relative lift (5 cm)
    grasp_dist_threshold: float = 0.05,  # grasp proximity threshold
    closed_pos: float = 0.03             # fully-closed joint position
) -> torch.Tensor:
    """Lift-success reward: pays only when grasped AND lifted toward the target.

    Grasp check here is local (distance to the lid center plus >80% joint
    closure) rather than the shared ``_is_grasped`` helper.

    Returns:
        Per-environment reward tensor of shape (num_envs,).
    """
    # 1. Fetch data.
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]

    # Current lid height and full position.
    lid_z = lid.data.root_pos_w[:, 2]
    lid_pos = lid.data.root_pos_w[:, :3]

    # Gripper body position.
    body_ids, _ = robot.find_bodies([left_gripper_name])
    gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]

    # 2. Condition one: is the lid grasped?
    distance = torch.norm(gripper_pos - lid_pos, dim=-1)
    is_close = distance < grasp_dist_threshold

    joint_ids, _ = robot.find_joints(joint_names)
    joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
    # >80% of the closed position counts as a firm grip.
    is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)

    is_grasped = is_close & is_closed

    # 3. Condition two: relative lift above the settled height.
    current_lift = lid_z - settled_height

    # 4. (reward computation continues below)
计算奖励 + lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0) + + # 基础提升分 (Shaping Reward) + lift_reward = lift_progress + + # 成功大奖 (Success Bonus) + # 如果提升超过目标高度 (比如 > 95% 的 5cm),给额外大奖 + success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0) + + # 组合:只有在 is_grasped 为 True 时才发放提升奖励 + total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward)) + + # 5. 安全输出 + return torch.nan_to_num(total_reward, nan=0.0) + + +def lid_lift_progress_reward( + env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], + initial_height: float = 0.8, + target_height_lift: float = 0.15, + height_offset: float = 0.02, # 必须与抓取奖励保持一致 + grasp_radius: float = 0.1, # 提升时可以稍微放宽一点半径,防止抖动丢分 + target_close_pos: float = 0.03, + std: float = 0.05 # 标准差 +) -> torch.Tensor: + + # 1. 判定是否夹住 + is_grasped = _is_grasped( + env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name, + joint_names, height_offset, grasp_radius, target_close_pos + ) + + # 2. 计算高度 + lid = env.scene[lid_cfg.name] + current_height = lid.data.root_pos_w[:, 2] + lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift) + # print(current_height) + # print(lift_height) + # 3. 
计算奖励 + # 只有在 is_grasped 为 True 时,才发放高度奖励 + # 这样彻底杜绝了 "砸飞/撞飞" 拿分的情况 + progress = torch.tanh(lift_height / std) + + reward = torch.where(is_grasped, progress, 0.0) + + return reward + diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/mdp/terminations.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/mdp/terminations.py new file mode 100644 index 0000000..8b31be2 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge_tube_put/mdp/terminations.py @@ -0,0 +1,97 @@ +from __future__ import annotations +import torch +from typing import TYPE_CHECKING +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import SceneEntityCfg + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + +def lid_dropped(env: ManagerBasedRLEnv, + minimum_height: float = -0.05, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor: + lid: RigidObject = env.scene[lid_cfg.name] + return lid.data.root_pos_w[:, 2] < minimum_height + +def lid_successfully_grasped(env: ManagerBasedRLEnv, + distance_threshold: float = 0.03, + height_threshold: float = 0.2, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + body_idx = robot.find_bodies(gripper_body_name)[0] + gripper_pos_w = robot.data.body_pos_w[:, body_idx, :] + distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1) + is_close = distance < distance_threshold + is_lifted = lid.data.root_pos_w[:, 2] > height_threshold + return is_close & is_lifted + +def gripper_at_lid_side(env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", # 改为两个下划线 + right_gripper_name: str = "left_hand_r", + 
side_distance: float = 0.05, + side_tolerance: float = 0.01, + alignment_tolerance: float = 0.02, + height_offset: float = 0.1, + height_tolerance: float = 0.02) -> torch.Tensor: + """Terminate when gripper center is positioned on the side of the lid at specified height. + + 坐标系说明: + - X 方向:两个夹爪朝中心合并的方向 + - Y 方向:夹爪间空隙的方向,和 lid 的把手方向一致 + - Z 方向:高度方向 + """ + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + lid_pos_w = lid.data.root_pos_w[:, :3] + + # 获取两个夹爪的位置 + left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) + right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + + left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] + right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] + + # 计算夹爪中心位置 + gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 + rel_pos = gripper_center - lid_pos_w + + # Y 方向:应该在 lid 的 Y 方向两侧(距离 side_distance) + y_dist = torch.abs(rel_pos[:, 1]) + y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance) + + # X 方向:应该对齐(接近 0) + x_dist = torch.abs(rel_pos[:, 0]) + x_ok = x_dist < alignment_tolerance + + # Z 方向:应该在 lid 上方 height_offset 处 + z_error = torch.abs(rel_pos[:, 2] - height_offset) + z_ok = z_error < height_tolerance + + # 所有条件都要满足 + return x_ok & y_ok & z_ok + + +def base_height_failure(env: ManagerBasedRLEnv, + minimum_height: float | None = None, + maximum_height: float | None = None, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor: + """Terminate when the robot's base height is outside the specified range.""" + # extract the used quantities (to enable type-hinting) + asset: Articulation = env.scene[asset_cfg.name] + root_pos_z = asset.data.root_pos_w[:, 2] + + # check if height is outside the range + out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool) + if minimum_height is not None: + out_of_bounds |= root_pos_z < minimum_height + 
if maximum_height is not None: + out_of_bounds |= root_pos_z > maximum_height + + return out_of_bounds +