Code commit from the local 4090 machine
.gitignore (vendored): 5 changed lines
@@ -1,3 +1,8 @@
logs/
outputs/
.venv/

# C++
**/cmake-build*/
**/build*/
scripts/rsl_rl/play.py
@@ -9,6 +9,12 @@

import argparse
import sys
import os
import torch
import torchvision
import gymnasium as gym
import time
from datetime import datetime

from isaaclab.app import AppLauncher

@@ -18,7 +24,7 @@ import cli_args # isort: skip
# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
-parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
+parser.add_argument("--video_length", type=int, default=2000, help="Length of the recorded video (in steps).")
parser.add_argument(
    "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
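
# Note: --video_length counts simulation steps, so the wall-clock length of a
# recording is roughly video_length * step_dt. A standalone sketch (the step_dt
# value is an assumption; the script reads the real one from env.unwrapped.step_dt):
#   video_length, step_dt = 2000, 1 / 50
#   print(video_length * step_dt)  # -> 40.0 seconds of simulated time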
@@ -53,11 +59,6 @@ simulation_app = app_launcher.app

"""Rest everything follows."""

import gymnasium as gym
import os
import time
import torch

from rsl_rl.runners import DistillationRunner, OnPolicyRunner

from isaaclab.envs import (
@@ -79,6 +80,215 @@ from isaaclab_tasks.utils.hydra import hydra_task_config

import mindbot.tasks  # noqa: F401

# ==============================================================================
# [Update] Multi-camera, multi-environment recorder class
# ==============================================================================
# 1. Make sure the following imports are present at the top of the file
import imageio
import numpy as np

"""use gpu compute to record video"""

class MultiCameraRecorder:
    def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
        self.env = env
        self.camera_names = camera_names
        self.env_indices = env_indices
        self.output_dir = output_dir
        self.fps = fps
        self.frames = {cam_name: {env_idx: [] for env_idx in env_indices} for cam_name in camera_names}
        os.makedirs(self.output_dir, exist_ok=True)

        self.cameras = {}
        for name in camera_names:
            if name in self.env.unwrapped.scene.keys():
                self.cameras[name] = self.env.unwrapped.scene[name]
                print(f"[INFO] Camera {name} linked.")

    def record_step(self):
        """Clone the camera data while keeping it on the GPU."""
        for cam_name, camera_obj in self.cameras.items():
            # read the latest RGB output (sync once before grabbing the data so the backend does not drop it)
            rgb_data = camera_obj.data.output["rgb"]

            for env_idx in self.env_indices:
                # .clone() keeps the frame on the GPU; watch GPU memory usage
                self.frames[cam_name][env_idx].append(rgb_data[env_idx].clone())

    def save_videos(self, filename_suffix=""):
        print("[INFO] Saving videos from GPU to disk...")
        for cam_name, env_dict in self.frames.items():
            for env_idx, frame_list in env_dict.items():
                if not frame_list:
                    continue

                # stack into a (T, H, W, C) video tensor
                video_tensor = torch.stack(frame_list)
                if video_tensor.shape[-1] == 4:  # RGBA -> RGB
                    video_tensor = video_tensor[..., :3]

                # move to the CPU and save
                video_cpu = video_tensor.cpu()
                output_path = os.path.join(self.output_dir, f"{cam_name}_env{env_idx}_{filename_suffix}.mp4")

                # torchvision writes (T, H, W, C) frames
                torchvision.io.write_video(output_path, video_cpu, fps=self.fps)

                # [Important] free GPU memory immediately after saving
                del video_tensor
                del video_cpu
                frame_list.clear()

        torch.cuda.empty_cache()
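
# A standalone sketch (made-up sizes) of the frame layout torchvision.io.write_video
# expects, which is what save_videos() above passes it: a uint8 tensor of shape
# (T, H, W, 3). The Isaac Lab "rgb" camera output is typically already uint8.
#   frames = [torch.randint(0, 256, (240, 320, 3), dtype=torch.uint8) for _ in range(30)]
#   video = torch.stack(frames)  # (T, H, W, C), uint8
#   torchvision.io.write_video("demo.mp4", video, fps=30)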
"""use cpu compute to record video"""
|
||||
# # 2. 修改 MultiCameraRecorder 类
|
||||
# class MultiCameraRecorder:
|
||||
# def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
|
||||
# self.env = env
|
||||
# self.camera_names = camera_names
|
||||
# self.env_indices = env_indices
|
||||
# self.output_dir = output_dir
|
||||
# self.fps = fps
|
||||
# self.frames = {cam_name: {env_idx: [] for env_idx in env_indices} for cam_name in camera_names}
|
||||
# os.makedirs(self.output_dir, exist_ok=True)
|
||||
# self.cameras = {}
|
||||
# for name in camera_names:
|
||||
# try:
|
||||
# self.cameras[name] = self.env.unwrapped.scene[name]
|
||||
# print(f"[INFO][MultiCameraRecorder] Found camera: {name}")
|
||||
# except KeyError:
|
||||
# print(f"[WARN][MultiCameraRecorder] Camera '{name}' not found!")
|
||||
|
||||
# def record_step(self):
|
||||
# """在每个仿真步调用"""
|
||||
# for cam_name, camera_obj in self.cameras.items():
|
||||
# # [关键修改] 获取数据前先确保数据已同步
|
||||
# # 这可以防止读取到正在渲染中的内存导致 access violation
|
||||
# rgb_data = camera_obj.data.output["rgb"]
|
||||
|
||||
# for env_idx in self.env_indices:
|
||||
# if env_idx >= rgb_data.shape[0]: continue
|
||||
|
||||
# # 转换为 CPU 上的 numpy,这种方式通常比 torchvision 的 tensor 堆叠更稳
|
||||
# frame = rgb_data[env_idx].clone().detach().cpu().numpy()
|
||||
# self.frames[cam_name][env_idx].append(frame)
|
||||
|
||||
# def save_videos(self, filename_suffix=""):
|
||||
# """循环结束后调用"""
|
||||
# print(f"[INFO][MultiCameraRecorder] Saving videos...")
|
||||
# for cam_name, env_dict in self.frames.items():
|
||||
# for env_idx, frame_list in env_dict.items():
|
||||
# if not frame_list: continue
|
||||
|
||||
# print(f" -> Saving {cam_name} (Env {env_idx})...")
|
||||
|
||||
# # 处理格式并使用 imageio 保存
|
||||
# processed_frames = []
|
||||
# for img in frame_list:
|
||||
# # [0, 1] -> [0, 255]
|
||||
# if img.dtype != np.uint8:
|
||||
# if img.max() <= 1.01: img = (img * 255).astype(np.uint8)
|
||||
# else: img = img.astype(np.uint8)
|
||||
# # 去掉 Alpha 通道
|
||||
# if img.shape[-1] == 4: img = img[:, :, :3]
|
||||
# processed_frames.append(img)
|
||||
|
||||
# fname = f"{cam_name}_env{env_idx}_{filename_suffix}.mp4"
|
||||
# output_path = os.path.join(self.output_dir, fname)
|
||||
|
||||
# try:
|
||||
# # 使用 imageio 写入视频
|
||||
# imageio.mimsave(output_path, processed_frames, fps=self.fps)
|
||||
# print(f" Saved: {output_path}")
|
||||
# except Exception as e:
|
||||
# print(f" [ERROR] Failed to save {fname}: {e}")
|
||||
|
||||
|
||||
|
||||
# class MultiCameraRecorder:
|
||||
# """
|
||||
# 用于从 Isaac Lab 环境中录制多个相机的图像数据,并支持多个环境实例。
|
||||
# """
|
||||
# def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
|
||||
# self.env = env
|
||||
# self.camera_names = camera_names
|
||||
# self.env_indices = env_indices # 要录制的环境索引列表,例如 [0, 1]
|
||||
# self.output_dir = output_dir
|
||||
# self.fps = fps
|
||||
|
||||
# # 数据结构: self.frames[camera_name][env_idx] = [list of tensors]
|
||||
# self.frames = {
|
||||
# cam_name: {env_idx: [] for env_idx in env_indices}
|
||||
# for cam_name in camera_names
|
||||
# }
|
||||
|
||||
# os.makedirs(self.output_dir, exist_ok=True)
|
||||
|
||||
# # 获取场景中的相机对象
|
||||
# self.cameras = {}
|
||||
# for name in camera_names:
|
||||
# try:
|
||||
# # 尝试获取相机句柄
|
||||
# self.cameras[name] = self.env.unwrapped.scene[name]
|
||||
# print(f"[INFO][MultiCameraRecorder] Found camera: {name}")
|
||||
# except KeyError:
|
||||
# print(f"[WARN][MultiCameraRecorder] Camera '{name}' not found in scene! Skipping.")
|
||||
|
||||
# def record_step(self):
|
||||
# """在每个仿真步调用"""
|
||||
# for cam_name, camera_obj in self.cameras.items():
|
||||
# # 获取 RGB 数据 (Shape: [num_envs, H, W, C])
|
||||
# # 注意:如果数据类型是 RGBA,后面处理
|
||||
# rgb_data = camera_obj.data.output["rgb"]
|
||||
|
||||
# for env_idx in self.env_indices:
|
||||
# # 边界检查
|
||||
# if env_idx >= rgb_data.shape[0]:
|
||||
# continue
|
||||
|
||||
# # [关键] 立即 clone 并转到 CPU。
|
||||
# # 如果不转 CPU,4个相机 * 长时间录制会迅速耗尽 GPU 显存。
|
||||
# frame = rgb_data[env_idx].clone().detach().cpu()
|
||||
|
||||
# self.frames[cam_name][env_idx].append(frame)
|
||||
|
||||
# def save_videos(self, filename_suffix=""):
|
||||
# """循环结束后调用,保存所有视频"""
|
||||
# print(f"[INFO][MultiCameraRecorder] Saving videos for {len(self.cameras)} cameras...")
|
||||
|
||||
# for cam_name, env_dict in self.frames.items():
|
||||
# for env_idx, frame_list in env_dict.items():
|
||||
# if not frame_list:
|
||||
# continue
|
||||
|
||||
# print(f" -> Processing {cam_name} (Env {env_idx}) with {len(frame_list)} frames...")
|
||||
|
||||
# # 堆叠 (T, H, W, C)
|
||||
# video_tensor = torch.stack(frame_list)
|
||||
|
||||
# # 去除 Alpha 通道
|
||||
# if video_tensor.shape[-1] == 4:
|
||||
# video_tensor = video_tensor[..., :3]
|
||||
|
||||
# # (T, H, W, C) -> (T, C, H, W)
|
||||
# video_tensor = video_tensor.permute(0, 3, 1, 2)
|
||||
|
||||
# # 确保是 uint8
|
||||
# if video_tensor.dtype != torch.uint8:
|
||||
# if video_tensor.max() <= 1.0:
|
||||
# video_tensor = (video_tensor * 255).to(torch.uint8)
|
||||
# else:
|
||||
# video_tensor = video_tensor.to(torch.uint8)
|
||||
|
||||
# # 文件名: left_hand_camera_env0_2024-xx-xx.mp4
|
||||
# fname = f"{cam_name}_env{env_idx}_{filename_suffix}.mp4"
|
||||
# output_path = os.path.join(self.output_dir, fname)
|
||||
|
||||
# torchvision.io.write_video(output_path, video_tensor, fps=self.fps)
|
||||
# print(f" Saved: {output_path}")
|
||||
|
||||
# ==============================================================================
|
||||
|
||||
|
||||
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
@@ -92,7 +302,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
    env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs

    # set the environment seed
    # note: certain randomizations occur in the environment initialization so we set the seed here
    env_cfg.seed = agent_cfg.seed
    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device

@@ -122,7 +331,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
    if isinstance(env.unwrapped, DirectMARLEnv):
        env = multi_agent_to_single_agent(env)

-    # wrap for video recording
+    # wrap for video recording (standard Gym recording - Viewport)
    if args_cli.video:
        video_kwargs = {
            "video_folder": os.path.join(log_dir, "videos", "play"),
@@ -151,12 +360,9 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
    policy = runner.get_inference_policy(device=env.unwrapped.device)

    # extract the neural network module
    # we do this in a try-except to maintain backwards compatibility.
    try:
        # version 2.3 onwards
        policy_nn = runner.alg.policy
    except AttributeError:
        # version 2.2 and below
        policy_nn = runner.alg.actor_critic

    # extract the normalizer
@@ -174,9 +380,42 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen

    dt = env.unwrapped.step_dt

    # ==========================================================================
    # Initialize the multi-camera recorder
    # ==========================================================================
    recorder = None
    if args_cli.video:
        # 1. Cameras to record (names must match those defined in mindbot_env_cfg.py)
        target_cameras = [
            "left_hand_camera",
            "right_hand_camera",
            "head_camera",
            "chest_camera",
        ]

        # 2. Environment indices to record (env_nums)
        # By default only env 0 is recorded; use [0, 1] to record the first two.
        # Note: do not record too many environments, otherwise writing the videos
        # becomes very slow and memory-hungry (see the memory sketch below).
        envs_to_record = [0, 1, 2, 3]
        # To record every environment (not recommended unless num_envs is small):
        # envs_to_record = list(range(env.unwrapped.num_envs))

        save_dir = os.path.join(log_dir, "robot_camera_recordings")

        recorder = MultiCameraRecorder(
            env=env,
            camera_names=target_cameras,
            env_indices=envs_to_record,
            output_dir=save_dir,
            fps=int(1 / dt),
        )
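
        # ----------------------------------------------------------------------
        # A rough GPU-memory sketch for the recorder above (standalone, assuming
        # 1280x720 RGBA uint8 frames; the real size comes from the camera config):
        #   steps, n_cams, n_envs, h, w, c = 2000, 4, 4, 720, 1280, 4
        #   gib = steps * n_cams * n_envs * h * w * c / 1024**3
        #   # -> ~110 GiB for four envs, but only ~2.7 GiB for one env at 200
        #   # steps, which is why envs_to_record and --video_length should stay
        #   # small while frames are kept on the GPU.
        # ----------------------------------------------------------------------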
    # reset environment
    obs = env.get_observations()
    timestep = 0

    print("[INFO] Starting simulation loop...")

    # simulate environment
    while simulation_app.is_running():
        start_time = time.time()
@@ -186,12 +425,26 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
            actions = policy(obs)
            # env stepping
            obs, _, dones, _ = env.step(actions)

            # ==================================================================
            # Record camera data every frame
            # ==================================================================
            if recorder:
                recorder.record_step()

            # reset recurrent states for episodes that have terminated
            policy_nn.reset(dones)

            if args_cli.video:
                timestep += 1
                # Exit the play loop after recording one video
                if timestep == args_cli.video_length:
                    # ==============================================================
                    # Save all videos when the loop ends
                    # ==============================================================
                    if recorder:
                        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
                        recorder.save_videos(filename_suffix=timestamp)
                    break

        # time delay for real-time evaluation
@@ -208,3 +461,564 @@ if __name__ == "__main__":
    main()
    # close sim app
    simulation_app.close()

# ==============================================================================
# (The committed file keeps below this point, entirely as comments: a previous
# version of this script built around a single-camera CameraRecorder class, with
# imageio- and torchvision-based save paths and a main() that records
# "left_hand_camera", followed by a doubly commented-out copy of the original
# upstream play script; the latter matches scripts/rsl_rl/play.py.bak.)
# ==============================================================================
scripts/rsl_rl/play.py.bak (new file, 210 lines)
@@ -0,0 +1,210 @@
|
||||
# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
"""Script to play a checkpoint if an RL agent from RSL-RL."""
|
||||
|
||||
"""Launch Isaac Sim Simulator first."""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from isaaclab.app import AppLauncher
|
||||
|
||||
# local imports
|
||||
import cli_args # isort: skip
|
||||
|
||||
# add argparse arguments
|
||||
parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
|
||||
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
|
||||
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
|
||||
parser.add_argument(
|
||||
"--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
|
||||
)
|
||||
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
|
||||
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
|
||||
parser.add_argument(
|
||||
"--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
|
||||
)
|
||||
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
|
||||
parser.add_argument(
|
||||
"--use_pretrained_checkpoint",
|
||||
action="store_true",
|
||||
help="Use the pre-trained checkpoint from Nucleus.",
|
||||
)
|
||||
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
|
||||
# append RSL-RL cli arguments
|
||||
cli_args.add_rsl_rl_args(parser)
|
||||
# append AppLauncher cli args
|
||||
AppLauncher.add_app_launcher_args(parser)
|
||||
# parse the arguments
|
||||
args_cli, hydra_args = parser.parse_known_args()
|
||||
# always enable cameras to record video
|
||||
if args_cli.video:
|
||||
args_cli.enable_cameras = True
|
||||
|
||||
# clear out sys.argv for Hydra
|
||||
sys.argv = [sys.argv[0]] + hydra_args
|
||||
|
||||
# launch omniverse app
|
||||
app_launcher = AppLauncher(args_cli)
|
||||
simulation_app = app_launcher.app
|
||||
|
||||
"""Rest everything follows."""
|
||||
|
||||
import gymnasium as gym
|
||||
import os
|
||||
import time
|
||||
import torch
|
||||
|
||||
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
|
||||
|
||||
from isaaclab.envs import (
|
||||
DirectMARLEnv,
|
||||
DirectMARLEnvCfg,
|
||||
DirectRLEnvCfg,
|
||||
ManagerBasedRLEnvCfg,
|
||||
multi_agent_to_single_agent,
|
||||
)
|
||||
from isaaclab.utils.assets import retrieve_file_path
|
||||
from isaaclab.utils.dict import print_dict
|
||||
|
||||
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
|
||||
from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
|
||||
|
||||
import isaaclab_tasks # noqa: F401
|
||||
from isaaclab_tasks.utils import get_checkpoint_path
|
||||
from isaaclab_tasks.utils.hydra import hydra_task_config
|
||||
|
||||
import mindbot.tasks # noqa: F401
|
||||
|
||||
|
||||
@hydra_task_config(args_cli.task, args_cli.agent)
|
||||
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
|
||||
"""Play with RSL-RL agent."""
|
||||
# grab task name for checkpoint path
|
||||
task_name = args_cli.task.split(":")[-1]
|
||||
train_task_name = task_name.replace("-Play", "")
|
||||
|
||||
# override configurations with non-hydra CLI arguments
|
||||
agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
|
||||
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
|
||||
|
||||
# set the environment seed
|
||||
# note: certain randomizations occur in the environment initialization so we set the seed here
|
||||
env_cfg.seed = agent_cfg.seed
|
||||
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
|
||||
|
||||
# specify directory for logging experiments
|
||||
log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
|
||||
log_root_path = os.path.abspath(log_root_path)
|
||||
print(f"[INFO] Loading experiment from directory: {log_root_path}")
|
||||
if args_cli.use_pretrained_checkpoint:
|
||||
resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
|
||||
if not resume_path:
|
||||
print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
|
||||
return
|
||||
elif args_cli.checkpoint:
|
||||
resume_path = retrieve_file_path(args_cli.checkpoint)
|
||||
else:
|
||||
resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
|
||||
|
||||
log_dir = os.path.dirname(resume_path)
|
||||
|
||||
# set the log directory for the environment (works for all environment types)
|
||||
env_cfg.log_dir = log_dir
|
||||
|
||||
# create isaac environment
|
||||
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
|
||||
|
||||
# convert to single-agent instance if required by the RL algorithm
|
||||
if isinstance(env.unwrapped, DirectMARLEnv):
|
||||
env = multi_agent_to_single_agent(env)
|
||||
|
||||
# wrap for video recording
|
||||
if args_cli.video:
|
||||
video_kwargs = {
|
||||
"video_folder": os.path.join(log_dir, "videos", "play"),
|
||||
"step_trigger": lambda step: step == 0,
|
||||
"video_length": args_cli.video_length,
|
||||
"disable_logger": True,
|
||||
}
|
||||
print("[INFO] Recording videos during training.")
|
||||
print_dict(video_kwargs, nesting=4)
|
||||
env = gym.wrappers.RecordVideo(env, **video_kwargs)
|
||||
|
||||
# wrap around environment for rsl-rl
|
||||
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
|
||||
|
||||
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
|
||||
# load previously trained model
|
||||
if agent_cfg.class_name == "OnPolicyRunner":
|
||||
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
|
||||
elif agent_cfg.class_name == "DistillationRunner":
|
||||
runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
|
||||
else:
|
||||
raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
|
||||
runner.load(resume_path)
|
||||
|
||||
# obtain the trained policy for inference
|
||||
policy = runner.get_inference_policy(device=env.unwrapped.device)
|
||||
|
||||
# extract the neural network module
|
||||
# we do this in a try-except to maintain backwards compatibility.
|
||||
try:
|
||||
# version 2.3 onwards
|
||||
policy_nn = runner.alg.policy
|
||||
except AttributeError:
|
||||
# version 2.2 and below
|
||||
policy_nn = runner.alg.actor_critic
|
||||
|
||||
# extract the normalizer
|
||||
if hasattr(policy_nn, "actor_obs_normalizer"):
|
||||
normalizer = policy_nn.actor_obs_normalizer
|
||||
elif hasattr(policy_nn, "student_obs_normalizer"):
|
||||
normalizer = policy_nn.student_obs_normalizer
|
||||
else:
|
||||
normalizer = None
|
||||
|
||||
# export policy to onnx/jit
|
||||
export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
|
||||
export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
|
||||
export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
|
||||
|
||||
dt = env.unwrapped.step_dt
|
||||
|
||||
# reset environment
|
||||
obs = env.get_observations()
|
||||
timestep = 0
|
||||
# simulate environment
|
||||
while simulation_app.is_running():
|
||||
start_time = time.time()
|
||||
# run everything in inference mode
|
||||
with torch.inference_mode():
|
||||
# agent stepping
|
||||
actions = policy(obs)
|
||||
# env stepping
|
||||
obs, _, dones, _ = env.step(actions)
|
||||
# reset recurrent states for episodes that have terminated
|
||||
policy_nn.reset(dones)
|
||||
if args_cli.video:
|
||||
timestep += 1
|
||||
# Exit the play loop after recording one video
|
||||
if timestep == args_cli.video_length:
|
||||
break
|
||||
|
||||
# time delay for real-time evaluation
|
||||
sleep_time = dt - (time.time() - start_time)
|
||||
if args_cli.real_time and sleep_time > 0:
|
||||
time.sleep(sleep_time)
|
||||
|
||||
# close the simulator
|
||||
env.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# run the main function
|
||||
main()
|
||||
# close sim app
|
||||
simulation_app.close()
|
||||
scripts/rsl_rl/play.py.bak1 (new file, 270 lines)
@@ -0,0 +1,270 @@
import argparse
import sys
import os
import time

import imageio
import numpy as np
import h5py
import torch

# Import AppLauncher first and instantiate SimulationApp as early as possible, so pxr is loaded before anything needs it
from isaaclab.app import AppLauncher
import cli_args  # isort: skip

# CLI
parser = argparse.ArgumentParser(description="Play an RL agent with multi-cam recording.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument("--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations.")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument("--use_pretrained_checkpoint", action="store_true", help="Use the pre-trained checkpoint from Nucleus.")
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
parser.add_argument("--max_steps", type=int, default=None, help="Maximum number of steps; exit early once reached.")
cli_args.add_rsl_rl_args(parser)
AppLauncher.add_app_launcher_args(parser)
args_cli, hydra_args = parser.parse_known_args()
if args_cli.video:
    args_cli.enable_cameras = True
# pass the remaining arguments through to Hydra
sys.argv = [sys.argv[0]] + hydra_args

# ==== Instantiate SimulationApp first ====
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app

# ==== Only then import modules that depend on isaac/pxr ====
import gymnasium as gym
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from isaaclab.envs import DirectMARLEnv, DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg, multi_agent_to_single_agent
from isaaclab.utils.assets import retrieve_file_path
from isaaclab.utils.dict import print_dict
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint

import isaaclab_tasks  # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks  # noqa: F401

CAM_NAMES = ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand", "cam_top", "cam_side"]


@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
    task_name = args_cli.task.split(":")[-1]
    train_task_name = task_name.replace("-Play", "")

    agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
    env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
    env_cfg.seed = agent_cfg.seed
    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device

    log_root_path = os.path.abspath(os.path.join("logs", "rsl_rl", agent_cfg.experiment_name))
    print(f"[INFO] Loading experiment from directory: {log_root_path}")
    if args_cli.use_pretrained_checkpoint:
        resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
        if not resume_path:
            print("[INFO] No pre-trained checkpoint for this task.")
            return
    elif args_cli.checkpoint:
        resume_path = retrieve_file_path(args_cli.checkpoint)
    else:
        resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)

    log_dir = os.path.dirname(resume_path)
    env_cfg.log_dir = log_dir

    env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
    if isinstance(env.unwrapped, DirectMARLEnv):
        env = multi_agent_to_single_agent(env)
    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)

    print(f"[INFO]: Loading model checkpoint from: {resume_path}")
    if agent_cfg.class_name == "OnPolicyRunner":
        runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
    elif agent_cfg.class_name == "DistillationRunner":
        runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
    else:
        raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
    runner.load(resume_path)

    policy = runner.get_inference_policy(device=env.unwrapped.device)
    try:
        policy_nn = runner.alg.policy
    except AttributeError:
        policy_nn = runner.alg.actor_critic

    # export the model
    export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
    normalizer = getattr(policy_nn, "actor_obs_normalizer", getattr(policy_nn, "student_obs_normalizer", None))
    export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
    export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")

    # (A commented-out earlier draft of the same recording loop, HDF5 export and
    # optional MP4 save is kept here in the committed file; it matches the code
    # below except that it wraps the loop in try/finally and also honours
    # --max_steps.)

dt = env.unwrapped.step_dt
|
||||
|
||||
# 录制缓冲
|
||||
cam_buffers = {n: [] for n in CAM_NAMES}
|
||||
joint_log, joint_vel_log, action_log, ts_log = [], [], [], []
|
||||
t0 = time.time()
|
||||
|
||||
obs = env.get_observations()
|
||||
timestep = 0
|
||||
while simulation_app.is_running():
|
||||
start_time = time.time()
|
||||
with torch.inference_mode():
|
||||
actions = policy(obs)
|
||||
obs, _, dones, _ = env.step(actions)
|
||||
policy_nn.reset(dones)
|
||||
|
||||
# 相机帧(取 env0,如需全部 env 去掉 [0])
|
||||
for name in CAM_NAMES:
|
||||
if name not in env.unwrapped.scene.sensors:
|
||||
continue
|
||||
cam = env.unwrapped.scene.sensors[name]
|
||||
rgba = cam.data.output.get("rgba", cam.data.output.get("rgb"))
|
||||
if rgba is None:
|
||||
continue
|
||||
frame = rgba[0].cpu().numpy()
|
||||
if frame.shape[-1] == 4:
|
||||
frame = frame[..., :3]
|
||||
cam_buffers[name].append(frame)
|
||||
|
||||
# 关节 / 速度 / 动作
|
||||
robot = env.unwrapped.scene["Mindbot"]
|
||||
joint_log.append(robot.data.joint_pos.cpu().numpy())
|
||||
joint_vel_log.append(robot.data.joint_vel.cpu().numpy())
|
||||
action_log.append(actions.cpu().numpy())
|
||||
ts_log.append(time.time() - t0)
|
||||
|
||||
if args_cli.video:
|
||||
timestep += 1
|
||||
if timestep == args_cli.video_length:
|
||||
break
|
||||
|
||||
sleep_time = dt - (time.time() - start_time)
|
||||
if args_cli.real_time and sleep_time > 0:
|
||||
time.sleep(sleep_time)
|
||||
|
||||
# save HDF5
h5_path = os.path.join(log_dir, "rollout_multi_cam.h5")
with h5py.File(h5_path, "w") as f:
    f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip")
    f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip")
    f.create_dataset("actions", data=np.stack(action_log), compression="gzip")
    f.create_dataset("timestamps", data=np.array(ts_log))
    for name, frames in cam_buffers.items():
        if not frames:
            continue
        dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip")
        # intrinsics: set these to match your camera parameters
        if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]:
            fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05
        else:
            fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295
        dset.attrs["fx"] = fx
        dset.attrs["fy"] = fy
        dset.attrs["cx"] = cx
        dset.attrs["cy"] = cy
        dset.attrs["focal_length"] = 1.93
print(f"[INFO] Saved HDF5 to {h5_path}")
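
# Optional sanity check (minimal sketch): re-open the file just written and rebuild a
# 3x3 pinhole intrinsics matrix K from the stored attributes. Dataset names and
# attribute keys mirror the create_dataset calls above; K is just the standard
# [[fx, 0, cx], [0, fy, cy], [0, 0, 1]] layout, not an Isaac Lab API.
with h5py.File(h5_path, "r") as f:
    if "cams/cam_head/rgb" in f:
        rgb_head = f["cams/cam_head/rgb"]
        a = rgb_head.attrs
        K = np.array([
            [a["fx"], 0.0, a["cx"]],
            [0.0, a["fy"], a["cy"]],
            [0.0, 0.0, 1.0],
        ])
        print(f"[INFO] cam_head frames: {rgb_head.shape}, intrinsics K:\n{K}")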
# optional: single-camera MP4
head_frames = cam_buffers["cam_head"]
if head_frames:
    fps = int(round(1.0 / dt))
    video_path = os.path.join(log_dir, "cam_head.mp4")
    imageio.mimsave(video_path, head_frames, fps=fps)
    print(f"[INFO] Saved video to {video_path}")

env.close()


if __name__ == "__main__":
    main()
    simulation_app.close()
@@ -18,8 +18,9 @@ from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
|
||||
MINDBOT_CFG = ArticulationCfg(
|
||||
spawn=sim_utils.UsdFileCfg(
|
||||
# 1. UPDATE THE USD PATH to your local file
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot_cd.usd",
|
||||
# usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot.usd",
|
||||
# C:\Users\PC\workpalce\maic_usd_assets\twinlab\MIC_sim-3.0\pupet_arm_sim_product\sim_data_acquisition\usds\Collected_robot_2_3\robot_2_4.usd
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot_cd2.usd",
|
||||
# usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/pupet_arm_sim_product/sim_data_acquisition/usds/Collected_robot_2_3/robot_2_6.usd",
|
||||
rigid_props=sim_utils.RigidBodyPropertiesCfg(
|
||||
disable_gravity=False,
|
||||
max_depenetration_velocity=1.0,
|
||||
@@ -66,26 +67,26 @@ MINDBOT_CFG = ArticulationCfg(
|
||||
# "l_joint6": 0.0,
|
||||
# Right arm joints
|
||||
# -45° -> -0.7854
|
||||
# "r_joint1": -0.7854,
|
||||
# # 70° -> 1.2217
|
||||
# "r_joint2": 1.2217,
|
||||
# # 90° -> 1.5708
|
||||
# "r_joint3": 1.5708,
|
||||
# # -90° -> -1.5708
|
||||
# "r_joint4": -1.5708,
|
||||
# # 90° -> 1.5708
|
||||
# "r_joint5": 1.5708,
|
||||
# # 70° -> 1.2217
|
||||
# "r_joint6": 1.2217,
|
||||
# # Right arm joints
|
||||
"r_joint1": 0.0,
|
||||
"r_joint2": -1.5708,
|
||||
"r_joint1": -0.7854,
|
||||
# 70° -> 1.2217
|
||||
"r_joint2": 1.2217,
|
||||
# 90° -> 1.5708
|
||||
"r_joint3": 1.5708,
|
||||
# -90° -> -1.5708
|
||||
"r_joint4": -1.5708,
|
||||
# 90° -> 1.5708
|
||||
"r_joint5": 1.5708,
|
||||
# 70° -> 1.2217
|
||||
"r_joint6": 1.2217,
|
||||
# Right arm joints
|
||||
# "r_joint1": 0.0,
|
||||
# # "r_joint2": -1.5708,
|
||||
# "r_joint2": 0.0,
|
||||
"r_joint3": 0.0,
|
||||
"r_joint4": 0.0,
|
||||
"r_joint5": 0.0,
|
||||
"r_joint6": 0.0,
|
||||
# # left wheel
|
||||
# "r_joint3": 0.0,
|
||||
# "r_joint4": 0.0,
|
||||
# "r_joint5": 0.0,
|
||||
# "r_joint6": 0.0,
|
||||
# # # left wheel
|
||||
# "left_b_revolute_Joint": 0.0,
|
||||
# "left_f_revolute_Joint": 0.0,
|
||||
# # right wheel
|
||||
@@ -102,7 +103,7 @@ MINDBOT_CFG = ArticulationCfg(
|
||||
# trunk
|
||||
"PrismaticJoint": 0.3,
|
||||
# head
|
||||
"head_revoluteJoint": 0.5236
|
||||
"head_revoluteJoint": 0.0 #0.5236
|
||||
},
|
||||
# The initial (x, y, z) position of the robot's base in the world
|
||||
pos=(0, 0, 0.05),
|
||||
@@ -114,7 +115,7 @@ MINDBOT_CFG = ArticulationCfg(
|
||||
joint_names_expr=["l_joint[1-6]"], # This matches l_joint1, l_joint2, ..., l_joint6
|
||||
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
|
||||
velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs
|
||||
stiffness=0.0, #100000.0, # Note: Tune this for desired control performance
|
||||
stiffness=10000.0, #10000.0, # Note: Tune this for desired control performance
|
||||
damping=1000.0, #10000.0, # Note: Tune this for desired control performance
|
||||
),
|
||||
# Group for the 6 right arm joints using a regular expression
|
||||
@@ -122,7 +123,7 @@ MINDBOT_CFG = ArticulationCfg(
|
||||
joint_names_expr=["r_joint[1-6]"], # This matches r_joint1, r_joint2, ..., r_joint6
|
||||
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
|
||||
velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs
|
||||
stiffness=10000.0, #100000.0, # Note: Tune this for desired control performance1
|
||||
stiffness=0.0, #10000.0, # Note: Tune this for desired control performance1
|
||||
damping=1000.0, #10000.0, # Note: Tune this for desired control performance
|
||||
),
|
||||
"head": ImplicitActuatorCfg(joint_names_expr=["head_revoluteJoint"], stiffness=10000.0, damping=1000.0),
|
||||
@@ -142,9 +143,10 @@ MINDBOT_CFG = ArticulationCfg(
|
||||
),
|
||||
"grippers": ImplicitActuatorCfg(
|
||||
joint_names_expr=[".*_hand_joint.*"],
|
||||
stiffness=10000.0,
|
||||
stiffness=10000.0, #10000.0
|
||||
damping=1000.0,
|
||||
effort_limit_sim=50.0, # default is 10, gemini said it is too small, so I set it to 50.
|
||||
velocity_limit_sim=50.0,
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import gymnasium as gym
|
||||
|
||||
from . import agents
|
||||
|
||||
##
|
||||
# Register Gym environments.
|
||||
##
|
||||
|
||||
|
||||
gym.register(
|
||||
id="Template-centrifuge-lidup-v0",
|
||||
entry_point="isaaclab.envs:ManagerBasedRLEnv",
|
||||
disable_env_checker=True,
|
||||
kwargs={
|
||||
"env_cfg_entry_point": f"{__name__}.centrifuge_env_cfg:MindbotEnvCfg",
|
||||
# "env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
|
||||
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
|
||||
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
|
||||
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
|
||||
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
|
||||
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
|
||||
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
|
||||
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
|
||||
},
|
||||
)
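
# Minimal usage sketch (assumes the simulation app is already launched, as in the
# training script, and that parse_env_cfg from isaaclab_tasks resolves the entry
# point registered above):
#
#   import gymnasium as gym
#   import mindbot.tasks  # noqa: F401  (executes the gym.register call above)
#   from isaaclab_tasks.utils import parse_env_cfg
#
#   env_cfg = parse_env_cfg("Template-centrifuge-lidup-v0", num_envs=4)
#   env = gym.make("Template-centrifuge-lidup-v0", cfg=env_cfg)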
|
||||
@@ -0,0 +1,4 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
@@ -0,0 +1,78 @@
|
||||
params:
|
||||
seed: 42
|
||||
|
||||
# environment wrapper clipping
|
||||
env:
|
||||
# added to the wrapper
|
||||
clip_observations: 5.0
|
||||
# can make custom wrapper?
|
||||
clip_actions: 1.0
|
||||
|
||||
algo:
|
||||
name: a2c_continuous
|
||||
|
||||
model:
|
||||
name: continuous_a2c_logstd
|
||||
|
||||
# doesn't have this fine grained control but made it close
|
||||
network:
|
||||
name: actor_critic
|
||||
separate: False
|
||||
space:
|
||||
continuous:
|
||||
mu_activation: None
|
||||
sigma_activation: None
|
||||
|
||||
mu_init:
|
||||
name: default
|
||||
sigma_init:
|
||||
name: const_initializer
|
||||
val: 0
|
||||
fixed_sigma: True
|
||||
mlp:
|
||||
units: [32, 32]
|
||||
activation: elu
|
||||
d2rl: False
|
||||
|
||||
initializer:
|
||||
name: default
|
||||
regularizer:
|
||||
name: None
|
||||
|
||||
load_checkpoint: False # flag which sets whether to load the checkpoint
|
||||
load_path: '' # path to the checkpoint to load
|
||||
|
||||
config:
|
||||
name: cartpole_direct
|
||||
env_name: rlgpu
|
||||
device: 'cuda:0'
|
||||
device_name: 'cuda:0'
|
||||
multi_gpu: False
|
||||
ppo: True
|
||||
mixed_precision: False
|
||||
normalize_input: True
|
||||
normalize_value: True
|
||||
num_actors: -1 # configured from the script (based on num_envs)
|
||||
reward_shaper:
|
||||
scale_value: 0.1
|
||||
normalize_advantage: True
|
||||
gamma: 0.99
|
||||
tau : 0.95
|
||||
learning_rate: 5e-4
|
||||
lr_schedule: adaptive
|
||||
kl_threshold: 0.008
|
||||
score_to_win: 20000
|
||||
max_epochs: 150
|
||||
save_best_after: 50
|
||||
save_frequency: 25
|
||||
grad_norm: 1.0
|
||||
entropy_coef: 0.0
|
||||
truncate_grads: True
|
||||
e_clip: 0.2
|
||||
horizon_length: 32
|
||||
minibatch_size: 16384
|
||||
mini_epochs: 8
|
||||
critic_coef: 4
|
||||
clip_value: True
|
||||
seq_length: 4
|
||||
bounds_loss_coef: 0.0001
|
||||
@@ -0,0 +1,72 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from isaaclab.utils import configclass
|
||||
|
||||
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
|
||||
|
||||
|
||||
@configclass
|
||||
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
|
||||
num_steps_per_env = 48 # 稍微增加每轮步数
|
||||
max_iterations = 5000 # 增加迭代次数,给它足够的时间学习
|
||||
save_interval = 100
|
||||
# experiment_name = "mindbot_grasp" # 建议改个名,不要叫 cartpole 了
|
||||
experiment_name = "mindbot_pullUltrasoundLidUp"
|
||||
|
||||
policy = RslRlPpoActorCriticCfg(
|
||||
init_noise_std=0.7,
|
||||
actor_obs_normalization=True, # 开启归一化
|
||||
critic_obs_normalization=True, # 开启归一化
|
||||
# 增加网络容量:三层 MLP,宽度足够
|
||||
actor_hidden_dims=[128, 64, 32],
|
||||
critic_hidden_dims=[128, 64, 32],
|
||||
activation="elu",
|
||||
)
|
||||
|
||||
algorithm = RslRlPpoAlgorithmCfg(
|
||||
value_loss_coef=1.0,
|
||||
use_clipped_value_loss=True,
|
||||
clip_param=0.2,
|
||||
entropy_coef=0.005, # 保持适度的探索
|
||||
num_learning_epochs=5,
|
||||
num_mini_batches=8,
|
||||
learning_rate=3e-4, # 如果网络变大后不稳定,可以尝试降低到 5.0e-4
|
||||
schedule="adaptive",
|
||||
gamma=0.99,
|
||||
lam=0.95,
|
||||
desired_kl=0.01,
|
||||
max_grad_norm=1.0,
|
||||
)
|
||||
|
||||
|
||||
# @configclass
|
||||
# class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
|
||||
# num_steps_per_env = 16
|
||||
# max_iterations = 150
|
||||
# save_interval = 50
|
||||
# experiment_name = "cartpole_direct"
|
||||
# policy = RslRlPpoActorCriticCfg(
|
||||
# init_noise_std=1.0,
|
||||
# actor_obs_normalization=False,
|
||||
# critic_obs_normalization=False,
|
||||
# actor_hidden_dims=[32, 32],
|
||||
# critic_hidden_dims=[32, 32],
|
||||
# activation="elu",
|
||||
# )
|
||||
# algorithm = RslRlPpoAlgorithmCfg(
|
||||
# value_loss_coef=1.0,
|
||||
# use_clipped_value_loss=True,
|
||||
# clip_param=0.2,
|
||||
# entropy_coef=0.005,
|
||||
# num_learning_epochs=5,
|
||||
# num_mini_batches=4,
|
||||
# learning_rate=1.0e-3,
|
||||
# schedule="adaptive",
|
||||
# gamma=0.99,
|
||||
# lam=0.95,
|
||||
# desired_kl=0.01,
|
||||
# max_grad_norm=1.0,
|
||||
# )
|
||||
@@ -0,0 +1,20 @@
|
||||
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
|
||||
seed: 42
|
||||
|
||||
n_timesteps: !!float 1e6
|
||||
policy: 'MlpPolicy'
|
||||
n_steps: 16
|
||||
batch_size: 4096
|
||||
gae_lambda: 0.95
|
||||
gamma: 0.99
|
||||
n_epochs: 20
|
||||
ent_coef: 0.01
|
||||
learning_rate: !!float 3e-4
|
||||
clip_range: !!float 0.2
|
||||
policy_kwargs:
|
||||
activation_fn: nn.ELU
|
||||
net_arch: [32, 32]
|
||||
squash_output: False
|
||||
vf_coef: 1.0
|
||||
max_grad_norm: 1.0
|
||||
device: "cuda:0"
|
||||
@@ -0,0 +1,111 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: True
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: -2.9
|
||||
fixed_log_std: True
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ONE
|
||||
discriminator: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
# AMP memory (reference motion dataset)
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
motion_dataset:
|
||||
class: RandomMemory
|
||||
memory_size: 200000
|
||||
|
||||
# AMP memory (preventing discriminator overfitting)
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
reply_buffer:
|
||||
class: RandomMemory
|
||||
memory_size: 1000000
|
||||
|
||||
|
||||
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
|
||||
agent:
|
||||
class: AMP
|
||||
rollouts: 16
|
||||
learning_epochs: 6
|
||||
mini_batches: 2
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 5.0e-05
|
||||
learning_rate_scheduler: null
|
||||
learning_rate_scheduler_kwargs: null
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
amp_state_preprocessor: RunningStandardScaler
|
||||
amp_state_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 0.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.5
|
||||
discriminator_loss_scale: 5.0
|
||||
amp_batch_size: 512
|
||||
task_reward_weight: 0.0
|
||||
style_reward_weight: 1.0
|
||||
discriminator_batch_size: 4096
|
||||
discriminator_reward_scale: 2.0
|
||||
discriminator_logit_regularization_scale: 0.05
|
||||
discriminator_gradient_penalty_scale: 5.0
|
||||
discriminator_weight_decay_scale: 1.0e-04
|
||||
# rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "humanoid_amp_run"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 80000
|
||||
environment_info: log
|
||||
@@ -0,0 +1,80 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: False
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
|
||||
agent:
|
||||
class: IPPO
|
||||
rollouts: 16
|
||||
learning_epochs: 8
|
||||
mini_batches: 1
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 3.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cart_double_pendulum_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,82 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: True
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
|
||||
agent:
|
||||
class: MAPPO
|
||||
rollouts: 16
|
||||
learning_epochs: 8
|
||||
mini_batches: 1
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 3.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
shared_state_preprocessor: RunningStandardScaler
|
||||
shared_state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cart_double_pendulum_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,80 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: False
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
|
||||
agent:
|
||||
class: PPO
|
||||
rollouts: 32
|
||||
learning_epochs: 8
|
||||
mini_batches: 8
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 5.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 0.1
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cartpole_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,636 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import math
|
||||
import torch
|
||||
from isaaclab.actuators import ImplicitActuatorCfg
|
||||
import isaaclab.sim as sim_utils
|
||||
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
|
||||
from isaaclab.envs import ManagerBasedRLEnvCfg
|
||||
from isaaclab.managers import EventTermCfg as EventTerm
|
||||
from isaaclab.managers import ObservationGroupCfg as ObsGroup
|
||||
from isaaclab.managers import ObservationTermCfg as ObsTerm
|
||||
from isaaclab.managers import RewardTermCfg as RewTerm
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
from isaaclab.managers import TerminationTermCfg as DoneTerm
|
||||
from isaaclab.managers import CurriculumTermCfg as CurrTerm
|
||||
from isaaclab.scene import InteractiveSceneCfg
|
||||
from isaaclab.utils import configclass
|
||||
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
|
||||
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
|
||||
from isaaclab.sensors import CameraCfg
|
||||
|
||||
from . import mdp
|
||||
|
||||
##
|
||||
# Pre-defined configs
|
||||
##
|
||||
|
||||
from mindbot.robot.mindbot import MINDBOT_CFG
|
||||
|
||||
# ====== 其他物体配置 ======
|
||||
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
|
||||
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
|
||||
|
||||
|
||||
TABLE_CFG=RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Table",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.95, -0.3, 0.005],
|
||||
rot=[0.7071, 0.0, 0.0, 0.7071],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
|
||||
rigid_props=RigidBodyPropertiesCfg(
|
||||
rigid_body_enabled= True,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
disable_gravity=False,
|
||||
),
|
||||
collision_props=CollisionPropertiesCfg(
|
||||
collision_enabled=True,
|
||||
contact_offset=0.0005,#original 0.02
|
||||
rest_offset=0
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
LID_CFG = RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Lid",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.95, 0.73, 0.1055],
|
||||
rot=[1.0, 0.0, 0.0, 0.0],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/Lid_B.usd",
|
||||
# scale=(0.2, 0.2, 0.2),
|
||||
rigid_props=RigidBodyPropertiesCfg(
|
||||
rigid_body_enabled= True,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
max_angular_velocity=1000.0,
|
||||
max_linear_velocity=1000.0,
|
||||
max_depenetration_velocity=0.5,#original 5.0
|
||||
linear_damping=5.0, #original 0.5
|
||||
angular_damping=5.0, #original 0.5
|
||||
disable_gravity=False,
|
||||
),
|
||||
collision_props=CollisionPropertiesCfg(
|
||||
collision_enabled=True,
|
||||
contact_offset=0.0005,#original 0.02
|
||||
rest_offset=0
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
CENTRIFUGE_CFG = ArticulationCfg(
|
||||
spawn=sim_utils.UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/centrifuge.usd",
|
||||
rigid_props=sim_utils.RigidBodyPropertiesCfg(
|
||||
disable_gravity=False,
|
||||
max_depenetration_velocity=1.0,
|
||||
linear_damping=0.5,
|
||||
angular_damping=0.5,
|
||||
),
|
||||
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
|
||||
enabled_self_collisions=False,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
stabilization_threshold=1e-6,
|
||||
# [Important] The base must be fixed, otherwise the centrifuge tips over when the robot presses on the lid.
|
||||
# fix_root_link=True,
|
||||
),
|
||||
),
|
||||
init_state=ArticulationCfg.InitialStateCfg(
|
||||
# 1. As with the robot config, define the initial joint angles here.
joint_pos={
"r1": math.radians(0.0),  # rotor position (irrelevant here)

# [Key change]: start with the lid open.
# The USD joint limits are (-100, 0) degrees; -100 is fully open.
"r2": math.radians(-100.0),
},
|
||||
pos=(0.95, -0.3, 0.855),
|
||||
rot=[-0.7071, 0.0, 0.0, 0.7071],
|
||||
),
|
||||
actuators={
|
||||
"lid_passive_mechanism": ImplicitActuatorCfg(
|
||||
joint_names_expr=["r2"],
|
||||
|
||||
# [Key difference from the arm actuators]
# The robot arm uses stiffness=10000.0 because it must hold precise poses without wobbling.
# If the centrifuge lid joint were also set to 10000, the lid would be effectively welded
# in place and the robot could never push it closed.
# A value around 200.0 works because:
# 1. it is strong enough to hold the lid open against gravity, and
# 2. it is soft enough that the lid yields when the robot presses down on it.
stiffness=200.0,
|
||||
|
||||
# damping: a little resistance to mimic a gas strut and keep the lid from oscillating like a spring
|
||||
damping=20.0,
|
||||
|
||||
# keep the effort limit high enough that the actuator can hold the lid's weight
|
||||
effort_limit_sim=1000.0,
|
||||
velocity_limit_sim=100.0,
|
||||
),
|
||||
# the rotor can be stiffer; the robot never needs to back-drive it
|
||||
"rotor_control": ImplicitActuatorCfg(
|
||||
joint_names_expr=["r1"],
|
||||
stiffness=1000.0,
|
||||
damping=10.0,
|
||||
),
|
||||
}
|
||||
)
|
||||
# CENTRIFUGE_CFG = ArticulationCfg(
|
||||
# spawn=sim_utils.UsdFileCfg(
|
||||
# usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/centrifuge.usd",
|
||||
# rigid_props=sim_utils.RigidBodyPropertiesCfg(
|
||||
# disable_gravity=False,
|
||||
# max_depenetration_velocity=1.0,
|
||||
# linear_damping=0.5,
|
||||
# angular_damping=0.5,
|
||||
# ),
|
||||
# articulation_props=sim_utils.ArticulationRootPropertiesCfg(
|
||||
# enabled_self_collisions=False,#
|
||||
# solver_position_iteration_count=32,
|
||||
# solver_velocity_iteration_count=16,
|
||||
# stabilization_threshold=1e-6,
|
||||
# fix_root_link=True,
|
||||
# ),
|
||||
# ),
|
||||
# init_state=ArticulationCfg.InitialStateCfg(
|
||||
# joint_pos={
|
||||
# "r1": math.radians(-100.0),
|
||||
# "r2": math.radians(-100.0),
|
||||
# },
|
||||
# pos=(0.95, -0.3, 0.855),
|
||||
# rot=[-0.7071, 0.0, 0.0, 0.7071],
|
||||
# ),
|
||||
# # actuators={}
|
||||
# actuators={
|
||||
# "passive_damper": ImplicitActuatorCfg(
|
||||
# # ".*" 表示匹配该USD文件内的所有关节(无论是轮子、屏幕转轴还是其他)
|
||||
# joint_names_expr=["r2"],
|
||||
|
||||
# # 【关键逻辑】
|
||||
# # 使用较高的 Stiffness (刚度) 模拟电机或弹簧锁死在特定位置
|
||||
# # 这样盖子才不会掉下来
|
||||
# stiffness=200.0,
|
||||
|
||||
# # 适当的阻尼,防止盖子像弹簧一样疯狂抖动
|
||||
# damping=20.0,
|
||||
|
||||
# # 确保力矩足够克服盖子的重力
|
||||
# effort_limit_sim=1000.0,
|
||||
# velocity_limit_sim=100.0,
|
||||
# ),
|
||||
# # 如果有其他关节(如转子r1),可以单独配置
|
||||
# "rotor_control": ImplicitActuatorCfg(
|
||||
# joint_names_expr=["r1"],
|
||||
# stiffness=1000.0,
|
||||
# damping=10.0,
|
||||
# # joint_names_expr=[".*"],
|
||||
|
||||
# # stiffness=0.0,
|
||||
# # damping=10.0,
|
||||
# # effort_limit_sim=100.0,
|
||||
# # velocity_limit_sim=100.0,
|
||||
# ),
|
||||
# }
|
||||
# )
|
||||
|
||||
|
||||
ROOM_CFG = AssetBaseCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Room",
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
|
||||
),
|
||||
)
|
||||
|
||||
##
|
||||
# Scene definition
|
||||
##
|
||||
|
||||
|
||||
@configclass
|
||||
class MindbotSceneCfg(InteractiveSceneCfg):
|
||||
"""Configuration for a cart-pole scene."""
|
||||
|
||||
# ground plane
|
||||
ground = AssetBaseCfg(
|
||||
prim_path="/World/ground",
|
||||
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
|
||||
)
|
||||
|
||||
# robot
|
||||
# Room: AssetBaseCfg = ROOM_CFG.replace(prim_path="{ENV_REGEX_NS}/Room")
|
||||
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
|
||||
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
|
||||
centrifuge: ArticulationCfg = CENTRIFUGE_CFG.replace(prim_path="{ENV_REGEX_NS}/centrifuge")
|
||||
lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
|
||||
# lights
|
||||
dome_light = AssetBaseCfg(
|
||||
prim_path="/World/DomeLight",
|
||||
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
|
||||
)
|
||||
"""
|
||||
Camera definitions (kept here for reference, currently disabled):
|
||||
# head_camera=CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
# update_period=1/120,
|
||||
# height=480,
|
||||
# width=640,
|
||||
# data_type=["rgb"],
|
||||
# spawn=None,
|
||||
# )
|
||||
"""
|
||||
# left_hand_camera=CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
|
||||
# update_period=1/120,
|
||||
# height=480,
|
||||
# width=640,
|
||||
# data_types=["rgb"],
|
||||
# spawn=None,
|
||||
# )
|
||||
# right_hand_camera=CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
|
||||
# update_period=1/120,
|
||||
# height=480,
|
||||
# width=640,
|
||||
# data_types=["rgb"],
|
||||
# spawn=None,
|
||||
# )
|
||||
# head_camera=CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
# update_period=1/120,
|
||||
# height=480,
|
||||
# width=640,
|
||||
# data_types=["rgb"],
|
||||
# spawn=None,
|
||||
# )
|
||||
# chest_camera=CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
|
||||
# update_period=1/120,
|
||||
# height=480,
|
||||
# width=640,
|
||||
# data_types=["rgb"],
|
||||
# spawn=None,
|
||||
# )
|
||||
##
|
||||
# MDP settings
|
||||
##
|
||||
|
||||
def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
|
||||
asset = env.scene[asset_cfg.name]
|
||||
body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
|
||||
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
|
||||
return pos_w
|
||||
|
||||
|
||||
@configclass
|
||||
class ActionsCfg:
|
||||
"""Action specifications for the MDP."""
|
||||
|
||||
# Task-space controller: the policy outputs a 6-D end-effector increment (position + orientation)
|
||||
left_arm_ee = DifferentialInverseKinematicsActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["l_joint[1-6]"], # 左臂的6个关节
|
||||
body_name="left_hand_body", # 末端执行器body名称
|
||||
controller=DifferentialIKControllerCfg(
|
||||
command_type="pose", # 控制位置+姿态
|
||||
use_relative_mode=True, # 相对模式:动作是增量
|
||||
ik_method="dls", # Damped Least Squares方法
|
||||
),
|
||||
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
|
||||
)
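
# Scale sanity check (plain arithmetic from this config): with use_relative_mode=True,
# an action of +1.0 on x shifts the end-effector target by 0.01 m per control step.
# With decimation=4 and sim dt=1/120 (set in __post_init__ below) the control rate is
# 30 Hz, so the commanded EE speed tops out around 0.3 m/s and roughly 0.75 rad/s per
# rotation axis.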
|
||||
|
||||
# right_arm_fixed = mdp.JointPositionActionCfg(
|
||||
# asset_name="Mindbot",
|
||||
# joint_names=["r_joint[1-6]"], # 对应 l_joint1 到 l_joint6
|
||||
|
||||
# # 1. 缩放设为 0:这让 RL 策略无法控制它,它不会动
|
||||
# scale=0.0,
|
||||
|
||||
# # 2. 偏移设为你的目标角度(弧度):这告诉电机“永远停在这里”
|
||||
# # 对应 (135, -70, -90, 90, 90, -70)
|
||||
# offset={
|
||||
# "r_joint1": 2.3562,
|
||||
# "r_joint2": -1.2217,
|
||||
# "r_joint3": -1.5708,
|
||||
# "r_joint4": 1.5708,
|
||||
# "r_joint5": 1.5708,
|
||||
# "r_joint6": -1.2217,
|
||||
# },
|
||||
# )
|
||||
|
||||
grippers_position = mdp.BinaryJointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["left_hand_joint_left", "left_hand_joint_right"],
|
||||
|
||||
# 修正:使用字典格式
|
||||
# open_command_expr={"关节名正则": 值}
|
||||
open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
|
||||
|
||||
# close_command_expr={"关节名正则": 值}
|
||||
close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
|
||||
)
|
||||
|
||||
|
||||
trunk_position = mdp.JointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["PrismaticJoint"],
|
||||
scale=0.00,
|
||||
offset=0.3,
|
||||
clip=None
|
||||
)
|
||||
|
||||
|
||||
@configclass
|
||||
class ObservationsCfg:
|
||||
"""Observation specifications for the MDP."""
|
||||
|
||||
@configclass
|
||||
class PolicyCfg(ObsGroup):
|
||||
"""Observations for policy group."""
|
||||
|
||||
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
|
||||
left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
|
||||
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
|
||||
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
|
||||
centrifuge_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("centrifuge")})
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
self.enable_corruption = False
|
||||
self.concatenate_terms = True
|
||||
|
||||
# observation groups
|
||||
policy: PolicyCfg = PolicyCfg()
|
||||
|
||||
|
||||
@configclass
|
||||
class EventCfg:
|
||||
"""Configuration for events."""
|
||||
|
||||
# Reset all joints to init_state (no offset); this term must be present.
|
||||
reset_mindbot_all_joints = EventTerm(
|
||||
func=mdp.reset_joints_by_offset,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot"),
|
||||
"position_range": (0.0, 0.0),
|
||||
"velocity_range": (0.0, 0.0),
|
||||
},
|
||||
)
|
||||
# 2. Base mounting error (consider shrinking this to about 2 cm)
|
||||
reset_mindbot_base = EventTerm(
|
||||
func=mdp.reset_root_state_uniform,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot"),
|
||||
"pose_range": {
|
||||
"x": (-0.05, 0.05),
|
||||
"y": (-0.05, 0.05),
|
||||
"z": (0.74, 0.75),
|
||||
"yaw": (-0.1, 0.1),
|
||||
},
|
||||
"velocity_range": {"x": (0.0, 0.0), "y": (0.0, 0.0)},
|
||||
},
|
||||
)
|
||||
# 1. Small arm-joint offsets (0.01 rad is about 0.6 degrees, which is appropriate)
|
||||
reset_mindbot_left_arm = EventTerm(
|
||||
func=mdp.reset_joints_by_offset,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
|
||||
"position_range": (-0.01, 0.01),
|
||||
"velocity_range": (0.0, 0.0),
|
||||
},
|
||||
)
|
||||
|
||||
reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
|
||||
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
reset_centrifuge = EventTerm(
|
||||
func=mdp.reset_root_state_uniform,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("centrifuge"),
|
||||
"pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)},
|
||||
"velocity_range": {"x": (0.0, 0.0)}
|
||||
}
|
||||
)
|
||||
reset_lid = EventTerm(
|
||||
func=mdp.reset_root_state_uniform,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("lid"),
|
||||
"pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)},
|
||||
"velocity_range": {"x": (0.0, 0.0)}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@configclass
|
||||
class RewardsCfg:
|
||||
"""Reward terms for the MDP."""
|
||||
|
||||
gripper_lid_orientation_alignment = RewTerm(
|
||||
func=mdp.gripper_lid_orientation_alignment,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"gripper_body_name": "left_hand_body",
|
||||
"gripper_forward_axis": (0.0, 0.0, 1.0),
|
||||
"gripper_width_axis": (0.0, 1.0, 0.0),
|
||||
"lid_handle_axis": (0.0, 1.0, 0.0),
|
||||
"max_angle_deg": 85.0, # 允许60度内的偏差
|
||||
},
|
||||
weight=5.0 #original 5.0
|
||||
)
|
||||
|
||||
# stage 2
|
||||
# Position alignment reward: encourage the grippers to straddle the lid, with the gripper centre height_offset above it in Z
|
||||
gripper_lid_position_alignment = RewTerm(
|
||||
func=mdp.gripper_lid_position_alignment,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"height_offset": 0.07, # Z方向:lid 上方 0.1m
|
||||
"std": 0.3, # 位置对齐的容忍度
|
||||
},
|
||||
weight=3.0 #original 3.0
|
||||
)
|
||||
approach_lid_penalty = RewTerm(
|
||||
func=mdp.gripper_lid_distance_penalty,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"height_offset": 0.07,
|
||||
},
|
||||
# A negative weight turns this term into a penalty:
# at a distance of 0.5 m and weight=-1.0, the contribution is -0.5.
weight=-1.0
|
||||
)
|
||||
# 【新增】精细对齐 (引导进入 2cm 圈)
|
||||
gripper_lid_fine_alignment = RewTerm(
|
||||
func=mdp.gripper_lid_position_alignment,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"height_offset": 0.07, # 【注意】这个高度必须非常准,和 interaction 判定高度一致
|
||||
"std": 0.05, # 非常陡峭的梯度,只在 5-10cm 内有效
|
||||
},
|
||||
weight=10.0 # 高权重
|
||||
)
|
||||
|
||||
gripper_close_interaction = RewTerm(
|
||||
func=mdp.gripper_close_when_near,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"joint_names": ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
"target_close_pos": 0.03,
|
||||
"height_offset": 0.04,
|
||||
"grasp_radius": 0.03,
|
||||
},
|
||||
weight=10.0
|
||||
)
|
||||
|
||||
lid_lifted_reward = RewTerm(
|
||||
func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
|
||||
},
|
||||
weight=10.0 # 给一个足够大的权重
|
||||
)
|
||||
|
||||
lid_lifting_reward = RewTerm(
|
||||
func=mdp.lid_lift_progress_reward,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
|
||||
"target_height_lift": 0.2,
|
||||
"height_offset": 0.07, # 与其他奖励保持一致
|
||||
"std": 0.1
|
||||
},
|
||||
# 权重设大一点,告诉它这是最终目标
|
||||
weight=10.0
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
||||
@configclass
|
||||
class TerminationsCfg:
|
||||
"""Termination terms for the MDP."""
|
||||
|
||||
time_out = DoneTerm(func=mdp.time_out, time_out=True)
|
||||
|
||||
mindbot_fly_away = DoneTerm(
|
||||
func=mdp.base_height_failure,
|
||||
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
|
||||
)
|
||||
|
||||
# lid_fly_away = DoneTerm(
|
||||
# func=mdp.base_height_failure,
|
||||
# params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
|
||||
# )
|
||||
|
||||
# 新增:盖子掉落判定
|
||||
# 如果盖子掉回桌面(高度接近初始高度),且已经运行了一段时间(避开刚开始重置的瞬间)
|
||||
# lid_dropped = DoneTerm(
|
||||
# func=mdp.base_height_failure, # 复用高度判定函数
|
||||
# params={
|
||||
# "asset_cfg": SceneEntityCfg("lid"),
|
||||
# "minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了
|
||||
# },
|
||||
# )
|
||||
|
||||
|
||||
|
||||
##
|
||||
# Environment configuration
|
||||
##
|
||||
|
||||
@configclass
|
||||
class CurriculumCfg:
|
||||
pass
|
||||
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
|
||||
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
|
||||
|
||||
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
|
||||
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
|
||||
|
||||
# position_std_scheduler = CurrTerm(
|
||||
# func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类
|
||||
# params={
|
||||
# # 路径:rewards管理器 -> 你的奖励项名 -> params字典 -> std键
|
||||
# "address": "rewards.gripper_lid_position_alignment.params.std",
|
||||
|
||||
# # 修改逻辑:使用我们刚才写的函数
|
||||
# "modify_fn": mdp.annealing_std,
|
||||
|
||||
# # 传给函数的参数
|
||||
# "modify_params": {
|
||||
# "start_step": 00, # 约 600 轮
|
||||
# "end_step": 5_000, # 约 1200 轮
|
||||
# "start_std": 0.3,
|
||||
# "end_std": 0.05,
|
||||
# }
|
||||
# }
|
||||
# )
|
||||
|
||||
@configclass
|
||||
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
|
||||
# Scene settings
|
||||
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=5.0)
|
||||
# Basic settings
|
||||
observations: ObservationsCfg = ObservationsCfg()
|
||||
actions: ActionsCfg = ActionsCfg()
|
||||
events: EventCfg = EventCfg()
|
||||
# MDP settings
|
||||
rewards: RewardsCfg = RewardsCfg()
|
||||
terminations: TerminationsCfg = TerminationsCfg()
|
||||
# curriculum: CurriculumCfg = CurriculumCfg()
|
||||
|
||||
# Post initialization
|
||||
def __post_init__(self) -> None:
|
||||
"""Post initialization."""
|
||||
# general settings
|
||||
self.decimation = 4  # original 2; with sim dt = 1/120 below, this keeps the control rate at 30 Hz
|
||||
self.episode_length_s = 10
|
||||
# viewer settings
|
||||
self.viewer.eye = (3.5, 0.0, 13.2)#(3.5, 0.0, 3.2)
|
||||
# simulation settings
|
||||
self.sim.dt = 1 / 120 #original 1 / 60
|
||||
self.sim.render_interval = self.decimation
|
||||
# # 1. Base GPU heap memory
|
||||
self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256MB
|
||||
self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
|
||||
self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
|
||||
self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # ~16.8 million contact points
self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # ~1 million contact patches
|
||||
|
||||
# # 5. Aggregate pair capacities (for complex articulations)
|
||||
self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
|
||||
self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
|
||||
self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024
|
||||
@@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
"""This sub-module contains the functions that are specific to the environment."""
|
||||
|
||||
from isaaclab.envs.mdp import * # noqa: F401, F403
|
||||
|
||||
from .rewards import * # noqa: F401, F403
|
||||
from .terminations import * # noqa: F401, F403
|
||||
from .gripper import * # noqa: F401, F403
|
||||
from .curriculums import * # noqa: F401, F403
|
||||
@@ -0,0 +1,50 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
    import torch
    from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
|
||||
from isaaclab.envs.mdp import modify_term_cfg
|
||||
|
||||
def annealing_std(
|
||||
env: ManagerBasedRLEnv,
|
||||
env_ids: torch.Tensor,
|
||||
current_value: float,
|
||||
start_step: int,
|
||||
end_step: int,
|
||||
start_std: float,
|
||||
end_std: float
|
||||
):
|
||||
"""
|
||||
根据步数线性退火 std 值。
|
||||
|
||||
Args:
|
||||
current_value: 当前的参数值 (系统自动传入)
|
||||
start_step: 开始退火的步数
|
||||
end_step: 结束退火的步数
|
||||
start_std: 初始 std
|
||||
end_std: 最终 std
|
||||
"""
|
||||
current_step = env.common_step_counter
|
||||
|
||||
# 1. 还没到开始时间 -> 保持初始值 (或者不改)
|
||||
if current_step < start_step:
|
||||
# 如果当前值还没被设为 start_std,就强制设一下,否则不动
|
||||
return start_std
|
||||
|
||||
# 2. 已经超过结束时间 -> 保持最终值
|
||||
elif current_step >= end_step:
|
||||
return end_std
|
||||
|
||||
# 3. 在中间 -> 线性插值
|
||||
else:
|
||||
ratio = (current_step - start_step) / (end_step - start_step)
|
||||
new_std = start_std + (end_std - start_std) * ratio
|
||||
return new_std
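
# Usage sketch (mirrors the commented-out curriculum term in the env config): anneal
# the `std` of the position-alignment reward from 0.3 to 0.05 over the first 5_000
# common steps by pointing `modify_term_cfg` at this function.
#
#   position_std_scheduler = CurrTerm(
#       func=mdp.modify_term_cfg,
#       params={
#           "address": "rewards.gripper_lid_position_alignment.params.std",
#           "modify_fn": annealing_std,
#           "modify_params": {"start_step": 0, "end_step": 5_000,
#                             "start_std": 0.3, "end_std": 0.05},
#       },
#   )
#
# At step 2_500 the linear interpolation gives 0.3 + (0.05 - 0.3) * 0.5 = 0.175.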
|
||||
@@ -0,0 +1,54 @@
|
||||
# 假设这是在你的 mdp.py 文件中
|
||||
|
||||
from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
|
||||
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
|
||||
from isaaclab.utils import configclass
|
||||
import torch
|
||||
|
||||
|
||||
class CoupledJointPositionAction(JointPositionAction):
|
||||
def __init__(self, cfg: 'CoupledJointPositionActionCfg', env):
|
||||
super().__init__(cfg, env)
|
||||
|
||||
@property
|
||||
def action_dim(self) -> int:
|
||||
"""强制 ActionManager 认为只需要 1D 输入。"""
|
||||
return 1
|
||||
|
||||
"""
|
||||
这是运行时被实例化的类。它继承自 JointPositionAction。
|
||||
"""
|
||||
def process_actions(self, actions: torch.Tensor):
|
||||
scale = self.cfg.scale
|
||||
offset = self.cfg.offset
|
||||
# store the raw actions
|
||||
self._raw_actions[:] = torch.clamp(actions, -1, 1)
|
||||
# apply the affine transformations
|
||||
target_position_interval = self._raw_actions * scale + offset
|
||||
right_cmd = target_position_interval
|
||||
left_cmd = -target_position_interval
|
||||
# import pdb; pdb.set_trace()
|
||||
self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)
|
||||
# print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}")
|
||||
# clip actions
|
||||
if self.cfg.clip is not None:
|
||||
self._processed_actions = torch.clamp(
|
||||
self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
|
||||
)
|
||||
|
||||
|
||||
@configclass
|
||||
class CoupledJointPositionActionCfg(JointPositionActionCfg):
|
||||
"""
|
||||
配置类。关键在于设置 class_type 指向上面的实现类。
|
||||
"""
|
||||
# !!! 关键点 !!!
|
||||
# 告诉 ActionManager: "当你看到这个配置时,请实例化 CoupledJointPositionAction 这个类"
|
||||
class_type = CoupledJointPositionAction
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
# 这里的检查逻辑是没问题的,因为 ActionManager 会在初始化时调用它
|
||||
if len(self.joint_names) != 2:
|
||||
raise ValueError("Requires exactly two joint names.")
|
||||
super().__post_init__()
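
# Usage sketch (hypothetical wiring; joint names taken from the env config in this
# commit): drive both gripper fingers from a single 1-D action by using this cfg in
# place of the BinaryJointPositionActionCfg term.
#
#   grippers_position = CoupledJointPositionActionCfg(
#       asset_name="Mindbot",
#       joint_names=["left_hand_joint_left", "left_hand_joint_right"],
#       scale=0.03,   # the clamped [-1, 1] action maps to -/+0.03 on the two joints
#       offset=0.0,
#   )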
|
||||
|
||||
@@ -0,0 +1,881 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import torch
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from isaaclab.assets import Articulation, RigidObject
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
|
||||
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
|
||||
"""Penalize joint position deviation from a target value."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
asset: Articulation = env.scene[asset_cfg.name]
|
||||
# wrap the joint positions to (-pi, pi)
|
||||
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
|
||||
# compute the reward
|
||||
return torch.sum(torch.square(joint_pos - target), dim=1)
|
||||
|
||||
|
||||
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
|
||||
"""Reward the agent for lifting the lid above the minimal height."""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
|
||||
height = lid.data.root_pos_w[:, 2]
|
||||
return torch.where(height > minimal_height, 1.0, 0.0)
|
||||
|
||||
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Reward the agent for reaching the lid using tanh-kernel."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# float 化,防止成为 (3,) 向量
|
||||
if not isinstance(std, float):
|
||||
if torch.is_tensor(std):
|
||||
std = std.item()
|
||||
else:
|
||||
std = float(std)
|
||||
|
||||
# Target lid position: (num_envs, 3)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# Gripper position: (num_envs, 3)
|
||||
# body_idx = robot.find_bodies(gripper_body_name)[0]
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
|
||||
# Distance of the gripper to the lid: (num_envs,)
|
||||
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
|
||||
|
||||
return 1 - torch.tanh(distance / std)
|
||||
|
||||
|
||||
def lid_grasped(env: ManagerBasedRLEnv,
|
||||
distance_threshold: float = 0.05,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Reward the agent for grasping the lid (close and lifted)."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# Target lid position: (num_envs, 3)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# Gripper position: (num_envs, 3)
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
|
||||
|
||||
# Distance of the gripper to the lid: (num_envs,)
|
||||
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
|
||||
|
||||
# Check if close and lifted
|
||||
is_close = distance < distance_threshold
|
||||
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
|
||||
|
||||
return torch.where(is_close & is_lifted, 1.0, 0.0)
|
||||
|
||||
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Combined reward for reaching the lid AND lifting it."""
|
||||
# Get reaching reward
|
||||
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
|
||||
# Get lifting reward
|
||||
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
|
||||
# Combine rewards multiplicatively
|
||||
return reach_reward * lift_reward
|
||||
|
||||
|
||||
|
||||
# def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# left_gripper_name: str = "left_hand__l",
|
||||
# right_gripper_name: str = "left_hand_r",
|
||||
# height_offset: float = 0.1,
|
||||
# std: float = 0.1) -> torch.Tensor:
|
||||
# """奖励函数:夹爪相对于lid的位置对齐。
|
||||
|
||||
# Args:
|
||||
# env: 环境实例
|
||||
# lid_cfg: lid的配置
|
||||
# robot_cfg: 机器人的配置
|
||||
# left_gripper_name: 左侧夹爪body名称
|
||||
# right_gripper_name: 右侧夹爪body名称
|
||||
# side_distance: Y方向的期望距离
|
||||
# height_offset: Z方向的期望高度偏移
|
||||
# std: tanh核函数的标准差
|
||||
|
||||
# Returns:
|
||||
# 位置对齐奖励 (num_envs,)
|
||||
# """
|
||||
# lid: RigidObject = env.scene[lid_cfg.name]
|
||||
# robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# # 获取两个夹爪的位置
|
||||
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
|
||||
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
|
||||
|
||||
# # 计算夹爪中心位置
|
||||
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
|
||||
|
||||
# # 计算相对位置(夹爪中心相对于 lid 中心)
|
||||
# rel_pos = gripper_center - lid_pos_w # (num_envs, 3)
|
||||
|
||||
# # X 方向位置:应该对齐(接近 0)
|
||||
# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6
|
||||
# x_reward = 1.0 - torch.tanh(x_dist / std)
|
||||
|
||||
# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance)
|
||||
# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6
|
||||
# # y_error = torch.abs(y_dist - side_distance)
|
||||
# # y_reward = 1.0 - torch.tanh(y_error / std)
|
||||
# y_reward = 1.0 - torch.tanh(y_dist / std)
|
||||
|
||||
# # Z 方向位置:应该在 lid 上方 height_offset 处
|
||||
# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6
|
||||
# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6
|
||||
# z_reward = 1.0 - torch.tanh(z_error / std)
|
||||
|
||||
# # 组合位置奖励
|
||||
# position_reward = x_reward * y_reward * z_reward
|
||||
|
||||
# return position_reward
|
||||
|
||||
|
||||
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
height_offset: float = 0.02,
|
||||
std: float = 0.1) -> torch.Tensor:
|
||||
"""奖励函数:夹爪相对于lid的位置对齐(简化版)。
|
||||
|
||||
计算夹爪中心到目标点(lid上方height_offset处)的距离奖励。
|
||||
|
||||
Args:
|
||||
env: 环境实例
|
||||
lid_cfg: lid的配置
|
||||
robot_cfg: 机器人的配置
|
||||
left_gripper_name: 左侧夹爪body名称
|
||||
right_gripper_name: 右侧夹爪body名称
|
||||
height_offset: Z方向的期望高度偏移(目标点在lid上方)
|
||||
std: tanh核函数的标准差
|
||||
|
||||
Returns:
|
||||
位置对齐奖励 (num_envs,)
|
||||
"""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# 目标点:lid位置 + height_offset(Z方向)
|
||||
target_pos = lid_pos_w.clone()
|
||||
# print(target_pos[0])
|
||||
target_pos[:, 2] += height_offset # 在lid上方height_offset处
|
||||
# print(target_pos[0])
|
||||
# 获取两个夹爪的位置
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
|
||||
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
|
||||
|
||||
# 计算夹爪中心位置
|
||||
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
|
||||
# print(gripper_center[0])
|
||||
# 计算夹爪中心到目标点的3D距离
|
||||
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# 距离奖励:距离越小,奖励越大
|
||||
position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
return position_reward
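
# Worked example of the tanh kernel (plain arithmetic): with std=0.05, a gripper
# centre 10 cm from the target point scores 1 - tanh(0.10 / 0.05) ≈ 0.04, while at
# 1 cm it scores 1 - tanh(0.01 / 0.05) ≈ 0.80, so a small std only rewards the last
# few centimetres of the approach.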
|
||||
|
||||
|
||||
# ... (keep the existing imports)


# ==============================================================================
# New: direct distance penalty (linear guidance)
# ==============================================================================
def gripper_lid_distance_penalty(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    height_offset: float = 0.07,
) -> torch.Tensor:
    """Euclidean distance from the gripper center to the target point.

    Returns a positive distance; assign it a negative weight in the reward config
    (e.g. weight=-1.0) so that a larger distance means a larger penalty, i.e. a
    lower total reward.
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]

    # 1. Target point (lid center + height offset).
    lid_pos_w = lid.data.root_pos_w[:, :3].clone()
    lid_pos_w[:, 2] += height_offset

    # 2. Gripper center.
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)

    left_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
    right_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
    gripper_center = (left_pos + right_pos) / 2.0

    # 3. Distance.
    distance = torch.norm(gripper_center - lid_pos_w, dim=1)

    return distance
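# ------------------------------------------------------------------------------
# A minimal sketch of how the two reach terms above could be registered with the
# reward manager. The config class, import path and weights below are illustrative
# assumptions, not part of this file; only RewardTermCfg/SceneEntityCfg and the two
# reward functions are existing names.
#
#   from isaaclab.managers import RewardTermCfg as RewTerm
#   from isaaclab.utils import configclass
#   import mindbot.tasks.lift_lid.mdp as mdp  # hypothetical module path
#
#   @configclass
#   class RewardsCfg:
#       # positive distance with a negative weight -> dense linear penalty
#       reach_penalty = RewTerm(func=mdp.gripper_lid_distance_penalty, weight=-1.0,
#                               params={"height_offset": 0.07})
#       # bounded tanh-kernel alignment reward
#       reach_align = RewTerm(func=mdp.gripper_lid_position_alignment, weight=2.0,
#                             params={"height_offset": 0.02, "std": 0.1})
# ------------------------------------------------------------------------------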
|
||||
|
||||
# ==============================================================================
# Refined: position alignment (same shaping, logic kept explicit)
# NOTE: this redefines gripper_lid_position_alignment above, so Python keeps only
# this later definition and the earlier one is effectively dead code.
# ==============================================================================
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
                                   lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
                                   robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                                   left_gripper_name: str = "left_hand__l",
                                   right_gripper_name: str = "left_hand_r",
                                   height_offset: float = 0.02,
                                   std: float = 0.1) -> torch.Tensor:
    """Reward: tanh-kernel distance reward (the closer the gripper gets, the faster the score grows)."""
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]

    lid_pos_w = lid.data.root_pos_w[:, :3]

    target_pos = lid_pos_w.clone()
    target_pos[:, 2] += height_offset

    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)

    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]

    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0

    distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6

    # The shaping itself stays unchanged; what matters is how std and the weight
    # are set in the reward config.
    position_reward = 1.0 - torch.tanh(distance / std)

    return position_reward
|
||||
|
||||
|
||||
# def gripper_lid_orientation_alignment(
|
||||
# env,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# gripper_body_name: str = "left_hand_body",
|
||||
# gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z
|
||||
# gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行)
|
||||
# lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# # 1. 获取对象
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 2. 获取姿态
|
||||
# lid_quat_w = lid.data.root_quat_w
|
||||
# body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
# gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
|
||||
|
||||
# # 3. 垂直对齐 (Vertical Alignment)
|
||||
# # 目标:夹爪 +Y 指向 World -Z
|
||||
# grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
|
||||
# # print(grip_fwd_world[0])
|
||||
# world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
|
||||
# dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) #add
|
||||
# # 映射 [-1, 1] -> [0, 1],且全程有梯度
|
||||
# # 即使是朝天 (+1),只要稍微往下转一点,分数就会增加
|
||||
# # rew_vertical = (dot_vertical + 1.0) / 2.0
|
||||
# val_vertical = torch.clamp((dot_vertical + 1.0) / 2.0, 0.0, 1.0)
|
||||
# rew_vertical = torch.pow(val_vertical, 2) + 1e-4
|
||||
# # nan -> 0
|
||||
# rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
|
||||
|
||||
# # 4. 偏航对齐 (Yaw Alignment)
|
||||
# # 目标:夹爪 +Z 平行于 Lid +Y
|
||||
# grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
|
||||
|
||||
# lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
|
||||
|
||||
# dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
|
||||
# rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
|
||||
# # nan -> 0
|
||||
# rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
|
||||
|
||||
# # 5. 组合
|
||||
# total_reward = rew_vertical * rew_yaw
|
||||
|
||||
# total_reward=torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
|
||||
|
||||
# # debug
|
||||
# if not torch.isfinite(total_reward).all():
|
||||
# print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
|
||||
# print(f"rew_vertical: {rew_vertical}")
|
||||
# print(f"rew_yaw: {rew_yaw}")
|
||||
|
||||
# return total_reward
|
||||
|
||||
|
||||
def gripper_lid_orientation_alignment(
    env,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    gripper_body_name: str = "left_hand_body",
    gripper_forward_axis: tuple = (0.0, 0.0, 1.0),  # corrected to local Z
    gripper_width_axis: tuple = (0.0, 1.0, 0.0),    # corrected to local Y (kept parallel to the handle)
    lid_handle_axis: tuple = (0.0, 1.0, 0.0),       # corrected to local Y
    max_angle_deg: float = 60.0,                    # maximum allowed angular deviation (degrees)
) -> torch.Tensor:
    # 1. Fetch the scene objects.
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]

    # 2. Fetch the poses.
    lid_quat_w = lid.data.root_quat_w
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]

    # 3. Vertical alignment: the gripper forward axis should point along world -Z (downwards).
    grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
    grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
    world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)

    # Dot product of unit vectors = cos(angle).
    dot_vertical = torch.clamp(torch.sum(grip_fwd_world * world_down, dim=1), -1.0, 1.0)

    # Angle in radians.
    angle_rad = torch.acos(dot_vertical)
    max_angle_rad = torch.tensor(max_angle_deg * torch.pi / 180.0, device=env.device)

    # Reward decays smoothly from 1.0 at 0 deg to 0.0 at max_angle_deg.
    angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
    rew_vertical = 1.0 - angle_normalized  # the smaller the angle, the larger the reward

    # Beyond max_angle_deg the reward is zero.
    rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical))

    # nan -> 0
    rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)

    # 4. Yaw alignment: the gripper width axis should be parallel to the lid handle axis.
    grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
    grip_width_world = quat_apply(gripper_quat_w, grip_width_local)

    lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
    lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)

    dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
    rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
    # nan -> 0
    rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)

    # 5. Combine.
    total_reward = rew_vertical * rew_yaw

    total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)

    # debug
    if not torch.isfinite(total_reward).all():
        print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
        print(f"rew_vertical: {rew_vertical}")
        print(f"rew_yaw: {rew_yaw}")

    return total_reward
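# Standalone restatement of the vertical gating above, useful as a quick sanity
# check outside the simulator. This helper is an illustrative addition (it is not
# called by any reward term) and simply mirrors the angle-based shaping:
# 1.0 at 0 deg, linear falloff, 0.0 at and beyond max_angle_deg.
def _vertical_gate_demo(dot_vertical: torch.Tensor, max_angle_deg: float = 60.0) -> torch.Tensor:
    angle = torch.acos(torch.clamp(dot_vertical, -1.0, 1.0))
    max_angle = torch.deg2rad(torch.tensor(max_angle_deg, device=dot_vertical.device))
    rew = 1.0 - torch.clamp(angle / max_angle, 0.0, 1.0)
    return torch.where(angle <= max_angle, rew, torch.zeros_like(rew))

# Example: _vertical_gate_demo(torch.tensor([1.0, 0.866, 0.0])) -> approx [1.0, 0.5, 0.0]
# (0 deg, 30 deg and 90 deg with a 60 deg cutoff).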
|
||||
|
||||
|
||||
# def gripper_open_close_reward(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# left_gripper_name: str = "left_hand_body", # 用于计算距离的 body
|
||||
# # 注意:确保 joint_names 只包含那两个夹爪关节
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# close_threshold: float = 0.05, # 5cm 以内视为“近”,需要闭合
|
||||
# target_open_pos: float = 0.0, # 【修正】张开是 0.0
|
||||
# target_close_pos: float = 0.03 # 【修正】闭合是 0.03
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 逻辑很简单:
|
||||
# 1. 如果 距离 < close_threshold:目标关节位置 = 0.03 (闭合)
|
||||
# 2. 如果 距离 >= close_threshold:目标关节位置 = 0.0 (张开)
|
||||
# 3. 返回 -abs(当前关节 - 目标关节),即惩罚误差。
|
||||
# """
|
||||
|
||||
# # 1. 获取对象
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 2. 计算距离 (末端 vs Lid中心)
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
|
||||
# # distance: (num_envs,)
|
||||
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
|
||||
|
||||
# # 3. 动态确定目标关节值
|
||||
# # 如果距离小于阈值,目标设为 close_pos,否则设为 open_pos
|
||||
# # target_val: (num_envs, 1)
|
||||
# # print(distance)
|
||||
# target_val = torch.where(
|
||||
# distance < close_threshold,
|
||||
# torch.tensor(target_close_pos, device=env.device),
|
||||
# torch.tensor(target_open_pos, device=env.device)
|
||||
# ).unsqueeze(1)
|
||||
|
||||
# # 4. 获取当前关节位置
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # joint_pos: (num_envs, 2)
|
||||
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
|
||||
|
||||
# # 取绝对值(防止左指是负数的情况,虽然你的配置里范围看起来都是正的,加上abs更保险)
|
||||
# current_joint_pos_abs = torch.abs(current_joint_pos)
|
||||
|
||||
# # 5. 计算误差
|
||||
# # error: (num_envs,) -> 两个手指误差的平均值
|
||||
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
|
||||
|
||||
# # # 6. 返回负奖励 (惩罚)
|
||||
# # # 误差越大,惩罚越大。完全符合要求时奖励为 0。
|
||||
# # return -error
|
||||
# # =====================================================
|
||||
# # 🚀 核心修复:安全输出
|
||||
# # =====================================================
|
||||
# # 1. 使用 tanh 限制数值范围在 [-1, 0]
|
||||
# reward = -torch.tanh(error / 0.01)
|
||||
|
||||
# # 2. 过滤 NaN!这一步能救命!
|
||||
# # 如果物理引擎算出 NaN,这里会把它变成 0.0,防止训练崩溃
|
||||
# reward = torch.nan_to_num(reward, nan=0.0, posinf=0.0, neginf=-1.0)
|
||||
|
||||
# return reward
|
||||
|
||||
|
||||
# def gripper_open_close_reward(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# left_gripper_name: str = "left_hand_body",
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# close_threshold: float = 0.05,
|
||||
# target_open_pos: float = 0.0,
|
||||
# target_close_pos: float = 0.03,
|
||||
# height_offset: float = 0.06, # 新增:与位置奖励保持一致
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 逻辑:
|
||||
# 1. 计算夹爪到【目标抓取点】(Lid上方 height_offset 处) 的距离。
|
||||
# 2. 如果距离 < close_threshold:目标关节位置 = 闭合。
|
||||
# 3. 否则:目标关节位置 = 张开。
|
||||
# """
|
||||
|
||||
# # 1. 获取对象
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 2. 计算目标抓取点位置 (Lid位置 + Z轴偏移)
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
# lid_pos_w[:, 2] += height_offset # 修正为抓取点位置
|
||||
|
||||
# # 获取夹爪位置
|
||||
# body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
|
||||
# # 3. 计算距离 (夹爪 vs 目标抓取点)
|
||||
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
|
||||
|
||||
# # 4. 动态确定目标关节值
|
||||
# # 靠近抓取点 -> 闭合;远离 -> 张开
|
||||
# target_val = torch.where(
|
||||
# distance < close_threshold,
|
||||
# torch.tensor(target_close_pos, device=env.device),
|
||||
# torch.tensor(target_open_pos, device=env.device)
|
||||
# ).unsqueeze(1)
|
||||
|
||||
# # 5. 获取当前关节位置
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
|
||||
# current_joint_pos_abs = torch.abs(current_joint_pos)
|
||||
|
||||
# # 6. 计算误差并返回奖励
|
||||
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
|
||||
|
||||
# # 使用 tanh 限制数值范围
|
||||
# reward = 1.0 - torch.tanh(error / 0.01) # 误差越小奖励越大
|
||||
|
||||
# return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
def _is_grasped(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg,
    robot_cfg: SceneEntityCfg,
    left_gripper_name: str,
    right_gripper_name: str,
    joint_names: list,
    height_offset: float,
    grasp_radius: float,
    target_close_pos: float
) -> torch.Tensor:
    """Internal helper: decide whether the lid is successfully grasped.

    Conditions:
    1. The gripper center is near the handle target point (sphere check).
    2. The gripper is actively closing (joint effort check).
    3. (Key) The gripper height is appropriate, i.e. it is not pressing down on
       top of the lid (Z-axis check).
    """
    # 1. Fetch the scene objects.
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]

    # 2. Target point (handle center).
    lid_pos_w = lid.data.root_pos_w[:, :3].clone()
    lid_pos_w[:, 2] += height_offset

    # 3. Gripper positions.
    l_ids, _ = robot.find_bodies([left_gripper_name])
    r_ids, _ = robot.find_bodies([right_gripper_name])
    pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
    pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
    gripper_center = (pos_L + pos_R) / 2.0

    # 4. Condition 1: distance check (inside the sphere around the handle).
    dist_to_handle = torch.norm(gripper_center - lid_pos_w, dim=-1)
    is_near = dist_to_handle < grasp_radius

    # 5. Condition 2: Z-height check (prevent pressing down on top of the lid).
    # The gripper center must stay near the handle target point: a small vertical
    # deviation is allowed, but it must not sit much higher than the handle.
    z_diff = gripper_center[:, 2] - lid_pos_w[:, 2]
    is_z_valid = torch.abs(z_diff) < 0.03  # 3 cm tolerance

    # 6. Condition 3: closing check (the gripper is actively closing).
    joint_ids, _ = robot.find_joints(joint_names)
    # Joint positions (absolute values).
    joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])

    # 6a. Effort check: any closing effort counts. The upper bound is dropped so
    # that a firm grip is not misclassified as a failure.
    is_effort_closing = torch.all(joint_pos > 0.005, dim=1)

    # 6b. Geometry check: if something is actually grasped, the handle keeps the
    # fingertips apart, so their distance cannot be too small.
    dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)

    # Assuming the handle is roughly 1 cm (0.01 m) thick:
    # fingertip distance < 0.005 -> the fingers touch each other (empty grasp);
    # fingertip distance >= 0.005 -> something is caught in between.
    is_not_empty = dist_fingertips > 0.005

    # Combined check:
    # 1. near the handle (is_near)
    # 2. at a valid height (is_z_valid)
    # 3. actively closing (is_effort_closing)
    # 4. fingertips not fully closed (is_not_empty) -> something is actually grasped
    return is_near & is_z_valid & is_effort_closing & is_not_empty
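# Performance note: every reward/termination term above calls `find_bodies` and
# `find_joints` each step, which re-runs name matching. Since the names are fixed,
# the indices could be resolved once and reused. A minimal sketch (assumption: a
# single robot articulation, so the body name alone is a sufficient cache key):
_BODY_INDEX_CACHE: dict[str, int] = {}


def _cached_body_index(robot: Articulation, body_name: str) -> int:
    """Resolve a body index once and reuse it on later calls."""
    if body_name not in _BODY_INDEX_CACHE:
        ids, _ = robot.find_bodies([body_name], preserve_order=True)
        _BODY_INDEX_CACHE[body_name] = ids[0]
    return _BODY_INDEX_CACHE[body_name]

# Usage inside a term (hypothetical):
#   pos_L = robot.data.body_pos_w[:, _cached_body_index(robot, left_gripper_name), :3]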
|
||||
|
||||
|
||||
def gripper_close_when_near(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    target_close_pos: float = 0.03,
    height_offset: float = 0.02,
    grasp_radius: float = 0.05
) -> torch.Tensor:
    # 1. Check whether the basic grasp conditions are met.
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )

    # 2. Compute the clamping intensity.
    robot = env.scene[robot_cfg.name]
    joint_ids, _ = robot.find_joints(joint_names)
    # Absolute joint positions, shape (num_envs, num_joints).
    current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
    # Mean closing depth of the two finger joints.
    mean_joint_pos = torch.mean(current_joint_pos, dim=1)

    # Reward factor: current position / target closed position, so the reward
    # approaches 1.0 as the joints approach 0.03; this pushes the agent to close fully.
    clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)

    # 3. Pay the clamping reward only when the grasp check passes; if the gripper
    # is misaligned or grasping nothing, the reward stays 0.
    return torch.where(is_grasped, clamping_factor, 0.0)
|
||||
|
||||
# def gripper_close_when_near(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
|
||||
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
|
||||
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# target_close_pos: float = 0.03,
|
||||
# height_offset: float = 0.03,
|
||||
# grasp_radius: float = 0.02 # 球面半径 2cm
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# # 1. 获取位置
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
# lid_pos_w[:, 2] += height_offset # 把手中心
|
||||
|
||||
# # 获取左右指尖位置
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
|
||||
# # 计算夹爪中心
|
||||
# pos_center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# # 2. 距离判定 (Is Inside Sphere?)
|
||||
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
|
||||
# is_in_sphere = (dist_center < grasp_radius).float()
|
||||
|
||||
# # 3. "在中间"判定 (Is Between?)
|
||||
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
|
||||
# vec_LR = pos_R - pos_L # 左指指向右指
|
||||
# vec_LO = lid_pos_w - pos_L # 左指指向物体
|
||||
|
||||
# # 计算投影比例 t
|
||||
# # P_proj = P_L + t * (P_R - P_L)
|
||||
# # t = (vec_LO . vec_LR) / |vec_LR|^2
|
||||
# # 如果 0 < t < 1,说明投影在两个指尖之间
|
||||
|
||||
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
|
||||
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
|
||||
# t = dot / len_sq
|
||||
|
||||
# is_between = (t > 0.0) & (t < 1.0)
|
||||
# is_between = is_between.float()
|
||||
|
||||
# # 4. 闭合判定
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # 注意:如果是 Binary Action,可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
|
||||
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
|
||||
# # 只有当非常接近闭合目标时才给分
|
||||
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
|
||||
|
||||
# # 5. 最终奖励
|
||||
# # 只有三者同时满足才给 1.0 分
|
||||
# reward = is_in_sphere * is_between * is_closing
|
||||
|
||||
# return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
# def lid_is_lifted(
|
||||
# env:ManagerBasedRLEnv,
|
||||
# minimal_height:float,
|
||||
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# lid_height = lid.data.root_pos_w[:, 2]
|
||||
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
|
||||
# reward=torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
# return reward
|
||||
|
||||
|
||||
def lid_is_lifted(
    env: ManagerBasedRLEnv,
    minimal_height: float = 0.05,  # relative lift threshold
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    height_offset: float = 0.03,
    grasp_radius: float = 0.1,
    target_close_pos: float = 0.03,
) -> torch.Tensor:

    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )

    lid = env.scene[lid_cfg.name]

    # 1. Heights.
    current_height = lid.data.root_pos_w[:, 2]
    # Initial height taken from the default root state.
    initial_height = lid.data.default_root_state[:, 2]

    # 2. Lift amount.
    lift_height = torch.clamp(current_height - initial_height, min=0.0)

    # 3. Velocity check (guard against the lid being knocked away).
    # If the speed exceeds 1.0 m/s, treat the lid as knocked away and give no reward.
    lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
    is_stable = lid_vel < 1.0

    # 4. Reward.
    # Shaping term: the higher the lift, the larger the reward.
    shaping_reward = lift_height * 2.0
    # Success bonus: lift above the threshold.
    success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)

    # Combine: requires is_grasped AND is_stable.
    reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))

    return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
|
||||
def lid_lift_success_reward(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand_body",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    # Key parameters
    settled_height: float = 0.75,        # resting height of the lid on the ultrasound unit (tune to the simulation)
    target_lift_height: float = 0.05,    # target lift height (5 cm)
    grasp_dist_threshold: float = 0.05,  # distance threshold for the grasp check
    closed_pos: float = 0.03             # gripper closed-position threshold
) -> torch.Tensor:
    """Lift reward: a high score is only paid when the lid is lifted by the target
    amount *relative* to its resting height while being firmly grasped.
    """
    # 1. Fetch data.
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]

    # Current lid height and position.
    lid_z = lid.data.root_pos_w[:, 2]
    lid_pos = lid.data.root_pos_w[:, :3]

    # Gripper position.
    body_ids, _ = robot.find_bodies([left_gripper_name])
    gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]

    # 2. Condition 1: is the lid firmly grasped?
    # Distance check.
    distance = torch.norm(gripper_pos - lid_pos, dim=-1)
    is_close = distance < grasp_dist_threshold

    # Closing check.
    joint_ids, _ = robot.find_joints(joint_names)
    joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
    # Treat > 80% of the closed position as a firm grip.
    is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)

    is_grasped = is_close & is_closed

    # 3. Condition 2: relative lift = current height - initial resting height.
    current_lift = lid_z - settled_height

    # 4. Reward.
    lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)

    # Shaping reward.
    lift_reward = lift_progress

    # Success bonus: an extra reward once the lift reaches the target height.
    success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)

    # Combine: the lift reward is only paid while is_grasped is True.
    total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))

    # 5. Safe output.
    return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
|
||||
def lid_lift_progress_reward(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    initial_height: float = 0.8,
    target_height_lift: float = 0.15,
    height_offset: float = 0.02,  # must match the grasp reward
    grasp_radius: float = 0.1,    # slightly relaxed while lifting, so jitter does not drop the reward
    target_close_pos: float = 0.03,
    std: float = 0.05             # standard deviation of the tanh shaping
) -> torch.Tensor:

    # 1. Check whether the lid is grasped.
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )

    # 2. Compute the lift height.
    lid = env.scene[lid_cfg.name]
    current_height = lid.data.root_pos_w[:, 2]
    lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)

    # 3. Reward: only pay the height reward while is_grasped is True, which rules
    # out collecting reward by knocking or launching the lid away.
    progress = torch.tanh(lift_height / std)

    reward = torch.where(is_grasped, progress, 0.0)

    return reward
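# Picking ``std`` for the tanh shaping above: progress = tanh(lift_height / std).
# With std = 0.05 m the reward saturates quickly:
#   lift = 0.01 m -> tanh(0.2) ~= 0.20
#   lift = 0.05 m -> tanh(1.0) ~= 0.76
#   lift = 0.15 m -> tanh(3.0) ~= 0.995
# A larger std spreads the gradient over a longer portion of the lift.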
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
from __future__ import annotations
|
||||
import torch
|
||||
from typing import TYPE_CHECKING
|
||||
from isaaclab.assets import Articulation, RigidObject
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
def lid_dropped(env: ManagerBasedRLEnv,
|
||||
minimum_height: float = -0.05,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
return lid.data.root_pos_w[:, 2] < minimum_height
|
||||
|
||||
def lid_successfully_grasped(env: ManagerBasedRLEnv,
                             distance_threshold: float = 0.03,
                             height_threshold: float = 0.2,
                             lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
                             robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                             gripper_body_name: str = "left_hand_body") -> torch.Tensor:
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]
    # find_bodies returns (ids, names); take the first matching index so that the
    # indexed positions keep shape (num_envs, 3) instead of (num_envs, 1, 3).
    body_ids, _ = robot.find_bodies([gripper_body_name])
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
    distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
    is_close = distance < distance_threshold
    is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
    return is_close & is_lifted
|
||||
|
||||
def gripper_at_lid_side(env: ManagerBasedRLEnv,
                        lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
                        robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                        left_gripper_name: str = "left_hand__l",  # note the double underscore
                        right_gripper_name: str = "left_hand_r",
                        side_distance: float = 0.05,
                        side_tolerance: float = 0.01,
                        alignment_tolerance: float = 0.02,
                        height_offset: float = 0.1,
                        height_tolerance: float = 0.02) -> torch.Tensor:
    """Terminate when the gripper center sits on the side of the lid at the specified height.

    Frame conventions:
    - X: the direction along which the two fingers close towards each other.
    - Y: the direction of the gap between the fingers, aligned with the lid handle.
    - Z: the height direction.
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]

    lid_pos_w = lid.data.root_pos_w[:, :3]

    # Positions of the two gripper fingers.
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)

    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]

    # Gripper center and its position relative to the lid.
    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
    rel_pos = gripper_center - lid_pos_w

    # Y: should sit at side_distance to the side of the lid.
    y_dist = torch.abs(rel_pos[:, 1])
    y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)

    # X: should be aligned (close to 0).
    x_dist = torch.abs(rel_pos[:, 0])
    x_ok = x_dist < alignment_tolerance

    # Z: should be height_offset above the lid.
    z_error = torch.abs(rel_pos[:, 2] - height_offset)
    z_ok = z_error < height_tolerance

    # All conditions must hold.
    return x_ok & y_ok & z_ok
|
||||
|
||||
|
||||
def base_height_failure(env: ManagerBasedRLEnv,
|
||||
minimum_height: float | None = None,
|
||||
maximum_height: float | None = None,
|
||||
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
|
||||
"""Terminate when the robot's base height is outside the specified range."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
asset: Articulation = env.scene[asset_cfg.name]
|
||||
root_pos_z = asset.data.root_pos_w[:, 2]
|
||||
|
||||
# check if height is outside the range
|
||||
out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool)
|
||||
if minimum_height is not None:
|
||||
out_of_bounds |= root_pos_z < minimum_height
|
||||
if maximum_height is not None:
|
||||
out_of_bounds |= root_pos_z > maximum_height
|
||||
|
||||
return out_of_bounds
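# ------------------------------------------------------------------------------
# A minimal sketch of how these termination functions could be wired into the
# termination manager. The config class, import path and parameter values below
# are illustrative assumptions, not part of this commit.
#
#   from isaaclab.managers import TerminationTermCfg as DoneTerm
#   from isaaclab.utils import configclass
#   from . import terminations as mdp  # hypothetical module path
#
#   @configclass
#   class TerminationsCfg:
#       lid_dropped = DoneTerm(func=mdp.lid_dropped,
#                              params={"minimum_height": -0.05,
#                                      "lid_cfg": SceneEntityCfg("lid")})
#       robot_fell = DoneTerm(func=mdp.base_height_failure,
#                             params={"minimum_height": 0.3,
#                                     "asset_cfg": SceneEntityCfg("Mindbot")})
# ------------------------------------------------------------------------------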
|
||||
|
||||
@@ -177,200 +177,11 @@ def gripper_handle_orientation_alignment(
|
||||
# debug
|
||||
if not torch.isfinite(total_reward).all():
|
||||
print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
|
||||
print(f"rew_vertical: {rew_forward}")
|
||||
print(f"rew_vertical: {dot_fwd}")
|
||||
print(f"rew_yaw: {rew_yaw}")
|
||||
|
||||
return total_reward
|
||||
|
||||
# # 计算点积:我们希望它们垂直,即点积为 0
|
||||
# dot_width_handle = torch.sum(grip_width_world * handle_axis_world, dim=1)
|
||||
|
||||
# # 奖励:1.0 - abs(dot)。点积越小(越垂直),分数越高
|
||||
# rew_perpendicular = 1.0 - torch.abs(dot_width_handle)
|
||||
|
||||
# # 稍微增强一下敏感度,让它更倾向于完美垂直
|
||||
# rew_perpendicular = rew_perpendicular * rew_perpendicular
|
||||
|
||||
# # 3. 组合奖励
|
||||
# total_reward = rew_forward * rew_perpendicular
|
||||
|
||||
# return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
|
||||
# def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_gripper_name: str = "left_hand__l",
|
||||
# right_gripper_name: str = "left_hand_r",
|
||||
# height_offset: float = 0.03,
|
||||
# std: float = 0.1) -> torch.Tensor:
|
||||
|
||||
# dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
|
||||
# robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# # lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# # [num_envs, 3]
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# # 目标点:lid位置 + height_offset(Z方向)
|
||||
# target_pos = handle_pos_w.clone()
|
||||
# # print(target_pos[0])
|
||||
# target_pos[:, 0] -= height_offset # 在lid上方height_offset处
|
||||
# # print(target_pos[0])
|
||||
# # 获取两个夹爪的位置
|
||||
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
|
||||
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
|
||||
|
||||
# # 计算夹爪中心位置
|
||||
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
|
||||
# # print(gripper_center[0])
|
||||
# # 计算夹爪中心到目标点的3D距离
|
||||
# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# # 距离奖励:距离越小,奖励越大
|
||||
# position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
# return position_reward
|
||||
|
||||
|
||||
# def gripper_handle_position_alignment(
|
||||
# env,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_finger_name: str = "right_hand_l",
|
||||
# right_finger_name: str = "right_hand__r",
|
||||
# # 【修改点1】:根据你的描述,长轴是 Local Z,所以改为 (0, 0, 1)
|
||||
# handle_local_axis: tuple = (0.0, 0.0, 1.0),
|
||||
# # 【新增】:明确指定手柄的“侧面”是 Local X 轴
|
||||
# handle_local_side_axis: tuple = (0.0, -1.0, 0.0),
|
||||
# grasp_width: float = 0.03,
|
||||
# grasp_offset: float = 0.06,
|
||||
# std: float = 0.05,
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 修正版:
|
||||
# 1. 修正了手柄长轴为 Local Z。
|
||||
# 2. 使用 Local X 作为手柄两侧的基准方向。
|
||||
# """
|
||||
# # 1. 获取数据
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
|
||||
# # [num_envs, 3]
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# # [num_envs, 4]
|
||||
# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :]
|
||||
|
||||
# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True)
|
||||
# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True)
|
||||
# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :]
|
||||
# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :]
|
||||
|
||||
# # 2. 计算关键向量
|
||||
# # 【修正】:计算世界坐标系下的手柄长轴 (应为 World Z)
|
||||
# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# # 虽然本奖励函数主要位置点,不需要显式用到长轴向量做点积,但保留以备后续增加角度奖励
|
||||
# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec))
|
||||
|
||||
# # 【核心修改】:直接计算世界坐标系下的手柄“侧向”向量 (即 Local X -> World Y)
|
||||
# # 这种方法比 cross(approach, axis) 更准确,因为它尊重了物体自身的旋转
|
||||
# handle_side_local_vec = torch.tensor(handle_local_side_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# side_vec_world = normalize(quat_apply(handle_quat_w, handle_side_local_vec))
|
||||
|
||||
# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱)
|
||||
# # 注意:如果你的烘箱放置位置旋转了,这里也建议改为读取机器人的前方或者物体的Local方向
|
||||
# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# # 3. 计算理想的抓取点
|
||||
# # 抓取中心:从手柄中心沿着抓取方向(World X)反向退后 grasp_offset
|
||||
# grasp_center = handle_pos_w - approach_dir * grasp_offset
|
||||
|
||||
# # 计算理想的左/右手指位置
|
||||
# # 既然 side_vec_world 是 Local X (对应 World Y)
|
||||
# # 我们希望手指在 Y 轴两侧张开
|
||||
# ideal_l_pos = grasp_center + side_vec_world * (grasp_width / 2.0)
|
||||
# ideal_r_pos = grasp_center - side_vec_world * (grasp_width / 2.0)
|
||||
|
||||
# # 注意:这里 l_pos 用 + 还是 - 取决于你的 robot 建模里 right_hand_l 实际是在正Y侧还是负Y侧
|
||||
# # 如果发现左右手指交叉,交换上面的 + 和 - 即可。
|
||||
|
||||
# # 4. 计算距离奖励
|
||||
# dist_l = torch.norm(l_finger_pos - ideal_r_pos, dim=1)
|
||||
# dist_r = torch.norm(r_finger_pos - ideal_l_pos, dim=1)
|
||||
|
||||
# rew_l = 1.0 - torch.tanh(dist_l / std)
|
||||
# rew_r = 1.0 - torch.tanh(dist_r / std)
|
||||
|
||||
# total_reward = (rew_l + rew_r) / 2.0
|
||||
|
||||
# return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
# def gripper_handle_position_alignment(
|
||||
# env,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_finger_name: str = "right_hand_l", # 左手指 body 名称
|
||||
# right_finger_name: str = "right_hand__r", # 右手指 body 名称
|
||||
# handle_local_axis: tuple = (0.0, 0.0, 1.0), # 手柄长轴
|
||||
# grasp_width: float = 0.03, # 预期的抓取宽度 (米)
|
||||
# grasp_offset: float = 0.03, # 手指尖距离手柄中心的进深偏移
|
||||
# std: float = 0.05, # 奖励函数的敏感度
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 双指位置奖励:引导左/右手指分别到达手柄的两侧。
|
||||
# """
|
||||
# # 1. 获取数据
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :]
|
||||
|
||||
# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True)
|
||||
# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True)
|
||||
# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :]
|
||||
# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :]
|
||||
|
||||
# # 2. 计算理想的抓取点
|
||||
# # 获取手柄当前轴向
|
||||
# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec))
|
||||
|
||||
# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱)
|
||||
# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# # 计算垂直于手柄轴向和接近方向的向量,这就是“侧面”方向
|
||||
# side_vec = normalize(torch.cross(approach_dir, handle_axis_world))
|
||||
|
||||
# # 计算抓取中心点 (手柄中心沿接近方向反向偏移一点,留出空间)
|
||||
# grasp_center = handle_pos_w - approach_dir * grasp_offset
|
||||
|
||||
# # 计算理想的左/右手指位置 (在中心点两侧张开)
|
||||
# # 注意:这里需要根据你的坐标系确认哪个是左,哪个是右
|
||||
# ideal_l_pos = grasp_center - side_vec * (grasp_width / 2.0)
|
||||
# ideal_r_pos = grasp_center + side_vec * (grasp_width / 2.0)
|
||||
|
||||
# # 3. 计算距离奖励
|
||||
# dist_l = torch.norm(l_finger_pos - ideal_l_pos, dim=1)
|
||||
# dist_r = torch.norm(r_finger_pos - ideal_r_pos, dim=1)
|
||||
|
||||
# rew_l = 1.0 - torch.tanh(dist_l / std)
|
||||
# rew_r = 1.0 - torch.tanh(dist_r / std)
|
||||
|
||||
# # 取平均值
|
||||
# total_reward = (rew_l + rew_r) / 2.0
|
||||
|
||||
# return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
|
||||
# 在 rewards.py 中,修改 _is_grasped 函数,使其支持 dryingbox 和 handle_name
|
||||
def _is_grasped(
|
||||
@@ -400,8 +211,8 @@ def _is_grasped(
|
||||
handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone()
|
||||
handle_pos_w[:, 1] += height_offset  # offset along the Y axis
|
||||
handle_pos_w[:, 0] += 0.02
|
||||
handle_pos_w[:, 2] -= 0.04
|
||||
handle_pos_w[:, 0] += 0.005
|
||||
# handle_pos_w[:, 2] -= 0.04
|
||||
# 3. 夹爪位置
|
||||
l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
@@ -411,7 +222,7 @@ def _is_grasped(
|
||||
|
||||
# 4. 条件一:距离判定 (在把手球范围内)
|
||||
dist_to_handle = torch.norm(gripper_center - handle_pos_w, dim=-1)
|
||||
if env.common_step_counter % 100 == 0:
|
||||
if env.common_step_counter % 500 == 0:
|
||||
print(f"Env 0 Dist to Handle: {dist_to_handle[0].item():.4f}m")
|
||||
is_near = dist_to_handle < grasp_radius
|
||||
|
||||
@@ -479,80 +290,51 @@ def gripper_close_when_near(
|
||||
|
||||
# def gripper_close_when_near(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
|
||||
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
|
||||
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# target_close_pos: float = 0.03,
|
||||
# height_offset: float = 0.03,
|
||||
# grasp_radius: float = 0.02 # 球面半径 2cm
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_gripper_name: str = "right_hand_l",
|
||||
# right_gripper_name: str = "right_hand__r",
|
||||
# joint_names: list = ["right_hand_joint_left", "right_hand_joint_right"], # 保留接口但不强依赖
|
||||
# # 关键参数
|
||||
# handle_thickness: float = 0.0388, # 把手厚度(米),≈3.88cm
|
||||
# thickness_tol: float = 0.008, # 容差(米),≈0.8cm,控制惩罚/奖励斜率
|
||||
# grasp_radius: float = 0.04, # 抓取判定半径,略大于把手厚度
|
||||
# height_offset: float = 0.00, # 如需上下偏移可调整
|
||||
# target_close_pos: float = 0.03, # 保留给 joint/clamping 兼容
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# # 1. 获取位置
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
# lid_pos_w[:, 2] += height_offset # 把手中心
|
||||
|
||||
# # 获取左右指尖位置
|
||||
# # 目标位置(把手中心,可做微偏移)
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone()
|
||||
# handle_pos[:, 1] += height_offset
|
||||
|
||||
# # 手指位置与中心
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
# center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# # 计算夹爪中心
|
||||
# pos_center = (pos_L + pos_R) / 2.0
|
||||
# # 到把手的距离(只在近距离才给分)
|
||||
# dist = torch.norm(center - handle_pos, dim=-1)
|
||||
# is_near = dist < grasp_radius
|
||||
|
||||
# # 2. 距离判定 (Is Inside Sphere?)
|
||||
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
|
||||
# is_in_sphere = (dist_center < grasp_radius).float()
|
||||
# # 实际两指间距
|
||||
# finger_dist = torch.norm(pos_L - pos_R, dim=-1)
|
||||
|
||||
# # 3. "在中间"判定 (Is Between?)
|
||||
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
|
||||
# vec_LR = pos_R - pos_L # 左指指向右指
|
||||
# vec_LO = lid_pos_w - pos_L # 左指指向物体
|
||||
# # 奖励:两指间距越接近把手厚度越好,平滑衰减
|
||||
# error = torch.abs(finger_dist - handle_thickness)
|
||||
# close_reward = 1.0 - torch.tanh(error / thickness_tol)
|
||||
|
||||
# # 计算投影比例 t
|
||||
# # P_proj = P_L + t * (P_R - P_L)
|
||||
# # t = (vec_LO . vec_LR) / |vec_LR|^2
|
||||
# # 如果 0 < t < 1,说明投影在两个指尖之间
|
||||
# # 避免 NaN
|
||||
# close_reward = torch.nan_to_num(close_reward, nan=0.0)
|
||||
|
||||
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
|
||||
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
|
||||
# t = dot / len_sq
|
||||
|
||||
# is_between = (t > 0.0) & (t < 1.0)
|
||||
# is_between = is_between.float()
|
||||
|
||||
# # 4. 闭合判定
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # 注意:如果是 Binary Action,可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
|
||||
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
|
||||
# # 只有当非常接近闭合目标时才给分
|
||||
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
|
||||
|
||||
# # 5. 最终奖励
|
||||
# # 只有三者同时满足才给 1.0 分
|
||||
# reward = is_in_sphere * is_between * is_closing
|
||||
|
||||
# return torch.nan_to_num(reward, nan=0.0)
|
||||
# return torch.where(is_near, close_reward, torch.zeros_like(close_reward))
|
||||
|
||||
|
||||
# def lid_is_lifted(
|
||||
# env:ManagerBasedRLEnv,
|
||||
# minimal_height:float,
|
||||
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# lid_height = lid.data.root_pos_w[:, 2]
|
||||
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
|
||||
# reward=torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
# return reward
|
||||
|
||||
|
||||
def lid_is_lifted(
|
||||
@@ -698,39 +480,7 @@ def lid_lift_progress_reward(
|
||||
|
||||
return reward
|
||||
|
||||
# def lid_grasped_bonus(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg,
|
||||
# robot_cfg: SceneEntityCfg,
|
||||
# left_gripper_name: str,
|
||||
# joint_names: list,
|
||||
# distance_threshold: float = 0.05,
|
||||
# closed_pos: float = 0.03
|
||||
# ) -> torch.Tensor:
|
||||
# # 1. 计算距离
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
# lid_pos = lid.data.root_pos_w[:, :3]
|
||||
# distance = torch.norm(gripper_pos - lid_pos, dim=-1)
|
||||
|
||||
# # 2. 检查距离是否达标
|
||||
# is_close = distance < distance_threshold
|
||||
|
||||
# # 3. 检查夹爪是否闭合
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# # 检查是否接近闭合 (比如 > 0.02)
|
||||
# is_closed = torch.all(joint_pos > (closed_pos * 0.8), dim=-1)
|
||||
|
||||
# # 4. 给分
|
||||
# # 只有同时满足才给 1.0
|
||||
# reward = torch.where(is_close & is_closed, 1.0, 0.0)
|
||||
|
||||
# return torch.nan_to_num(reward)
|
||||
|
||||
# 在 rewards.py 文件开头添加调试函数
|
||||
def debug_robot_physics(
|
||||
env: ManagerBasedRLEnv,
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
@@ -771,64 +521,15 @@ def debug_robot_physics(
|
||||
    # 4. Check for abnormally high velocities (a possible source of collision problems).
    high_vel_mask = (gripper_lin_vel_mag > 0.5) | (root_lin_vel_magnitude > 0.3)

    if torch.any(high_vel_mask):
        env_ids = torch.where(high_vel_mask)[0].cpu().tolist()
        for env_id in env_ids[:3]:  # only print the first 3 environments
            print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:")
            print(f"  robot root linear velocity: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)")
            print(f"  robot root angular velocity: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)")
            print(f"  gripper linear velocity: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)")
            print(f"  gripper angular velocity: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)")
            print(f"  max joint velocity: {max_joint_vel[env_id].item():.4f} rad/s")
|
||||
|
||||
# def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_gripper_name: str = "right_hand_l",
|
||||
# right_gripper_name: str = "right_hand__r",
|
||||
# height_offset: float = 0.03,
|
||||
# std: float = 0.1) -> torch.Tensor:
|
||||
|
||||
# dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
|
||||
# robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# target_pos = handle_pos_w.clone()
|
||||
# target_pos[:, 1] += height_offset
|
||||
# target_pos[:, 0] += 0.02
|
||||
# target_pos[:, 2] -= 0.04
|
||||
|
||||
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
|
||||
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
|
||||
|
||||
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
|
||||
# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# # ===== 添加调试打印 =====
|
||||
# # 当夹爪接近手柄时(距离 < 0.15m)打印物理信息
|
||||
# close_mask = distance < 0.03
|
||||
# if torch.any(close_mask):
|
||||
# debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5)
|
||||
# # ========================
|
||||
|
||||
# position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
# # 计算距离后插入
|
||||
# if env.common_step_counter % 200 == 0:
|
||||
# print("pos_align dbg env0: "
|
||||
# f"handle={handle_pos_w[0].cpu().numpy()}, "
|
||||
# f"target={target_pos[0].cpu().numpy()}, "
|
||||
# f"gripL={left_gripper_pos[0].cpu().numpy()}, "
|
||||
# f"gripR={right_gripper_pos[0].cpu().numpy()}, "
|
||||
# f"center={gripper_center[0].cpu().numpy()}, "
|
||||
# f"dist={distance[0].item():.4f}")
|
||||
|
||||
# return position_reward
|
||||
# if torch.any(high_vel_mask):
|
||||
# env_ids = torch.where(high_vel_mask)[0].cpu().tolist()
|
||||
# for env_id in env_ids[:3]: # 只打印前3个环境
|
||||
# print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:")
|
||||
# print(f" 机器人根节点线性速度: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)")
|
||||
# print(f" 机器人根节点角速度: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)")
|
||||
# print(f" 夹爪线性速度: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)")
|
||||
# print(f" 夹爪角速度: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)")
|
||||
# print(f" 最大关节速度: {max_joint_vel[env_id].item():.4f} rad/s")
|
||||
|
||||
def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
@@ -846,8 +547,8 @@ def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
target_pos = handle_pos_w.clone()
|
||||
target_pos[:, 1] += height_offset
|
||||
# target_pos[:, 0] += 0.02
|
||||
target_pos[:, 2] -= 0.02
|
||||
target_pos[:, 0] += 0.005
|
||||
# target_pos[:, 2] -= 0.04
|
||||
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
@@ -878,3 +579,257 @@ def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
f"dist={distance[0].item():.4f}")
|
||||
|
||||
return position_reward
|
||||
|
||||
|
||||
|
||||
def gripper_open_when_far(
    env: ManagerBasedRLEnv,
    dryingbox_cfg: SceneEntityCfg,   # kept for interface consistency, no longer used
    robot_cfg: SceneEntityCfg,
    handle_name: str,                # kept for interface consistency, no longer used
    left_gripper_name: str,
    right_gripper_name: str,
    joint_names: list,               # no longer used
    dist_threshold: float = 0.5,     # no longer used
    target_open_dist: float = 0.06,  # finger-to-finger distance when fully open (meters)
) -> torch.Tensor:
    """Reward the gripper for being fully open.

    The reward depends only on the actual distance between the two fingers and
    peaks when that distance reaches target_open_dist (0.06 m).
    """
    # 1. Fetch the robot.
    robot = env.scene[robot_cfg.name]

    # 2. World positions of the two fingers.
    l_ids, _ = robot.find_bodies([left_gripper_name])
    r_ids, _ = robot.find_bodies([right_gripper_name])

    pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
    pos_R = robot.data.body_pos_w[:, r_ids[0], :3]

    # 3. Actual finger-to-finger distance.
    finger_dist = torch.norm(pos_L - pos_R, dim=-1)

    # 4. Reward: the closer the distance is to the fully-open distance, the higher the reward.
    error = torch.abs(finger_dist - target_open_dist)
    reward = 1.0 - torch.tanh(error / 0.01)  # 0.01 m (1 cm) smoothing scale

    return reward
|
||||
|
||||
|
||||
def gripper_open_penalty_when_far(
    env: ManagerBasedRLEnv,
    dryingbox_cfg: SceneEntityCfg,
    robot_cfg: SceneEntityCfg,
    handle_name: str,
    left_gripper_name: str,
    right_gripper_name: str,
    joint_names: list,
    dist_threshold: float = 0.03,  # grasp exemption zone (no penalty inside this distance)
) -> torch.Tensor:
    """Corrected penalty term.

    Beyond dist_threshold (3 cm), penalize the gripper for not being fully open.
    """
    robot = env.scene[robot_cfg.name]
    dryingbox = env.scene[dryingbox_cfg.name]

    # 1. Positions.
    handle_ids, _ = dryingbox.find_bodies([handle_name])
    handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]

    l_ids, _ = robot.find_bodies([left_gripper_name])
    r_ids, _ = robot.find_bodies([right_gripper_name])

    pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
    pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
    gripper_center = (pos_L + pos_R) / 2.0
    distance = torch.norm(gripper_center - handle_pos, dim=1)

    # 2. Closedness (0 = fully open, 1 = fully closed).
    # Reading the joint positions directly is more accurate than the fingertip distance.
    joint_ids, _ = robot.find_joints(joint_names)
    joint_pos = robot.data.joint_pos[:, joint_ids]
    # Assume 0.0 is open and 0.03 is closed; normalize the mean absolute value.
    closedness = torch.clamp(torch.mean(torch.abs(joint_pos), dim=1) / 0.03, 0.0, 1.0)

    # 3. Penalty gate (the core of the fix).
    # Start penalizing once distance > dist_threshold. A tanh is used instead of a
    # hard switch to avoid oscillation:
    #   dist = 0.03 -> gate = 0; dist = 0.05 -> gate close to 1.
    is_far_gate = torch.tanh((distance - dist_threshold) * 100.0)
    # Clip to [0, 1] so distances below the threshold cannot produce negative values.
    is_far_gate = torch.clamp(is_far_gate, 0.0, 1.0)

    # 4. Final penalty: only deduct points when the gripper is both far away and closed.
    penalty = is_far_gate * closedness

    return -penalty
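# Sanity check of the gate above with dist_threshold = 0.03 m:
#   tanh((0.030 - 0.03) * 100) = 0.00
#   tanh((0.035 - 0.03) * 100) ~= 0.46
#   tanh((0.050 - 0.03) * 100) ~= 0.96
# so the penalty fades in over roughly 2 cm beyond the grasp exemption zone and
# then scales with how closed the gripper is.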
|
||||
|
||||
# def gripper_open_penalty_when_far(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg,
|
||||
# robot_cfg: SceneEntityCfg,
|
||||
# handle_name: str,
|
||||
# left_gripper_name: str,
|
||||
# right_gripper_name: str,
|
||||
# joint_names: list,
|
||||
# dist_threshold: float = 0.03, # 3cm 抓取保护区
|
||||
# max_penalty_dist: float = 0.2, # 0.55m 惩罚激活边界
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 使用 Sigmoid 激活函数的惩罚项:
|
||||
# 1. 距离把手 < 0.55m 时,惩罚迅速激活并保持在高位 (接近 1.0)。
|
||||
# 2. 距离把手 < 0.03m 时,惩罚强制清零,允许闭合抓取。
|
||||
# 3. 两手指距离越小,惩罚值越大。
|
||||
# """
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
|
||||
# # 1. 获取位置信息
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name])
|
||||
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
|
||||
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
|
||||
# # 2. 计算距离
|
||||
# gripper_center = (pos_L + pos_R) / 2.0
|
||||
# distance = torch.norm(gripper_center - handle_pos, dim=1)
|
||||
# finger_dist = torch.norm(pos_L - pos_R, dim=-1)
|
||||
|
||||
# # 3. 距离激活门控 (Sigmoid 方法)
|
||||
# # 当 distance < 0.55 时,(0.55 - dist) 为正,sigmoid 趋近于 1.0
|
||||
# # 50.0 是陡峭系数,系数越大,0.55m 处的切换越像开关
|
||||
# dist_gate = torch.sigmoid((max_penalty_dist - distance) * 50.0)
|
||||
|
||||
# # 4. 抓取区保护 (Deadzone)
|
||||
# # 距离小于 3cm 时,is_not_near 变为 0,完全撤销惩罚
|
||||
# is_not_near = (distance > dist_threshold).float()
|
||||
|
||||
# # 5. 闭合程度计算 (0 = 全开, 1 = 全闭)
|
||||
# # 假设手指最大张开距离为 0.06m
|
||||
# max_open = 0.06
|
||||
# closedness = torch.clamp(1.0 - (finger_dist / max_open), 0.0, 1.0)
|
||||
|
||||
# # 6. 综合计算并 Clamp
|
||||
# # 只有在 (0.03 < distance < 0.55) 范围内且手指闭合时,值才会很大
|
||||
# penalty_magnitude = dist_gate * is_not_near * closedness
|
||||
|
||||
# # 返回负值作为惩罚 (值域 [-1, 0])
|
||||
# return -torch.clamp(penalty_magnitude, 0.0, 1.0)
|
||||
|
||||
|
||||
# def gripper_open_until_near(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg,
|
||||
# robot_cfg: SceneEntityCfg,
|
||||
# handle_name: str,
|
||||
# left_gripper_name: str,
|
||||
# right_gripper_name: str,
|
||||
# joint_names: list,
|
||||
# dist_threshold: float = 0.03, # 3cm 阈值
|
||||
# ) -> torch.Tensor:
|
||||
# """奖励夹爪在远离目标时保持打开状态,接近后不再约束。"""
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
|
||||
# # 1. 计算夹爪中心到把手的距离
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name])
|
||||
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
|
||||
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# gripper_center = (robot.data.body_pos_w[:, l_ids[0], :3] + robot.data.body_pos_w[:, r_ids[0], :3]) / 2.0
|
||||
|
||||
# distance = torch.norm(gripper_center - handle_pos, dim=1)
|
||||
|
||||
# # 2. 获取夹爪当前关节位置 (假设 0 是完全打开)
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # 计算当前关节位置与 0(打开状态)的偏差
|
||||
# joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# mean_error = torch.mean(joint_pos_abs, dim=1)
|
||||
|
||||
# # 3. 计算奖励逻辑
|
||||
# # 距离 > 3cm 时:关节越接近 0(打开),奖励越高 (最大为 1.0)
|
||||
# # 距离 <= 3cm 时:奖励为 0,不干涉模型闭合
|
||||
# is_far = distance > dist_threshold
|
||||
# open_reward = 1.0 - torch.tanh(mean_error / 0.01) # 1cm 内平滑衰减
|
||||
|
||||
# return torch.where(is_far, open_reward, torch.zeros_like(open_reward))
|
||||
|
||||
# ... 在文件末尾添加以下函数 ...
|
||||
|
||||
def gripper_grasp_stability(
|
||||
env: ManagerBasedRLEnv,
|
||||
robot_cfg: SceneEntityCfg,
|
||||
joint_names: list,
|
||||
vel_threshold: float = 0.005 # 极低的速度阈值,判定为“静止”
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
奖励夹爪在闭合状态下的稳定性。
|
||||
只有当关节速度极小时(即没有在抖动或移动),才发放奖励。
|
||||
"""
|
||||
robot = env.scene[robot_cfg.name]
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
|
||||
# 获取夹爪关节的绝对速度
|
||||
joint_vel = torch.abs(robot.data.joint_vel[:, joint_ids])
|
||||
|
||||
# 判定是否所有夹爪关节都处于“静止”状态
|
||||
is_stable = torch.all(joint_vel < vel_threshold, dim=1)
|
||||
|
||||
    # Stability only matters while the gripper is actually closed on the handle, so this
    # term is usually combined with the grasp check (see the sketch after this function);
    # here we only return the stability factor.
    return is_stable.float()
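
# ---------------------------------------------------------------------------
# [Illustrative sketch, not part of the original commit]
# The comment above suggests pairing the stability term with the grasp check.
# A minimal way to do that, reusing the helpers already in this file; the name
# `gripper_grasp_stability_gated` is hypothetical and is not wired into any
# RewardsCfg unless added explicitly.
# ---------------------------------------------------------------------------
def gripper_grasp_stability_gated(
    env: ManagerBasedRLEnv,
    dryingbox_cfg: SceneEntityCfg,
    robot_cfg: SceneEntityCfg,
    handle_name: str,
    left_gripper_name: str,
    right_gripper_name: str,
    joint_names: list,
    height_offset: float = 0.0,
    grasp_radius: float = 0.05,
    target_close_pos: float = 0.012,
    vel_threshold: float = 0.005,
) -> torch.Tensor:
    """Stability reward that only pays out while the handle is actually grasped."""
    # stability factor: 1.0 only when all gripper joints are effectively still
    stability = gripper_grasp_stability(env, robot_cfg, joint_names, vel_threshold)
    # grasp check: same helper used by gripper_close_interaction_v2 below
    grasped = _is_grasped(
        env, dryingbox_cfg, robot_cfg, handle_name,
        left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos,
    )
    return stability * grasped.float()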
|
||||
|
||||
def gripper_close_interaction_v2(
|
||||
env: ManagerBasedRLEnv,
|
||||
dryingbox_cfg: SceneEntityCfg,
|
||||
robot_cfg: SceneEntityCfg,
|
||||
handle_name: str,
|
||||
left_gripper_name: str,
|
||||
right_gripper_name: str,
|
||||
joint_names: list,
|
||||
target_close_pos: float = 0.012,
|
||||
height_offset: float = 0.00,
|
||||
grasp_radius: float = 0.05
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
增强版闭合奖励:结合了位置、闭合程度和物理稳定性。
|
||||
"""
|
||||
# 1. 基础抓取判定
|
||||
is_grasped = _is_grasped(
|
||||
env, dryingbox_cfg, robot_cfg, handle_name,
|
||||
left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
# 2. 计算闭合程度 (0.0 到 1.0)
|
||||
robot = env.scene[robot_cfg.name]
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
mean_joint_pos = torch.mean(current_joint_pos, dim=1)
|
||||
clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
|
||||
|
||||
# 3. 计算稳定性因子 (速度越快,奖励衰减越厉害)
|
||||
joint_vel = torch.norm(robot.data.joint_vel[:, joint_ids], dim=1)
|
||||
# 使用 exp(-x) 让奖励对速度非常敏感,速度稍大奖励就迅速消失
|
||||
stability_factor = torch.exp(-20.0 * joint_vel)
|
||||
|
||||
# 综合奖励:必须满足抓取条件,且闭合得越深、动作越稳,分数越高
|
||||
return torch.where(is_grasped, clamping_factor * stability_factor, torch.zeros_like(clamping_factor))
|
||||
@@ -0,0 +1,967 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import torch
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from isaaclab.assets import Articulation, RigidObject
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
|
||||
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
|
||||
"""Penalize joint position deviation from a target value."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
asset: Articulation = env.scene[asset_cfg.name]
|
||||
# wrap the joint positions to (-pi, pi)
|
||||
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
|
||||
# compute the reward
|
||||
return torch.sum(torch.square(joint_pos - target), dim=1)
|
||||
|
||||
|
||||
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
|
||||
"""Reward the agent for lifting the lid above the minimal height."""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
|
||||
height = lid.data.root_pos_w[:, 2]
|
||||
return torch.where(height > minimal_height, 1.0, 0.0)
|
||||
|
||||
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Reward the agent for reaching the lid using tanh-kernel."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
    # force std to a plain float so it does not silently become a (3,) vector
|
||||
if not isinstance(std, float):
|
||||
if torch.is_tensor(std):
|
||||
std = std.item()
|
||||
else:
|
||||
std = float(std)
|
||||
|
||||
# Target lid position: (num_envs, 3)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# Gripper position: (num_envs, 3)
|
||||
# body_idx = robot.find_bodies(gripper_body_name)[0]
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
|
||||
# Distance of the gripper to the lid: (num_envs,)
|
||||
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
|
||||
|
||||
return 1 - torch.tanh(distance / std)
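# [Illustration, not from the original commit] Shape of the tanh kernel above:
#   distance = 0         -> reward = 1.0
#   distance = 0.5 * std -> 1 - tanh(0.5) ≈ 0.54
#   distance = std       -> 1 - tanh(1.0) ≈ 0.24
#   distance = 2 * std   -> 1 - tanh(2.0) ≈ 0.04
# so std directly sets the distance scale over which the reaching reward decays.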
|
||||
|
||||
|
||||
def lid_grasped(env: ManagerBasedRLEnv,
|
||||
distance_threshold: float = 0.05,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
                robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Reward the agent for grasping the lid (close and lifted)."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# Target lid position: (num_envs, 3)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# Gripper position: (num_envs, 3)
|
||||
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
|
||||
|
||||
# Distance of the gripper to the lid: (num_envs,)
|
||||
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
|
||||
|
||||
# Check if close and lifted
|
||||
is_close = distance < distance_threshold
|
||||
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
|
||||
|
||||
return torch.where(is_close & is_lifted, 1.0, 0.0)
|
||||
|
||||
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Combined reward for reaching the lid AND lifting it."""
|
||||
# Get reaching reward
|
||||
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
|
||||
# Get lifting reward
|
||||
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
|
||||
# Combine rewards multiplicatively
|
||||
return reach_reward * lift_reward
|
||||
|
||||
|
||||
def gripper_handle_orientation_alignment(
|
||||
env,
|
||||
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
handle_name: str = "Equipment_BB_13_C",
|
||||
gripper_body_name: str = "right_hand_body",
|
||||
    gripper_forward_axis: tuple = (0.0, 0.0, 1.0),  # direction the fingertips point (local Z)
    gripper_width_axis: tuple = (0.0, 1.0, 0.0),    # finger opening/closing axis (local Y)
    lid_handle_axis: tuple = (0.0, 0.0, 1.0),
    max_angle_deg: float = 60.0,
    # the handle_local_axis parameter was removed: using the world Z axis directly is more robust
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
修正版:
|
||||
1. 解决了梯度消失:使用 (dot+1)/2 替代 clamp,保证全角度有梯度。
|
||||
2. 解决了轴向错误:直接认定手柄是垂直的 (World Z),修复抓取姿态。
|
||||
"""
|
||||
# 1. 获取数据
|
||||
# dryingbox = env.scene[dryingbox_cfg.name] (计算轴向不再需要它,节省计算)
|
||||
robot = env.scene[robot_cfg.name]
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
|
||||
|
||||
    box = env.scene[dryingbox_cfg.name]
    hd_ids, _ = box.find_bodies([handle_name], preserve_order=True)
    handle_quat_w = box.data.body_quat_w[:, hd_ids[0], :]
|
||||
|
||||
# --- 约束 A: 指向约束 (Pointing) ---
|
||||
# 目标:夹爪 Z 轴 指向 World +X
|
||||
grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
|
||||
|
||||
target_fwd = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
dot_fwd = torch.sum(grip_fwd_world * target_fwd, dim=1)
|
||||
    dot_fwd = torch.clamp(dot_fwd, -1.0, 1.0)
    angle_rad = torch.acos(dot_fwd)
|
||||
max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device)
|
||||
angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
|
||||
rew_forward = 1.0 - angle_normalized # 角度越小,奖励越大
|
||||
# 如果角度超过60度,奖励为0
|
||||
rew_forward = torch.where(angle_rad <= max_angle_rad, rew_forward, torch.zeros_like(rew_forward))
|
||||
# nan -> 0
|
||||
rew_forward = torch.where(torch.isnan(rew_forward), torch.zeros_like(rew_forward), rew_forward)
|
||||
|
||||
# [关键修改 1] 使用软化奖励,提供全局梯度
|
||||
# 范围 [0, 1]: 1.0 是完美对齐,0.5 是垂直,0.0 是完全背对
|
||||
# 这样即使现在是指向地面的 (dot=0),它也有 0.5 分,且知道往上抬分会变高
|
||||
# rew_forward = (dot_fwd + 1.0) / 2.0
|
||||
# 如果你希望能更陡峭一点,可以加个平方: rew_forward = rew_forward * rew_forward
|
||||
|
||||
# --- 约束 B: 抓取姿态约束 (Grasp Alignment) ---
|
||||
# 目标:夹爪的开合轴 (Y) 应该垂直于 手柄轴 (Z)
|
||||
# 这样手指就是水平开合的
|
||||
|
||||
grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
grip_width_world = normalize(quat_apply(gripper_quat_w, grip_width_local))
|
||||
|
||||
# [关键修改 2] 强制假设手柄是垂直的 (World Z)
|
||||
# 除非你的烘箱会倾斜倒在地上,否则这是最稳健的写法,不用管局部坐标系
|
||||
handle_axis_local = torch.tensor([0.0, 0.0, 1.0], device=env.device).repeat(env.num_envs, 1)
|
||||
    handle_axis_world = quat_apply(handle_quat_w, handle_axis_local)
    dot_yaw = torch.sum(grip_width_world * handle_axis_world, dim=1)
    rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
|
||||
# nan -> 0
|
||||
rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
|
||||
|
||||
# 5. 组合
|
||||
total_reward = rew_forward * rew_yaw
|
||||
|
||||
total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
|
||||
# debug
|
||||
if not torch.isfinite(total_reward).all():
|
||||
print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
|
||||
print(f"rew_vertical: {rew_vertical}")
|
||||
print(f"rew_yaw: {rew_yaw}")
|
||||
|
||||
return total_reward
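# [Illustration, not from the original commit] Value ranges of the two factors above,
# assuming max_angle_deg = 60:
#   rew_forward: 1.0 when the gripper Z axis points along world +X, 0.5 at a 30° error, 0.0 beyond 60°.
#   rew_yaw:     dot_yaw ** 2 (+ 1e-4), i.e. close to 1.0 when the finger opening axis is parallel to the
#                handle axis and close to 0.0 when perpendicular; note that the comment above describes
#                the perpendicular variant, which is the version kept commented out below.
# The product is only large when both the pointing direction and the wrist roll are right.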
|
||||
|
||||
# # 计算点积:我们希望它们垂直,即点积为 0
|
||||
# dot_width_handle = torch.sum(grip_width_world * handle_axis_world, dim=1)
|
||||
|
||||
# # 奖励:1.0 - abs(dot)。点积越小(越垂直),分数越高
|
||||
# rew_perpendicular = 1.0 - torch.abs(dot_width_handle)
|
||||
|
||||
# # 稍微增强一下敏感度,让它更倾向于完美垂直
|
||||
# rew_perpendicular = rew_perpendicular * rew_perpendicular
|
||||
|
||||
# # 3. 组合奖励
|
||||
# total_reward = rew_forward * rew_perpendicular
|
||||
|
||||
# return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
|
||||
# def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_gripper_name: str = "left_hand__l",
|
||||
# right_gripper_name: str = "left_hand_r",
|
||||
# height_offset: float = 0.03,
|
||||
# std: float = 0.1) -> torch.Tensor:
|
||||
|
||||
# dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
|
||||
# robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# # lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# # [num_envs, 3]
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# # 目标点:lid位置 + height_offset(Z方向)
|
||||
# target_pos = handle_pos_w.clone()
|
||||
# # print(target_pos[0])
|
||||
# target_pos[:, 0] -= height_offset # 在lid上方height_offset处
|
||||
# # print(target_pos[0])
|
||||
# # 获取两个夹爪的位置
|
||||
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
|
||||
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
|
||||
|
||||
# # 计算夹爪中心位置
|
||||
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
|
||||
# # print(gripper_center[0])
|
||||
# # 计算夹爪中心到目标点的3D距离
|
||||
# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# # 距离奖励:距离越小,奖励越大
|
||||
# position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
# return position_reward
|
||||
|
||||
|
||||
# def gripper_handle_position_alignment(
|
||||
# env,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_finger_name: str = "right_hand_l",
|
||||
# right_finger_name: str = "right_hand__r",
|
||||
# # 【修改点1】:根据你的描述,长轴是 Local Z,所以改为 (0, 0, 1)
|
||||
# handle_local_axis: tuple = (0.0, 0.0, 1.0),
|
||||
# # 【新增】:明确指定手柄的“侧面”是 Local X 轴
|
||||
# handle_local_side_axis: tuple = (0.0, -1.0, 0.0),
|
||||
# grasp_width: float = 0.03,
|
||||
# grasp_offset: float = 0.06,
|
||||
# std: float = 0.05,
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 修正版:
|
||||
# 1. 修正了手柄长轴为 Local Z。
|
||||
# 2. 使用 Local X 作为手柄两侧的基准方向。
|
||||
# """
|
||||
# # 1. 获取数据
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
|
||||
# # [num_envs, 3]
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# # [num_envs, 4]
|
||||
# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :]
|
||||
|
||||
# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True)
|
||||
# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True)
|
||||
# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :]
|
||||
# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :]
|
||||
|
||||
# # 2. 计算关键向量
|
||||
# # 【修正】:计算世界坐标系下的手柄长轴 (应为 World Z)
|
||||
# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# # 虽然本奖励函数主要位置点,不需要显式用到长轴向量做点积,但保留以备后续增加角度奖励
|
||||
# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec))
|
||||
|
||||
# # 【核心修改】:直接计算世界坐标系下的手柄“侧向”向量 (即 Local X -> World Y)
|
||||
# # 这种方法比 cross(approach, axis) 更准确,因为它尊重了物体自身的旋转
|
||||
# handle_side_local_vec = torch.tensor(handle_local_side_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# side_vec_world = normalize(quat_apply(handle_quat_w, handle_side_local_vec))
|
||||
|
||||
# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱)
|
||||
# # 注意:如果你的烘箱放置位置旋转了,这里也建议改为读取机器人的前方或者物体的Local方向
|
||||
# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# # 3. 计算理想的抓取点
|
||||
# # 抓取中心:从手柄中心沿着抓取方向(World X)反向退后 grasp_offset
|
||||
# grasp_center = handle_pos_w - approach_dir * grasp_offset
|
||||
|
||||
# # 计算理想的左/右手指位置
|
||||
# # 既然 side_vec_world 是 Local X (对应 World Y)
|
||||
# # 我们希望手指在 Y 轴两侧张开
|
||||
# ideal_l_pos = grasp_center + side_vec_world * (grasp_width / 2.0)
|
||||
# ideal_r_pos = grasp_center - side_vec_world * (grasp_width / 2.0)
|
||||
|
||||
# # 注意:这里 l_pos 用 + 还是 - 取决于你的 robot 建模里 right_hand_l 实际是在正Y侧还是负Y侧
|
||||
# # 如果发现左右手指交叉,交换上面的 + 和 - 即可。
|
||||
|
||||
# # 4. 计算距离奖励
|
||||
# dist_l = torch.norm(l_finger_pos - ideal_r_pos, dim=1)
|
||||
# dist_r = torch.norm(r_finger_pos - ideal_l_pos, dim=1)
|
||||
|
||||
# rew_l = 1.0 - torch.tanh(dist_l / std)
|
||||
# rew_r = 1.0 - torch.tanh(dist_r / std)
|
||||
|
||||
# total_reward = (rew_l + rew_r) / 2.0
|
||||
|
||||
# return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
# def gripper_handle_position_alignment(
|
||||
# env,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_finger_name: str = "right_hand_l", # 左手指 body 名称
|
||||
# right_finger_name: str = "right_hand__r", # 右手指 body 名称
|
||||
# handle_local_axis: tuple = (0.0, 0.0, 1.0), # 手柄长轴
|
||||
# grasp_width: float = 0.03, # 预期的抓取宽度 (米)
|
||||
# grasp_offset: float = 0.03, # 手指尖距离手柄中心的进深偏移
|
||||
# std: float = 0.05, # 奖励函数的敏感度
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 双指位置奖励:引导左/右手指分别到达手柄的两侧。
|
||||
# """
|
||||
# # 1. 获取数据
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :]
|
||||
|
||||
# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True)
|
||||
# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True)
|
||||
# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :]
|
||||
# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :]
|
||||
|
||||
# # 2. 计算理想的抓取点
|
||||
# # 获取手柄当前轴向
|
||||
# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec))
|
||||
|
||||
# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱)
|
||||
# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# # 计算垂直于手柄轴向和接近方向的向量,这就是“侧面”方向
|
||||
# side_vec = normalize(torch.cross(approach_dir, handle_axis_world))
|
||||
|
||||
# # 计算抓取中心点 (手柄中心沿接近方向反向偏移一点,留出空间)
|
||||
# grasp_center = handle_pos_w - approach_dir * grasp_offset
|
||||
|
||||
# # 计算理想的左/右手指位置 (在中心点两侧张开)
|
||||
# # 注意:这里需要根据你的坐标系确认哪个是左,哪个是右
|
||||
# ideal_l_pos = grasp_center - side_vec * (grasp_width / 2.0)
|
||||
# ideal_r_pos = grasp_center + side_vec * (grasp_width / 2.0)
|
||||
|
||||
# # 3. 计算距离奖励
|
||||
# dist_l = torch.norm(l_finger_pos - ideal_l_pos, dim=1)
|
||||
# dist_r = torch.norm(r_finger_pos - ideal_r_pos, dim=1)
|
||||
|
||||
# rew_l = 1.0 - torch.tanh(dist_l / std)
|
||||
# rew_r = 1.0 - torch.tanh(dist_r / std)
|
||||
|
||||
# # 取平均值
|
||||
# total_reward = (rew_l + rew_r) / 2.0
|
||||
|
||||
# return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
|
||||
# 在 rewards.py 中,修改 _is_grasped 函数,使其支持 dryingbox 和 handle_name
|
||||
def _is_grasped(
|
||||
env: ManagerBasedRLEnv,
|
||||
dryingbox_cfg: SceneEntityCfg, # 改为 dryingbox_cfg
|
||||
robot_cfg: SceneEntityCfg,
|
||||
handle_name: str, # 新增 handle_name 参数
|
||||
left_gripper_name: str,
|
||||
right_gripper_name: str,
|
||||
joint_names: list,
|
||||
height_offset: float,
|
||||
grasp_radius: float,
|
||||
target_close_pos: float
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
内部辅助函数:判定是否成功抓取。
|
||||
条件:
|
||||
1. 夹爪中心在把手目标点附近 (Sphere Check)
|
||||
2. 夹爪处于闭合发力状态 (Joint Effort Check)
|
||||
3. (关键) 夹爪X轴距离合适,不能压在把手上 (X-Axis Check)
|
||||
"""
|
||||
# 1. 获取对象
|
||||
dryingbox = env.scene[dryingbox_cfg.name] # 改为 dryingbox
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 2. 目标点位置 (把手中心)
|
||||
handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone()
|
||||
handle_pos_w[:, 1] += height_offset # X方向偏移
|
||||
handle_pos_w[:, 0] += 0.02
|
||||
# handle_pos_w[:, 2] -= 0.04
|
||||
# 3. 夹爪位置
|
||||
l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
gripper_center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# 4. 条件一:距离判定 (在把手球范围内)
|
||||
dist_to_handle = torch.norm(gripper_center - handle_pos_w, dim=-1)
|
||||
if env.common_step_counter % 100 == 0:
|
||||
print(f"Env 0 Dist to Handle: {dist_to_handle[0].item():.4f}m")
|
||||
is_near = dist_to_handle < grasp_radius
|
||||
|
||||
# 5. 条件二:X轴距离判定 (防止压在把手上)
|
||||
x_diff = gripper_center[:, 0] - handle_pos_w[:, 0]
|
||||
is_x_valid = torch.abs(x_diff) < 0.03 # 3cm 容差
|
||||
|
||||
# 6. 条件三:闭合判定 (正在尝试闭合)
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
|
||||
is_effort_closing = torch.all(joint_pos > 0.005, dim=1)
|
||||
|
||||
# 2. 几何排斥判定 (Geometry Check)
|
||||
dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
|
||||
is_not_empty = dist_fingertips > 0.005
|
||||
|
||||
# 在计算 dist_to_handle 后
|
||||
if env.common_step_counter % 500 == 0 and dist_to_handle[0] < 0.15:
|
||||
delta = gripper_center[0] - handle_pos_w[0]
|
||||
print(f"close_dbg env0: handle={handle_pos_w[0].cpu().numpy()}, "
|
||||
f"center={gripper_center[0].cpu().numpy()}, "
|
||||
f"delta={delta.cpu().numpy()}, "
|
||||
f"dist={dist_to_handle[0].item():.4f}, "
|
||||
f"fingertip_dist={dist_fingertips[0].item():.4f}, "
|
||||
f"joint_pos={joint_pos[0].cpu().numpy()}")
|
||||
|
||||
# 综合判定
|
||||
return is_near & is_x_valid & is_effort_closing & is_not_empty
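# [Illustration, not from the original commit] Summary of the gate above: the helper returns True
# only when (1) the finger mid-point lies within `grasp_radius` of the offset handle point,
# (2) the X offset between that mid-point and the handle is under the hard-coded 3 cm tolerance,
# (3) every gripper joint has moved more than 0.005 away from the open pose, and
# (4) the fingertips are still more than 5 mm apart, i.e. the gripper has not closed on air.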
|
||||
|
||||
|
||||
# 修改 gripper_close_when_near 函数
|
||||
def gripper_close_when_near(
|
||||
env: ManagerBasedRLEnv,
|
||||
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), # 改为 dryingbox_cfg
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
handle_name: str = "Equipment_BB_13_C", # 新增 handle_name 参数
|
||||
left_gripper_name: str = "right_hand_l", # 更新默认值
|
||||
right_gripper_name: str = "right_hand__r", # 更新默认值
|
||||
joint_names: list = ["right_hand_joint_left", "right_hand_joint_right"], # 更新默认值
|
||||
target_close_pos: float = 0.03,
|
||||
height_offset: float = 0.005, # 更新默认值
|
||||
grasp_radius: float = 0.03 # 更新默认值
|
||||
) -> torch.Tensor:
|
||||
|
||||
# 1. 判定基础抓取条件是否满足
|
||||
is_grasped = _is_grasped(
|
||||
env, dryingbox_cfg, robot_cfg, handle_name, # 传入 handle_name
|
||||
left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
# 2. 计算夹紧程度 (Clamping Intensity)
|
||||
robot = env.scene[robot_cfg.name]
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
mean_joint_pos = torch.mean(current_joint_pos, dim=1)
|
||||
|
||||
clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
|
||||
# 3. 只有在满足抓取判定时,才发放带有权重的夹紧奖励
|
||||
|
||||
|
||||
|
||||
return torch.where(is_grasped, clamping_factor, 0.0)
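# [Illustration, not from the original commit] Example of the clamping factor above with
# target_close_pos = 0.03: mean_joint_pos = 0.015 gives 0.5, anything >= 0.03 saturates at 1.0,
# and the whole term is zeroed whenever the grasp check fails.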
|
||||
|
||||
|
||||
|
||||
# def gripper_close_when_near(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
|
||||
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
|
||||
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# target_close_pos: float = 0.03,
|
||||
# height_offset: float = 0.03,
|
||||
# grasp_radius: float = 0.02 # 球面半径 2cm
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# # 1. 获取位置
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
# lid_pos_w[:, 2] += height_offset # 把手中心
|
||||
|
||||
# # 获取左右指尖位置
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
|
||||
# # 计算夹爪中心
|
||||
# pos_center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# # 2. 距离判定 (Is Inside Sphere?)
|
||||
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
|
||||
# is_in_sphere = (dist_center < grasp_radius).float()
|
||||
|
||||
# # 3. "在中间"判定 (Is Between?)
|
||||
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
|
||||
# vec_LR = pos_R - pos_L # 左指指向右指
|
||||
# vec_LO = lid_pos_w - pos_L # 左指指向物体
|
||||
|
||||
# # 计算投影比例 t
|
||||
# # P_proj = P_L + t * (P_R - P_L)
|
||||
# # t = (vec_LO . vec_LR) / |vec_LR|^2
|
||||
# # 如果 0 < t < 1,说明投影在两个指尖之间
|
||||
|
||||
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
|
||||
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
|
||||
# t = dot / len_sq
|
||||
|
||||
# is_between = (t > 0.0) & (t < 1.0)
|
||||
# is_between = is_between.float()
|
||||
|
||||
# # 4. 闭合判定
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # 注意:如果是 Binary Action,可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
|
||||
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
|
||||
# # 只有当非常接近闭合目标时才给分
|
||||
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
|
||||
|
||||
# # 5. 最终奖励
|
||||
# # 只有三者同时满足才给 1.0 分
|
||||
# reward = is_in_sphere * is_between * is_closing
|
||||
|
||||
# return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
# def lid_is_lifted(
|
||||
# env:ManagerBasedRLEnv,
|
||||
# minimal_height:float,
|
||||
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# lid_height = lid.data.root_pos_w[:, 2]
|
||||
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
|
||||
# reward=torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
# return reward
|
||||
|
||||
|
||||
def lid_is_lifted(
|
||||
env: ManagerBasedRLEnv,
|
||||
minimal_height: float = 0.05, # 相对提升阈值
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
height_offset: float = 0.03,
|
||||
grasp_radius: float = 0.1,
|
||||
target_close_pos: float = 0.03,
|
||||
) -> torch.Tensor:
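    # NOTE: `_is_grasped` defined above now also expects `handle_name` between the asset
    # configs and the gripper names; this legacy lid-based call still uses the old argument
    # list and would need updating before this term is used again.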
|
||||
|
||||
is_grasped = _is_grasped(
|
||||
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
lid = env.scene[lid_cfg.name]
|
||||
|
||||
# 1. 获取高度
|
||||
current_height = lid.data.root_pos_w[:, 2]
|
||||
# 自动获取初始高度
|
||||
initial_height = lid.data.default_root_state[:, 2]
|
||||
|
||||
# 2. 计算提升量
|
||||
lift_height = torch.clamp(current_height - initial_height, min=0.0)
|
||||
|
||||
# 3. 速度检查 (防撞飞)
|
||||
# 如果速度 > 1.0 m/s,视为被撞飞,不给分
|
||||
lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
|
||||
is_stable = lid_vel < 1.0
|
||||
|
||||
# 4. 计算奖励
|
||||
# 基础分:高度越高分越高
|
||||
shaping_reward = lift_height * 2.0
|
||||
# 成功分:超过阈值
|
||||
success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
|
||||
|
||||
# 组合
|
||||
# 必须 is_grasped AND is_stable
|
||||
reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
|
||||
|
||||
return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
|
||||
def lid_lift_success_reward(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand_body",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# 关键参数
|
||||
settled_height: float = 0.75, # 盖子落在超声仪上的稳定高度 (需根据仿真实际情况微调)
|
||||
target_lift_height: float = 0.05, # 目标提升高度 (5cm)
|
||||
grasp_dist_threshold: float = 0.05, # 抓取判定距离
|
||||
closed_pos: float = 0.03 # 夹爪闭合阈值
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
提升奖励:只有在【夹稳】的前提下,将盖子【相对】提升达到目标高度才给高分。
|
||||
"""
|
||||
# 1. 获取数据
|
||||
lid = env.scene[lid_cfg.name]
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 盖子实时高度
|
||||
lid_z = lid.data.root_pos_w[:, 2]
|
||||
lid_pos = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# 夹爪位置
|
||||
body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
|
||||
# 2. 【条件一:是否夹稳 (Is Grasped?)】
|
||||
# 距离检查
|
||||
distance = torch.norm(gripper_pos - lid_pos, dim=-1)
|
||||
is_close = distance < grasp_dist_threshold
|
||||
|
||||
# 闭合检查
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# 假设 > 80% 的闭合度算抓紧了
|
||||
is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)
|
||||
|
||||
is_grasped = is_close & is_closed
|
||||
|
||||
# 3. 【条件二:计算相对提升 (Relative Lift)】
|
||||
# 当前高度 - 初始稳定高度
|
||||
current_lift = lid_z - settled_height
|
||||
|
||||
# 4. 计算奖励
|
||||
lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
|
||||
|
||||
# 基础提升分 (Shaping Reward)
|
||||
lift_reward = lift_progress
|
||||
|
||||
# 成功大奖 (Success Bonus)
|
||||
# 如果提升超过目标高度 (比如 > 95% 的 5cm),给额外大奖
|
||||
success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
|
||||
|
||||
# 组合:只有在 is_grasped 为 True 时才发放提升奖励
|
||||
total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))
|
||||
|
||||
# 5. 安全输出
|
||||
return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
def lid_lift_progress_reward(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
initial_height: float = 0.8,
|
||||
target_height_lift: float = 0.15,
|
||||
height_offset: float = 0.02, # 必须与抓取奖励保持一致
|
||||
grasp_radius: float = 0.1, # 提升时可以稍微放宽一点半径,防止抖动丢分
|
||||
target_close_pos: float = 0.03,
|
||||
std: float = 0.05 # 标准差
|
||||
) -> torch.Tensor:
|
||||
|
||||
# 1. 判定是否夹住
|
||||
is_grasped = _is_grasped(
|
||||
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
# 2. 计算高度
|
||||
lid = env.scene[lid_cfg.name]
|
||||
current_height = lid.data.root_pos_w[:, 2]
|
||||
lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
|
||||
# print(current_height)
|
||||
# print(lift_height)
|
||||
# 3. 计算奖励
|
||||
# 只有在 is_grasped 为 True 时,才发放高度奖励
|
||||
# 这样彻底杜绝了 "砸飞/撞飞" 拿分的情况
|
||||
progress = torch.tanh(lift_height / std)
|
||||
|
||||
reward = torch.where(is_grasped, progress, 0.0)
|
||||
|
||||
return reward
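# [Illustration, not from the original commit] With std = 0.05, the progress term above is
# tanh(lift_height / std): roughly 0.76 at 5 cm of lift, 0.96 at 10 cm, and ~1.0 at the 15 cm
# clamp; it is only paid out while the grasp check holds.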
|
||||
|
||||
# def lid_grasped_bonus(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg,
|
||||
# robot_cfg: SceneEntityCfg,
|
||||
# left_gripper_name: str,
|
||||
# joint_names: list,
|
||||
# distance_threshold: float = 0.05,
|
||||
# closed_pos: float = 0.03
|
||||
# ) -> torch.Tensor:
|
||||
# # 1. 计算距离
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
# lid_pos = lid.data.root_pos_w[:, :3]
|
||||
# distance = torch.norm(gripper_pos - lid_pos, dim=-1)
|
||||
|
||||
# # 2. 检查距离是否达标
|
||||
# is_close = distance < distance_threshold
|
||||
|
||||
# # 3. 检查夹爪是否闭合
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# # 检查是否接近闭合 (比如 > 0.02)
|
||||
# is_closed = torch.all(joint_pos > (closed_pos * 0.8), dim=-1)
|
||||
|
||||
# # 4. 给分
|
||||
# # 只有同时满足才给 1.0
|
||||
# reward = torch.where(is_close & is_closed, 1.0, 0.0)
|
||||
|
||||
# return torch.nan_to_num(reward)
|
||||
|
||||
# 在 rewards.py 文件开头添加调试函数
|
||||
def debug_robot_physics(
|
||||
env: ManagerBasedRLEnv,
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "right_hand_body",
|
||||
distance_threshold: float = 0.1, # 当距离小于此值时打印
|
||||
print_interval: int = 10 # 每N步打印一次
|
||||
) -> None:
|
||||
"""
|
||||
调试函数:打印机器人的速度、加速度等信息
|
||||
"""
|
||||
# 使用静态变量控制打印频率
|
||||
if not hasattr(debug_robot_physics, "step_count"):
|
||||
debug_robot_physics.step_count = 0
|
||||
debug_robot_physics.step_count += 1
|
||||
|
||||
if debug_robot_physics.step_count % print_interval != 0:
|
||||
return
|
||||
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 1. 获取机器人根节点的速度和加速度
|
||||
root_lin_vel = robot.data.root_lin_vel_w # (num_envs, 3)
|
||||
root_ang_vel = robot.data.root_ang_vel_w # (num_envs, 3)
|
||||
root_lin_vel_magnitude = torch.norm(root_lin_vel, dim=1) # (num_envs,)
|
||||
root_ang_vel_magnitude = torch.norm(root_ang_vel, dim=1) # (num_envs,)
|
||||
|
||||
# 2. 获取夹爪body的速度
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
gripper_lin_vel = robot.data.body_lin_vel_w[:, body_ids[0], :] # (num_envs, 3)
|
||||
gripper_ang_vel = robot.data.body_ang_vel_w[:, body_ids[0], :] # (num_envs, 3)
|
||||
gripper_lin_vel_mag = torch.norm(gripper_lin_vel, dim=1)
|
||||
gripper_ang_vel_mag = torch.norm(gripper_ang_vel, dim=1)
|
||||
|
||||
# 3. 获取关节速度(可能影响碰撞)
|
||||
joint_vel = robot.data.joint_vel # (num_envs, num_joints)
|
||||
max_joint_vel = torch.max(torch.abs(joint_vel), dim=1)[0] # (num_envs,)
|
||||
|
||||
# 4. 检查是否有异常高的速度(可能导致碰撞问题)
|
||||
high_vel_mask = (gripper_lin_vel_mag > 0.5) | (root_lin_vel_magnitude > 0.3)
|
||||
|
||||
if torch.any(high_vel_mask):
|
||||
env_ids = torch.where(high_vel_mask)[0].cpu().tolist()
|
||||
for env_id in env_ids[:3]: # 只打印前3个环境
|
||||
print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:")
|
||||
print(f" 机器人根节点线性速度: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)")
|
||||
print(f" 机器人根节点角速度: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)")
|
||||
print(f" 夹爪线性速度: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)")
|
||||
print(f" 夹爪角速度: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)")
|
||||
print(f" 最大关节速度: {max_joint_vel[env_id].item():.4f} rad/s")
|
||||
|
||||
def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
handle_name: str = "Equipment_BB_13_C",
|
||||
left_gripper_name: str = "right_hand_l",
|
||||
right_gripper_name: str = "right_hand__r",
|
||||
height_offset: float = 0.03,
|
||||
std: float = 0.1) -> torch.Tensor:
|
||||
|
||||
dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
target_pos = handle_pos_w.clone()
|
||||
target_pos[:, 1] += height_offset
|
||||
target_pos[:, 0] += 0.02
|
||||
# target_pos[:, 2] -= 0.04
|
||||
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
|
||||
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
|
||||
|
||||
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
|
||||
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# ===== 添加调试打印 =====
|
||||
# 当夹爪接近手柄时(距离 < 0.15m)打印物理信息
|
||||
close_mask = distance < 0.03
|
||||
if torch.any(close_mask):
|
||||
debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5)
|
||||
# ========================
|
||||
|
||||
position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
# 计算距离后插入
|
||||
if env.common_step_counter % 500 == 0:
|
||||
print("pos_align dbg env0: "
|
||||
f"handle={handle_pos_w[0].cpu().numpy()}, "
|
||||
f"target={target_pos[0].cpu().numpy()}, "
|
||||
f"gripL={left_gripper_pos[0].cpu().numpy()}, "
|
||||
f"gripR={right_gripper_pos[0].cpu().numpy()}, "
|
||||
f"center={gripper_center[0].cpu().numpy()}, "
|
||||
f"dist={distance[0].item():.4f}")
|
||||
|
||||
return position_reward
|
||||
|
||||
|
||||
# def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_gripper_name: str = "right_hand_l",
|
||||
# right_gripper_name: str = "right_hand__r",
|
||||
# height_offset: float = 0.03,
|
||||
# std: float = 0.1) -> torch.Tensor:
|
||||
|
||||
# dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
|
||||
# robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# target_pos = handle_pos_w.clone()
|
||||
# target_pos[:, 1] += height_offset
|
||||
# target_pos[:, 0] += 0.02
|
||||
# # target_pos[:, 2] -= 0.02
|
||||
|
||||
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
|
||||
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
|
||||
|
||||
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
|
||||
# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# # ===== 添加调试打印 =====
|
||||
# # 当夹爪接近手柄时(距离 < 0.15m)打印物理信息
|
||||
# close_mask = distance < 0.03
|
||||
# if torch.any(close_mask):
|
||||
# debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5)
|
||||
# # ========================
|
||||
|
||||
# position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
# # 计算距离后插入
|
||||
# if env.common_step_counter % 00 == 0:
|
||||
# print("pos_align dbg env0: "
|
||||
# f"handle={handle_pos_w[0].cpu().numpy()}, "
|
||||
# f"target={target_pos[0].cpu().numpy()}, "
|
||||
# f"gripL={left_gripper_pos[0].cpu().numpy()}, "
|
||||
# f"gripR={right_gripper_pos[0].cpu().numpy()}, "
|
||||
# f"center={gripper_center[0].cpu().numpy()}, "
|
||||
# f"dist={distance[0].item():.4f}")
|
||||
|
||||
# return position_reward
|
||||
|
||||
|
||||
# def gripper_open_when_far(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg,
|
||||
# robot_cfg: SceneEntityCfg,
|
||||
# handle_name: str,
|
||||
# left_gripper_name: str,
|
||||
# right_gripper_name: str,
|
||||
# joint_names: list,
|
||||
# dist_threshold: float = 0.5,
|
||||
# target_open_pos: float = 0.3,
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 当夹爪距离目标点较远(> dist_threshold)时,奖励夹爪保持打开状态。
|
||||
# """
|
||||
# # 1. 获取位置
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 获取手柄位置
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name])
|
||||
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
|
||||
|
||||
# # 获取夹爪中心位置
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
|
||||
# gripper_center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# # 2. 计算距离
|
||||
# dist = torch.norm(gripper_center - handle_pos, dim=-1)
|
||||
|
||||
# # 3. 获取手指关节位置的绝对值
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# current_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# mean_pos_abs = torch.mean(current_pos_abs, dim=1)
|
||||
|
||||
# # 4. 计算奖励:如果距离 > 0.5,奖励 mean_pos_abs 接近 target_open_pos (0.3)
|
||||
# # 使用 tanh 误差函数,误差越小奖励越高
|
||||
# error = torch.abs(mean_pos_abs - target_open_pos)
|
||||
# reward = 1.0 - torch.tanh(error / 0.1) # 0.1 为缩放因子,可根据需要调整
|
||||
|
||||
# # 只有距离大于阈值时才发放奖励
|
||||
# return torch.where(dist > dist_threshold, reward, torch.zeros_like(reward))
|
||||
|
||||
|
||||
def gripper_open_when_far(
|
||||
env: ManagerBasedRLEnv,
|
||||
dryingbox_cfg: SceneEntityCfg, # 保留接口一致性,但不再使用
|
||||
robot_cfg: SceneEntityCfg,
|
||||
handle_name: str, # 保留接口一致性,但不再使用
|
||||
left_gripper_name: str,
|
||||
right_gripper_name: str,
|
||||
joint_names: list, # 不再使用
|
||||
dist_threshold: float = 0.5, # 不再使用
|
||||
target_open_dist: float = 0.06, # 两手指完全打开时的距离(米)
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
奖励夹爪最大张开状态:
|
||||
只根据左右两根手指之间的实际距离计算奖励,
|
||||
当距离接近 target_open_dist (0.06m) 时奖励最大。
|
||||
"""
|
||||
|
||||
# 1. 获取机器人
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 2. 获取左右手指的世界坐标
|
||||
l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
|
||||
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
|
||||
# 3. 计算两手指之间的实际距离
|
||||
finger_dist = torch.norm(pos_L - pos_R, dim=-1)
|
||||
|
||||
# 4. 奖励:距离越接近最大打开距离,奖励越高
|
||||
error = torch.abs(finger_dist - target_open_dist)
|
||||
reward = 1.0 - torch.tanh(error / 0.01) # 0.01m = 1cm 作为平滑尺度
|
||||
|
||||
return reward
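# [Illustration, not from the original commit] With target_open_dist = 0.06 and the 0.01 smoothing scale:
#   finger_dist = 0.06 -> error 0.00 -> reward ≈ 1.0
#   finger_dist = 0.05 -> error 0.01 -> reward ≈ 0.24
#   finger_dist = 0.03 -> error 0.03 -> reward ≈ 0.005
# i.e. the term rewards keeping the fingers near their fully open spacing regardless of the distance
# to the handle (the distance-related arguments are kept only for interface compatibility).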
|
||||
@@ -40,7 +40,7 @@ TABLE_CFG=RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Table",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.95, -0.3, 0.005],
|
||||
rot=[0.7071, 0.0, 0.0, 0.7071],
|
||||
rot=[0.7071, 0.0, 0.0, -0.7071],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
@@ -76,7 +76,7 @@ DRYINGBOX_CFG = ArticulationCfg(
|
||||
),
|
||||
),
|
||||
init_state=ArticulationCfg.InitialStateCfg(
|
||||
pos=(0.95, -0.45, 0.9),
|
||||
pos=[0.95, -0.45, 0.9],
|
||||
rot=[-0.7071, 0.0, 0.0, 0.7071]
|
||||
),
|
||||
# actuators={}
|
||||
@@ -157,7 +157,8 @@ ULTRASOUND_CFG = ArticulationCfg(
|
||||
##
|
||||
# Scene definition
|
||||
##
|
||||
|
||||
from isaaclab.sensors import CameraCfg
|
||||
import isaaclab.sim as sim_utils
|
||||
|
||||
@configclass
|
||||
class PullSceneCfg(InteractiveSceneCfg):
|
||||
@@ -181,6 +182,109 @@ class PullSceneCfg(InteractiveSceneCfg):
|
||||
prim_path="/World/DomeLight",
|
||||
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
|
||||
)
|
||||
# # ------------- 新增 6 路相机 -------------
|
||||
# cam_head = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=1.93, # 使用你给的 1.93
|
||||
# # 由 fx = W * f / aperture 推得 aperture = W * f / fx
|
||||
# horizontal_aperture=1280 * 1.93 / 911.77, # ≈ 2.71,单位与 focal_length 一致
|
||||
# clipping_range=(0.1, 1e5),
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.1226748226, 0.0, 0.1130563345),
|
||||
# rot=(-0.5, 0.5, -0.5, 0.5),
|
||||
# # convention="ros",
|
||||
|
||||
# # rot=(0.70710678, 0, 0.70710678, 0),
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_chest = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(-0.14909, 0.3549, -0.23295),
|
||||
# rot=(0.99452, 0.0, 0.10453, 0.0),#(0.99452, 0.0, 0.10453, 0.0),#
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_left_hand = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.0610568560, 1.13e-8, 0.0683504405),#()
|
||||
# rot=(0.99452, 0, 0.10453, 0),#(0.99452, 0, 0.10453, 0)
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_right_hand = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.0610571709, 2.58e-7, 0.0711255539),#(0.0610568560, 1.13e-8, 0.0683504405)
|
||||
# rot=(0.445, -0.549, -0.549, -0.445),#(0.99452, 0, 0.10453, 0)
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_top = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/cam_top",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.4627602211, 0.0, 2.0977711170),
|
||||
# rot=(0.83526, 0, 0.54986, 0),
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=960,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=False,
|
||||
# )
|
||||
|
||||
# cam_side = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/cam_side",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.76709, -0.16995, 1.13007),
|
||||
# rot=(0.61427, 0, 0.02598, 0.78866),
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=960,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=False,
|
||||
# )
|
||||
# # ------------- 相机结束 -------------
|
||||
|
||||
|
||||
##
|
||||
@@ -222,21 +326,6 @@ class ActionsCfg:
|
||||
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
|
||||
)
|
||||
|
||||
# left_arm_fixed = mdp.JointPositionActionCfg(
|
||||
# asset_name="Mindbot",
|
||||
# joint_names=["l_joint[1-6]"], # 对应 l_joint1 到 l_joint6
|
||||
|
||||
# # 1. 缩放设为 0:这让 RL 策略无法控制它,它不会动
|
||||
# scale=0.0,
|
||||
# offset={
|
||||
# "l_joint1": 2.3562,
|
||||
# "l_joint2": -1.2217,
|
||||
# "l_joint3": -1.5708,
|
||||
# "l_joint4": 1.5708,
|
||||
# "l_joint5": 1.5708,
|
||||
# "l_joint6": -1.2217,
|
||||
# },
|
||||
# )
|
||||
|
||||
grippers_position = mdp.BinaryJointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
@@ -317,16 +406,12 @@ class EventCfg:
|
||||
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
reset_dryingbox=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
|
||||
params={"asset_cfg": SceneEntityCfg("dryingbox"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
# reset_ultrasound = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
|
||||
# params={"asset_cfg": SceneEntityCfg("ultrasound"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
# reset_lid = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
|
||||
# params={"asset_cfg": SceneEntityCfg("lid"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
|
||||
|
||||
@configclass
|
||||
class RewardsCfg:
|
||||
"""Reward terms for the MDP."""
|
||||
|
||||
# 1. 姿态对齐:保持不变,这是基础
|
||||
gripper_handle_orientation_alignment = RewTerm(
|
||||
func=mdp.gripper_handle_orientation_alignment,
|
||||
params={
|
||||
@@ -337,15 +422,12 @@ class RewardsCfg:
|
||||
"gripper_forward_axis": (0.0, 0.0, 1.0),
|
||||
"gripper_width_axis": (0.0, 1.0, 0.0),
|
||||
"lid_handle_axis": (0.0, 0.0, 1.0),
|
||||
# 删除了 handle_axis,因为新函数中不再使用它
|
||||
"max_angle_deg": 85.0, # 建议改为 30.0 或更小,85.0 对指向性约束来说太宽松
|
||||
"max_angle_deg": 60.0, # 稍微放宽一点,或者保持 30.0,看前期训练难度
|
||||
},
|
||||
weight=5.0 # 建议保持在 4.0 或 5.0,确保姿态约束有足够的引导力
|
||||
weight=4.0
|
||||
)
|
||||
|
||||
# stage 2
|
||||
# 位置对齐奖励:鼓励夹爪位于 lid 两侧,且 Z 方向在 lid 上方 0.1m
|
||||
|
||||
# 2. 位置对齐 (合并了之前的粗略和精细对齐)
|
||||
# std=0.05 提供了从 15cm 到 0cm 全程较好的梯度,解决了之前的梯度消失问题
|
||||
gripper_handle_position_alignment = RewTerm(
|
||||
func=mdp.gripper_handle_position_alignment,
|
||||
params={
|
||||
@@ -354,29 +436,31 @@ class RewardsCfg:
|
||||
"handle_name":"Equipment_BB_13_C",
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"height_offset": 0.00, # Z方向:lid 上方 0.1m
|
||||
"std": 0.2, # 位置对齐的容忍度0.3
|
||||
"height_offset": 0.00,
|
||||
"std": 0.05, # 关键修改:0.3太松,0.03太紧导致梯度消失,0.05是平衡点
|
||||
},
|
||||
weight=3.0
|
||||
weight=6.0 # 提高权重,作为主导奖励
|
||||
)
|
||||
|
||||
# 【新增】精细对齐 (引导进入 2cm 圈)
|
||||
gripper_handle_fine_alignment = RewTerm(
|
||||
func=mdp.gripper_handle_position_alignment,
|
||||
# 3. 远距离闭合惩罚 (逻辑已修正)
|
||||
# 现在它只惩罚“在远处就闭眼”的行为,不会阻挡机器人靠近
|
||||
gripper_closed_penalty = RewTerm(
|
||||
func=mdp.gripper_open_penalty_when_far,
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"handle_name":"Equipment_BB_13_C",
|
||||
"handle_name": "Equipment_BB_13_C",
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"height_offset": 0.00, # Z方向:lid 上方 0.1m
|
||||
"std": 0.05, # 位置对齐的容忍度0.05
|
||||
"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
"dist_threshold": 0.06, # 2.5cm 保护区,进入后允许闭合
|
||||
},
|
||||
weight=10.0 # 高权重
|
||||
weight=0.5 # 权重降低,作为辅助约束,不要喧宾夺主
|
||||
)
|
||||
# ... 在 RewardsCfg 类中修改 ...
|
||||
|
||||
# 4. 近距离抓取交互 (改用 V2 版本或增加稳定性项)
|
||||
gripper_close_interaction = RewTerm(
|
||||
func=mdp.gripper_close_when_near,
|
||||
func=mdp.gripper_close_interaction_v2, # 切换到带稳定性的版本
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
@@ -384,34 +468,171 @@ class RewardsCfg:
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
"target_close_pos": 0.02,
|
||||
"height_offset": 0.00,#0.07
|
||||
"grasp_radius": 0.05,#original 0.03
|
||||
"target_close_pos": 0.012,
|
||||
"height_offset": 0.00,
|
||||
"grasp_radius": 0.05,
|
||||
},
|
||||
weight=10.0
|
||||
weight=8.0 # 适当调低一点点,为稳定性奖励腾出空间
|
||||
)
|
||||
|
||||
# lid_lifted_reward = RewTerm(
|
||||
# func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
|
||||
# 5. 新增:抓取稳定性奖励 (核心:惩罚抖动)
|
||||
gripper_grasp_stability = RewTerm(
|
||||
func=mdp.gripper_grasp_stability,
|
||||
params={
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
"vel_threshold": 0.005,
|
||||
},
|
||||
weight=4.0 # 只要保持静止就一直给分
|
||||
)
|
||||
# 4. 近距离抓取交互 (修正了目标值)
|
||||
# gripper_close_interaction = RewTerm(
|
||||
# func=mdp.gripper_close_when_near,
|
||||
# params={
|
||||
# "lid_cfg": SceneEntityCfg("lid"),
|
||||
# "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name":"Equipment_BB_13_C",
|
||||
# "left_gripper_name": "right_hand_l",
|
||||
# "right_gripper_name": "right_hand__r",
|
||||
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
|
||||
# # 关键修改:物理接触点计算
|
||||
# # 夹爪空载全闭是0.03 (对应0cm)
|
||||
# # 把手宽3.88cm -> 单侧行程 (6-3.88)/2 = 1.06cm = 0.0106m
|
||||
# # 设为 0.012 确保接触时计算出的奖励是满分 (ratio >= 1.0)
|
||||
# "target_close_pos": 0.012,
|
||||
|
||||
# "height_offset": 0.00,
|
||||
# "grasp_radius": 0.05,
|
||||
# },
|
||||
# weight=10.0 # 给一个足够大的权重
|
||||
# weight=10.0 # 只有成功抓到才给的大奖
|
||||
# )
|
||||
|
||||
# lid_lifting_reward = RewTerm(
|
||||
# func=mdp.lid_lift_progress_reward,
|
||||
# params={
|
||||
# "lid_cfg": SceneEntityCfg("lid"),
|
||||
# "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
|
||||
# "target_height_lift": 0.25,
|
||||
# "height_offset": 0.07, # 与其他奖励保持一致
|
||||
# "std": 0.1
|
||||
# },
|
||||
# # 权重设大一点,告诉它这是最终目标
|
||||
# weight=10.0
|
||||
# )
|
||||
# @configclass
|
||||
# class RewardsCfg:
|
||||
# """Reward terms for the MDP."""
|
||||
|
||||
# gripper_handle_orientation_alignment = RewTerm(
|
||||
# func=mdp.gripper_handle_orientation_alignment,
|
||||
# params={
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name": "Equipment_BB_13_C",
|
||||
# "gripper_body_name": "right_hand_body",
|
||||
# "gripper_forward_axis": (0.0, 0.0, 1.0),
|
||||
# "gripper_width_axis": (0.0, 1.0, 0.0),
|
||||
# "lid_handle_axis": (0.0, 0.0, 1.0),
|
||||
# # 删除了 handle_axis,因为新函数中不再使用它
|
||||
# "max_angle_deg": 30.0, # 建议改为 30.0 或更小,85.0 对指向性约束来说太宽松
|
||||
# },
|
||||
# weight=4.0 # 建议保持在 4.0 或 5.0,确保姿态约束有足够的引导力
|
||||
# )
|
||||
|
||||
# # stage 2
|
||||
# # 位置对齐奖励:鼓励夹爪位于 lid 两侧,且 Z 方向在 lid 上方 0.1m
|
||||
|
||||
# gripper_handle_position_alignment = RewTerm(
|
||||
# func=mdp.gripper_handle_position_alignment,
|
||||
# params={
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name":"Equipment_BB_13_C",
|
||||
# "left_gripper_name": "right_hand_l",
|
||||
# "right_gripper_name": "right_hand__r",
|
||||
# "height_offset": 0.00, # Z方向:lid 上方 0.1m
|
||||
# "std": 0.3, # 位置对齐的容忍度0.3 ###0.1
|
||||
# },
|
||||
# weight=3.0
|
||||
# )
|
||||
|
||||
# # 强制远距离打开奖励
|
||||
# # 远距离闭合惩罚
|
||||
# gripper_closed_penalty = RewTerm(
|
||||
# func=mdp.gripper_open_penalty_when_far,
|
||||
# params={
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name": "Equipment_BB_13_C",
|
||||
# "left_gripper_name": "right_hand_l",
|
||||
# "right_gripper_name": "right_hand__r",
|
||||
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
# "dist_threshold": 0.02, # 3cm 以外不允许闭合
|
||||
# "max_penalty_dist":0.055 #0.055
|
||||
# },
|
||||
# weight=1.0 # 提高权重,让扣分很痛8.0
|
||||
# )
|
||||
|
||||
|
||||
# # 【新增】精细对齐 (引导进入 2cm 圈)
|
||||
# gripper_handle_fine_alignment = RewTerm(
|
||||
# func=mdp.gripper_handle_position_alignment,
|
||||
# params={
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name":"Equipment_BB_13_C",
|
||||
# "left_gripper_name": "right_hand_l",
|
||||
# "right_gripper_name": "right_hand__r",
|
||||
# "height_offset": 0.00, # Z方向:lid 上方 0.1m
|
||||
# "std": 0.03, # 位置对齐的容忍度0.05
|
||||
# },
|
||||
# weight=5.0 # 高权重
|
||||
# )
|
||||
|
||||
# # gripper_close_interaction = RewTerm(
|
||||
# # func=mdp.gripper_close_when_near,
|
||||
# # params={
|
||||
# # "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# # "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# # "handle_name": "Equipment_BB_13_C",
|
||||
# # "left_gripper_name": "right_hand_l",
|
||||
# # "right_gripper_name": "right_hand__r",
|
||||
# # "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
# # # 关键匹配把手尺寸
|
||||
# # "handle_thickness": 0.0388, # 把手厚度(X/宽度向抓取方向)
|
||||
# # "thickness_tol": 0.008, # 误差容忍,约 0.8cm
|
||||
# # "grasp_radius": 0.04, # 进入此半径才计算闭合奖励
|
||||
# # "height_offset": 0.00,
|
||||
# # "target_close_pos": 0.03, # 保留给关节闭合幅度
|
||||
# # },
|
||||
# # weight=10.0
|
||||
# # )
|
||||
# gripper_close_interaction = RewTerm(
|
||||
# func=mdp.gripper_close_when_near,
|
||||
# params={
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name":"Equipment_BB_13_C",
|
||||
# "left_gripper_name": "right_hand_l",
|
||||
# "right_gripper_name": "right_hand__r",
|
||||
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
# "target_close_pos": 0.03,
|
||||
# "height_offset": 0.00,#0.07
|
||||
# "grasp_radius": 0.04,#original 0.03
|
||||
# },
|
||||
# weight=10.0
|
||||
# )
|
||||
|
||||
# # lid_lifted_reward = RewTerm(
|
||||
# # func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
|
||||
# # params={
|
||||
# # "lid_cfg": SceneEntityCfg("lid"),
|
||||
# # "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
|
||||
# # },
|
||||
# # weight=10.0 # 给一个足够大的权重
|
||||
# # )
|
||||
|
||||
# # lid_lifting_reward = RewTerm(
|
||||
# # func=mdp.lid_lift_progress_reward,
|
||||
# # params={
|
||||
# # "lid_cfg": SceneEntityCfg("lid"),
|
||||
# # "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
|
||||
# # "target_height_lift": 0.25,
|
||||
# # "height_offset": 0.07, # 与其他奖励保持一致
|
||||
# # "std": 0.1
|
||||
# # },
|
||||
# # # 权重设大一点,告诉它这是最终目标
|
||||
# # weight=10.0
|
||||
# # )
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,681 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import math
|
||||
|
||||
import torch
|
||||
from isaaclab.actuators import ImplicitActuatorCfg
|
||||
import isaaclab.sim as sim_utils
|
||||
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
|
||||
from isaaclab.envs import ManagerBasedRLEnvCfg
|
||||
from isaaclab.managers import EventTermCfg as EventTerm
|
||||
from isaaclab.managers import ObservationGroupCfg as ObsGroup
|
||||
from isaaclab.managers import ObservationTermCfg as ObsTerm
|
||||
from isaaclab.managers import RewardTermCfg as RewTerm
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
from isaaclab.managers import TerminationTermCfg as DoneTerm
|
||||
from isaaclab.managers import CurriculumTermCfg as CurrTerm
|
||||
from isaaclab.scene import InteractiveSceneCfg
|
||||
from isaaclab.utils import configclass
|
||||
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
|
||||
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
|
||||
|
||||
from . import mdp
|
||||
|
||||
##
|
||||
# Pre-defined configs
|
||||
##
|
||||
|
||||
from mindbot.robot.mindbot import MINDBOT_CFG
|
||||
|
||||
# ====== Other object configurations ======
|
||||
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
|
||||
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
|
||||
|
||||
|
||||
TABLE_CFG=RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Table",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.95, -0.3, 0.005],
|
||||
rot=[0.7071, 0.0, 0.0, -0.7071],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
|
||||
rigid_props=RigidBodyPropertiesCfg(
|
||||
rigid_body_enabled= True,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
disable_gravity=False,
|
||||
),
|
||||
collision_props=CollisionPropertiesCfg(
|
||||
collision_enabled=True,
|
||||
contact_offset=0.0005,#original 0.02
|
||||
rest_offset=0
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
DRYINGBOX_CFG = ArticulationCfg(
|
||||
spawn=sim_utils.UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/assets/Collected_Equipment_BB_0B/Equipment/Equipment_BB_13.usd",
|
||||
rigid_props=sim_utils.RigidBodyPropertiesCfg(
|
||||
disable_gravity=False,
|
||||
max_depenetration_velocity=1.0
|
||||
),
|
||||
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
|
||||
enabled_self_collisions=False,#
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
stabilization_threshold=1e-6,
|
||||
# fix_root_link=True,
|
||||
),
|
||||
),
|
||||
init_state=ArticulationCfg.InitialStateCfg(
|
||||
pos=[0.95, -0.45, 0.9],
|
||||
rot=[-0.7071, 0.0, 0.0, 0.7071]
|
||||
),
|
||||
# actuators={}
|
||||
actuators={
|
||||
"passive_damper": ImplicitActuatorCfg(
|
||||
# ".*" 表示匹配该USD文件内的所有关节(无论是轮子、屏幕转轴还是其他)
|
||||
joint_names_expr=["RevoluteJoint"],
|
||||
|
||||
stiffness=10000.0,
|
||||
damping=1000.0,
|
||||
effort_limit_sim=10000.0,
|
||||
velocity_limit_sim=100.0,
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
LID_CFG = RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Lid",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.2, 0.65, 0.9],
|
||||
rot=[1.0, 0.0, 0.0, 0.0],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
|
||||
# scale=(0.2, 0.2, 0.2),
|
||||
rigid_props=RigidBodyPropertiesCfg(
|
||||
rigid_body_enabled= True,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
max_angular_velocity=1000.0,
|
||||
max_linear_velocity=1000.0,
|
||||
max_depenetration_velocity=0.5,#original 5.0
|
||||
linear_damping=5.0, #original 0.5
|
||||
angular_damping=5.0, #original 0.5
|
||||
disable_gravity=False,
|
||||
),
|
||||
collision_props=CollisionPropertiesCfg(
|
||||
collision_enabled=True,
|
||||
contact_offset=0.0005,#original 0.02
|
||||
rest_offset=0
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
ULTRASOUND_CFG = ArticulationCfg(
|
||||
spawn=sim_utils.UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
|
||||
rigid_props=sim_utils.RigidBodyPropertiesCfg(
|
||||
disable_gravity=False,
|
||||
max_depenetration_velocity=1.0
|
||||
),
|
||||
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
|
||||
enabled_self_collisions=False,#
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
stabilization_threshold=1e-6,
|
||||
# fix_root_link=True,
|
||||
),
|
||||
),
|
||||
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)),
|
||||
# actuators={}
|
||||
actuators={
|
||||
"passive_damper": ImplicitActuatorCfg(
|
||||
# ".*" 表示匹配该USD文件内的所有关节(无论是轮子、屏幕转轴还是其他)
|
||||
joint_names_expr=[".*"],
|
||||
|
||||
stiffness=0.0,
|
||||
damping=1000.0,
|
||||
effort_limit_sim=10000.0,
|
||||
velocity_limit_sim=100.0,
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
##
|
||||
# Scene definition
|
||||
##
|
||||
from isaaclab.sensors import CameraCfg
|
||||
import isaaclab.sim as sim_utils
|
||||
|
||||
@configclass
|
||||
class PullSceneCfg(InteractiveSceneCfg):
|
||||
"""Configuration for a cart-pole scene."""
|
||||
|
||||
# ground plane
|
||||
ground = AssetBaseCfg(
|
||||
prim_path="/World/ground",
|
||||
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
|
||||
)
|
||||
|
||||
# robot
|
||||
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
|
||||
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
|
||||
dryingbox: ArticulationCfg = DRYINGBOX_CFG.replace(prim_path="{ENV_REGEX_NS}/Dryingbox")
|
||||
# ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
|
||||
# lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
|
||||
|
||||
# lights
|
||||
dome_light = AssetBaseCfg(
|
||||
prim_path="/World/DomeLight",
|
||||
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
|
||||
)
|
||||
|
||||
# # ------------- 修改后的相机配置 (直接使用 USD 内置相机) -------------
|
||||
|
||||
# # 1. 头部相机
|
||||
# # USD 路径: /root/robot_head/cam_head
|
||||
# cam_head = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
# update_latest_camera_pose=True,
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# # 移除 spawn 和 offset,直接使用 USD 中的参数和位置
|
||||
# )
|
||||
|
||||
# # 2. 胸部相机
|
||||
# # USD 路径: /root/robot_trunk/cam_chest
|
||||
# cam_chest = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
|
||||
# update_latest_camera_pose=True,
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# )
|
||||
|
||||
# # 3. 左手相机
|
||||
# # USD 路径: /root/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand
|
||||
# cam_left_hand = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
|
||||
# update_latest_camera_pose=True,
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# )
|
||||
|
||||
# # 4. 右手相机
|
||||
# # USD 路径: /root/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand
|
||||
# cam_right_hand = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
|
||||
# update_latest_camera_pose=True,
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# )
|
||||
|
||||
# ------------- 相机结束 -------------
|
||||
|
||||
|
||||
# # ------------- 新增 6 路相机 -------------
|
||||
# cam_head = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=1.93, # 使用你给的 1.93
|
||||
# # 由 fx = W * f / aperture 推得 aperture = W * f / fx
|
||||
# horizontal_aperture=1280 * 1.93 / 911.77, # ≈ 2.71,单位与 focal_length 一致
|
||||
# clipping_range=(0.1, 1e5),
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.1226748226, 0.0, 0.1130563345),
|
||||
# rot=(-0.5, 0.5, -0.5, 0.5),
|
||||
# # convention="ros",
|
||||
|
||||
# # rot=(0.70710678, 0, 0.70710678, 0),
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_chest = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(-0.14909, 0.3549, -0.23295),
|
||||
# rot=(0.99452, 0.0, 0.10453, 0.0),#(0.99452, 0.0, 0.10453, 0.0),#
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_left_hand = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.0610568560, 1.13e-8, 0.0683504405),#()
|
||||
# rot=(0.99452, 0, 0.10453, 0),#(0.99452, 0, 0.10453, 0)
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_right_hand = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.0610571709, 2.58e-7, 0.0711255539),#(0.0610568560, 1.13e-8, 0.0683504405)
|
||||
# rot=(0.445, -0.549, -0.549, -0.445),#(0.99452, 0, 0.10453, 0)
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_top = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/cam_top",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.4627602211, 0.0, 2.0977711170),
|
||||
# rot=(0.83526, 0, 0.54986, 0),
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=960,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=False,
|
||||
# )
|
||||
|
||||
# cam_side = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/cam_side",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.76709, -0.16995, 1.13007),
|
||||
# rot=(0.61427, 0, 0.02598, 0.78866),
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=960,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=False,
|
||||
# )
|
||||
# # ------------- 相机结束 -------------
|
||||
|
||||
|
||||
##
|
||||
# MDP settings
|
||||
##
|
||||
|
||||
def right_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
|
||||
asset = env.scene[asset_cfg.name]
|
||||
body_ids, _ = asset.find_bodies(["right_hand_body"], preserve_order=True)
|
||||
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
|
||||
return pos_w
|
||||
|
||||
def get_body_pos_rel(env, asset_cfg: SceneEntityCfg) -> torch.Tensor:
|
||||
asset = env.scene[asset_cfg.name]
|
||||
body_ids, _ = asset.find_bodies(asset_cfg.body_names, preserve_order=True)
|
||||
return asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
|
||||
|
||||
# 新增:通用获取 body 世界姿态的函数
|
||||
def get_body_quat_w(env, asset_cfg: SceneEntityCfg) -> torch.Tensor:
|
||||
asset = env.scene[asset_cfg.name]
|
||||
body_ids, _ = asset.find_bodies(asset_cfg.body_names, preserve_order=True)
|
||||
return asset.data.body_quat_w[:, body_ids[0]]
|
||||
|
||||
|
||||
@configclass
|
||||
class ActionsCfg:
|
||||
"""Action specifications for the MDP."""
|
||||
|
||||
# Task-space controller: the policy outputs a 6-D end-effector pose increment.
|
||||
right_arm_ee = DifferentialInverseKinematicsActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["r_joint[1-6]"], # 左臂的6个关节
|
||||
body_name="right_hand_body", # 末端执行器body名称
|
||||
controller=DifferentialIKControllerCfg(
|
||||
command_type="pose", # 控制位置+姿态
|
||||
use_relative_mode=True,  # relative mode: actions are increments
|
||||
ik_method="dls", # Damped Least Squares方法
|
||||
),
|
||||
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
|
||||
)
|
||||
|
||||
|
||||
grippers_position = mdp.BinaryJointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["right_hand_joint_left", "right_hand_joint_right"],
|
||||
# Note: the open/close commands use the dict format
|
||||
# open_command_expr={"关节名正则": 值}
|
||||
open_command_expr={"right_hand_joint_left": 0.0, "right_hand_joint_right": 0.0},
|
||||
# close_command_expr={"关节名正则": 值}
|
||||
close_command_expr={"right_hand_joint_left": -0.03, "right_hand_joint_right": 0.03},
|
||||
)
|
||||
|
||||
trunk_position = mdp.JointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["PrismaticJoint"],
|
||||
scale=0.00,
|
||||
offset=0.25,
|
||||
clip=None
|
||||
)
|
||||
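# Note: assuming Isaac Lab's default action dimensions, the action vector assembled
# above is 6 (relative end-effector pose) + 1 (binary gripper command) + 1 (trunk
# prismatic joint) = 8 values per environment.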
|
||||
|
||||
@configclass
|
||||
class ObservationsCfg:
|
||||
"""Observation specifications for the MDP."""
|
||||
|
||||
@configclass
|
||||
class PolicyCfg(ObsGroup):
|
||||
"""Observations for policy group."""
|
||||
|
||||
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
|
||||
right_gripper_pos = ObsTerm(func=right_gripper_pos_w,
|
||||
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["right_hand_body"])})
|
||||
dryingbox_handle_pos = ObsTerm(
|
||||
func=get_body_pos_rel,
|
||||
params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])}
|
||||
)
|
||||
dryingbox_handle_quat = ObsTerm(
|
||||
func=get_body_quat_w,
|
||||
params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])}
|
||||
)
|
||||
# lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
|
||||
# ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
self.enable_corruption = False
|
||||
self.concatenate_terms = True
|
||||
|
||||
# observation groups
|
||||
policy: PolicyCfg = PolicyCfg()
|
||||
|
||||
|
||||
@configclass
|
||||
class EventCfg:
|
||||
"""Configuration for events."""
|
||||
# Reset all joints to init_state (no offset)
|
||||
reset_mindbot_all_joints = EventTerm(
|
||||
func=mdp.reset_joints_by_offset,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot"),
|
||||
"position_range": (0.0, 0.0),
|
||||
"velocity_range": (0.0, 0.0),
|
||||
},
|
||||
)
|
||||
# Add a (currently zero) random offset to the right-arm joints only
|
||||
reset_mindbot_right_arm = EventTerm(
|
||||
func=mdp.reset_joints_by_offset,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["r_joint[1-6]"]),
|
||||
"position_range": (0.0, 0.0),
|
||||
"velocity_range": (0.0, 0.0),
|
||||
},
|
||||
)
|
||||
reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
|
||||
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
reset_dryingbox=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
|
||||
params={"asset_cfg": SceneEntityCfg("dryingbox"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
|
||||
|
||||
@configclass
|
||||
class RewardsCfg:
|
||||
"""Reward terms for the MDP."""
|
||||
|
||||
gripper_handle_orientation_alignment = RewTerm(
|
||||
func=mdp.gripper_handle_orientation_alignment,
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"handle_name": "Equipment_BB_13_C",
|
||||
"gripper_body_name": "right_hand_body",
|
||||
"gripper_forward_axis": (0.0, 0.0, 1.0),
|
||||
"gripper_width_axis": (0.0, 1.0, 0.0),
|
||||
"lid_handle_axis": (0.0, 0.0, 1.0),
|
||||
# handle_axis was removed; the new function no longer uses it
|
||||
"max_angle_deg": 30.0, # 建议改为 30.0 或更小,85.0 对指向性约束来说太宽松
|
||||
},
|
||||
weight=4.0  # keep around 4.0-5.0 so the orientation constraint provides enough guidance
|
||||
)
|
||||
|
||||
# stage 2
|
||||
# Position-alignment reward: keep the gripper fingers centered on the handle, at height_offset above it in Z
|
||||
|
||||
gripper_handle_position_alignment = RewTerm(
|
||||
func=mdp.gripper_handle_position_alignment,
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"handle_name":"Equipment_BB_13_C",
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"height_offset": 0.00, # Z方向:lid 上方 0.1m
|
||||
"std": 0.3, # 位置对齐的容忍度0.3 ###0.1
|
||||
},
|
||||
weight=3.0
|
||||
)
|
||||
|
||||
# Keep the gripper open while far from the handle:
|
||||
# penalty for closing at a distance
|
||||
gripper_closed_penalty = RewTerm(
|
||||
func=mdp.gripper_open_penalty_when_far,
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"handle_name": "Equipment_BB_13_C",
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
"dist_threshold": 0.02, # 3cm 以外不允许闭合
|
||||
"max_penalty_dist":0.055 #0.055
|
||||
},
|
||||
weight=1.0  # raise this (e.g. to 8.0) to make the penalty bite harder
|
||||
)
|
||||
|
||||
|
||||
# [New] Fine alignment (guides the gripper into the ~2 cm zone)
|
||||
gripper_handle_fine_alignment = RewTerm(
|
||||
func=mdp.gripper_handle_position_alignment,
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"handle_name":"Equipment_BB_13_C",
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"height_offset": 0.00, # Z方向:lid 上方 0.1m
|
||||
"std": 0.03, # 位置对齐的容忍度0.05
|
||||
},
|
||||
weight=5.0  # high weight
|
||||
)
|
||||
|
||||
# gripper_close_interaction = RewTerm(
|
||||
# func=mdp.gripper_close_when_near,
|
||||
# params={
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name": "Equipment_BB_13_C",
|
||||
# "left_gripper_name": "right_hand_l",
|
||||
# "right_gripper_name": "right_hand__r",
|
||||
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
# # 关键匹配把手尺寸
|
||||
# "handle_thickness": 0.0388, # 把手厚度(X/宽度向抓取方向)
|
||||
# "thickness_tol": 0.008, # 误差容忍,约 0.8cm
|
||||
# "grasp_radius": 0.04, # 进入此半径才计算闭合奖励
|
||||
# "height_offset": 0.00,
|
||||
# "target_close_pos": 0.03, # 保留给关节闭合幅度
|
||||
# },
|
||||
# weight=10.0
|
||||
# )
|
||||
gripper_close_interaction = RewTerm(
|
||||
func=mdp.gripper_close_when_near,
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"handle_name":"Equipment_BB_13_C",
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
"target_close_pos": 0.03,
|
||||
"height_offset": 0.00,#0.07
|
||||
"grasp_radius": 0.04,#original 0.03
|
||||
},
|
||||
weight=10.0
|
||||
)
|
||||
|
||||
# lid_lifted_reward = RewTerm(
|
||||
# func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
|
||||
# params={
|
||||
# "lid_cfg": SceneEntityCfg("lid"),
|
||||
# "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
|
||||
# },
|
||||
# weight=10.0 # 给一个足够大的权重
|
||||
# )
|
||||
|
||||
# lid_lifting_reward = RewTerm(
|
||||
# func=mdp.lid_lift_progress_reward,
|
||||
# params={
|
||||
# "lid_cfg": SceneEntityCfg("lid"),
|
||||
# "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
|
||||
# "target_height_lift": 0.25,
|
||||
# "height_offset": 0.07, # 与其他奖励保持一致
|
||||
# "std": 0.1
|
||||
# },
|
||||
# # 权重设大一点,告诉它这是最终目标
|
||||
# weight=10.0
|
||||
# )
@configclass
|
||||
class TerminationsCfg:
|
||||
"""Termination terms for the MDP."""
|
||||
|
||||
time_out = DoneTerm(func=mdp.time_out, time_out=True)
|
||||
|
||||
mindbot_fly_away = DoneTerm(
|
||||
func=mdp.base_height_failure,
|
||||
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
|
||||
)
|
||||
|
||||
# lid_fly_away = DoneTerm(
|
||||
# func=mdp.base_height_failure,
|
||||
# params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
|
||||
# )
|
||||
|
||||
# # 新增:盖子掉落判定
|
||||
# # 如果盖子掉回桌面(高度接近初始高度),且已经运行了一段时间(避开刚开始重置的瞬间)
|
||||
# lid_dropped = DoneTerm(
|
||||
# func=mdp.base_height_failure, # 复用高度判定函数
|
||||
# params={
|
||||
# "asset_cfg": SceneEntityCfg("lid"),
|
||||
# "minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了
|
||||
# },
|
||||
# )
|
||||
|
||||
|
||||
|
||||
##
|
||||
# Environment configuration
|
||||
##
|
||||
|
||||
@configclass
|
||||
class CurriculumCfg:
|
||||
pass
|
||||
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
|
||||
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
|
||||
|
||||
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
|
||||
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
|
||||
|
||||
# position_std_scheduler = CurrTerm(
|
||||
# func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类
|
||||
# params={
|
||||
# # 路径:rewards管理器 -> 你的奖励项名 -> params字典 -> std键
|
||||
# "address": "rewards.gripper_lid_position_alignment.params.std",
|
||||
|
||||
# # 修改逻辑:使用我们刚才写的函数
|
||||
# "modify_fn": mdp.annealing_std,
|
||||
|
||||
# # 传给函数的参数
|
||||
# "modify_params": {
|
||||
# "start_step": 00, # 约 600 轮
|
||||
# "end_step": 5_000, # 约 1200 轮
|
||||
# "start_std": 0.3,
|
||||
# "end_std": 0.05,
|
||||
# }
|
||||
# }
|
||||
# )
|
||||
|
||||
@configclass
|
||||
class PullEnvCfg(ManagerBasedRLEnvCfg):
|
||||
# Scene settings
|
||||
scene: PullSceneCfg = PullSceneCfg(num_envs=5, env_spacing=3.0)
|
||||
# Basic settings
|
||||
observations: ObservationsCfg = ObservationsCfg()
|
||||
actions: ActionsCfg = ActionsCfg()
|
||||
events: EventCfg = EventCfg()
|
||||
# MDP settings
|
||||
rewards: RewardsCfg = RewardsCfg()
|
||||
terminations: TerminationsCfg = TerminationsCfg()
|
||||
# curriculum: CurriculumCfg = CurriculumCfg()
|
||||
|
||||
# Post initialization
|
||||
def __post_init__(self) -> None:
|
||||
"""Post initialization."""
|
||||
# general settings
|
||||
self.decimation = 4  # was 2; with dt = 1/120 this gives a 30 Hz control rate
|
||||
self.episode_length_s = 10
|
||||
# viewer settings
|
||||
self.viewer.eye = (3.5, 0.0, 3.2)
|
||||
# simulation settings
|
||||
self.sim.dt = 1 / 120 #original 1 / 60
|
||||
self.sim.render_interval = self.decimation
|
||||
# 1. Base GPU heap sizes
|
||||
self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256MB
|
||||
self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
|
||||
self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
|
||||
self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # ~16.7M contact points
|
||||
self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # ~1M contact patches
|
||||
|
||||
# 5. Aggregate-pair capacities (for complex articulations)
|
||||
self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
|
||||
self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
|
||||
self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024
|
||||
@@ -0,0 +1,29 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import gymnasium as gym
|
||||
|
||||
from . import agents
|
||||
|
||||
##
|
||||
# Register Gym environments.
|
||||
##
|
||||
|
||||
|
||||
gym.register(
|
||||
id="Pull-Ultrasound-Lid-Up-v0",
|
||||
entry_point="isaaclab.envs:ManagerBasedRLEnv",
|
||||
disable_env_checker=True,
|
||||
kwargs={
|
||||
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
|
||||
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
|
||||
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
|
||||
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
|
||||
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
|
||||
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
|
||||
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
|
||||
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
|
||||
},
|
||||
)
|
||||
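# Minimal usage sketch (assumes the standard Isaac Lab flow; how env_cfg is loaded
# is illustrative): once `mindbot.tasks` is imported, the id registered above can be
# instantiated through gymnasium.
#
# import gymnasium as gym
# import mindbot.tasks  # noqa: F401  (runs the gym.register call above)
#
# env = gym.make("Pull-Ultrasound-Lid-Up-v0", cfg=env_cfg, render_mode=None)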
@@ -0,0 +1,4 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
@@ -0,0 +1,78 @@
|
||||
params:
|
||||
seed: 42
|
||||
|
||||
# environment wrapper clipping
|
||||
env:
|
||||
# added to the wrapper
|
||||
clip_observations: 5.0
|
||||
# can make custom wrapper?
|
||||
clip_actions: 1.0
|
||||
|
||||
algo:
|
||||
name: a2c_continuous
|
||||
|
||||
model:
|
||||
name: continuous_a2c_logstd
|
||||
|
||||
# doesn't have this fine grained control but made it close
|
||||
network:
|
||||
name: actor_critic
|
||||
separate: False
|
||||
space:
|
||||
continuous:
|
||||
mu_activation: None
|
||||
sigma_activation: None
|
||||
|
||||
mu_init:
|
||||
name: default
|
||||
sigma_init:
|
||||
name: const_initializer
|
||||
val: 0
|
||||
fixed_sigma: True
|
||||
mlp:
|
||||
units: [32, 32]
|
||||
activation: elu
|
||||
d2rl: False
|
||||
|
||||
initializer:
|
||||
name: default
|
||||
regularizer:
|
||||
name: None
|
||||
|
||||
load_checkpoint: False # flag which sets whether to load the checkpoint
|
||||
load_path: '' # path to the checkpoint to load
|
||||
|
||||
config:
|
||||
name: cartpole_direct
|
||||
env_name: rlgpu
|
||||
device: 'cuda:0'
|
||||
device_name: 'cuda:0'
|
||||
multi_gpu: False
|
||||
ppo: True
|
||||
mixed_precision: False
|
||||
normalize_input: True
|
||||
normalize_value: True
|
||||
num_actors: -1 # configured from the script (based on num_envs)
|
||||
reward_shaper:
|
||||
scale_value: 0.1
|
||||
normalize_advantage: True
|
||||
gamma: 0.99
|
||||
tau : 0.95
|
||||
learning_rate: 5e-4
|
||||
lr_schedule: adaptive
|
||||
kl_threshold: 0.008
|
||||
score_to_win: 20000
|
||||
max_epochs: 150
|
||||
save_best_after: 50
|
||||
save_frequency: 25
|
||||
grad_norm: 1.0
|
||||
entropy_coef: 0.0
|
||||
truncate_grads: True
|
||||
e_clip: 0.2
|
||||
horizon_length: 32
|
||||
minibatch_size: 16384
|
||||
mini_epochs: 8
|
||||
critic_coef: 4
|
||||
clip_value: True
|
||||
seq_length: 4
|
||||
bounds_loss_coef: 0.0001
|
||||
@@ -0,0 +1,72 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from isaaclab.utils import configclass
|
||||
|
||||
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
|
||||
|
||||
|
||||
@configclass
|
||||
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
|
||||
num_steps_per_env = 48  # slightly more steps per rollout
|
||||
max_iterations = 5000  # more iterations so the policy has enough time to learn
|
||||
save_interval = 100
|
||||
# experiment_name = "mindbot_grasp" # 建议改个名,不要叫 cartpole 了
|
||||
experiment_name = "mindbot_pullUltrasoundLidUp"
|
||||
|
||||
policy = RslRlPpoActorCriticCfg(
|
||||
init_noise_std=0.7,
|
||||
actor_obs_normalization=True,  # enable observation normalization
|
||||
critic_obs_normalization=True,  # enable observation normalization
|
||||
# Larger capacity: a three-layer MLP with enough width
|
||||
actor_hidden_dims=[128, 64, 32],
|
||||
critic_hidden_dims=[128, 64, 32],
|
||||
activation="elu",
|
||||
)
|
||||
|
||||
algorithm = RslRlPpoAlgorithmCfg(
|
||||
value_loss_coef=1.0,
|
||||
use_clipped_value_loss=True,
|
||||
clip_param=0.2,
|
||||
entropy_coef=0.005,  # keep a moderate amount of exploration
|
||||
num_learning_epochs=5,
|
||||
num_mini_batches=8,
|
||||
learning_rate=3e-4,  # if training is unstable with the larger network, try 5.0e-4
|
||||
schedule="adaptive",
|
||||
gamma=0.99,
|
||||
lam=0.95,
|
||||
desired_kl=0.01,
|
||||
max_grad_norm=1.0,
|
||||
)
|
||||
|
||||
|
||||
# @configclass
|
||||
# class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
|
||||
# num_steps_per_env = 16
|
||||
# max_iterations = 150
|
||||
# save_interval = 50
|
||||
# experiment_name = "cartpole_direct"
|
||||
# policy = RslRlPpoActorCriticCfg(
|
||||
# init_noise_std=1.0,
|
||||
# actor_obs_normalization=False,
|
||||
# critic_obs_normalization=False,
|
||||
# actor_hidden_dims=[32, 32],
|
||||
# critic_hidden_dims=[32, 32],
|
||||
# activation="elu",
|
||||
# )
|
||||
# algorithm = RslRlPpoAlgorithmCfg(
|
||||
# value_loss_coef=1.0,
|
||||
# use_clipped_value_loss=True,
|
||||
# clip_param=0.2,
|
||||
# entropy_coef=0.005,
|
||||
# num_learning_epochs=5,
|
||||
# num_mini_batches=4,
|
||||
# learning_rate=1.0e-3,
|
||||
# schedule="adaptive",
|
||||
# gamma=0.99,
|
||||
# lam=0.95,
|
||||
# desired_kl=0.01,
|
||||
# max_grad_norm=1.0,
|
||||
# )
|
||||
@@ -0,0 +1,20 @@
|
||||
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
|
||||
seed: 42
|
||||
|
||||
n_timesteps: !!float 1e6
|
||||
policy: 'MlpPolicy'
|
||||
n_steps: 16
|
||||
batch_size: 4096
|
||||
gae_lambda: 0.95
|
||||
gamma: 0.99
|
||||
n_epochs: 20
|
||||
ent_coef: 0.01
|
||||
learning_rate: !!float 3e-4
|
||||
clip_range: !!float 0.2
|
||||
policy_kwargs:
|
||||
activation_fn: nn.ELU
|
||||
net_arch: [32, 32]
|
||||
squash_output: False
|
||||
vf_coef: 1.0
|
||||
max_grad_norm: 1.0
|
||||
device: "cuda:0"
|
||||
@@ -0,0 +1,111 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: True
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: -2.9
|
||||
fixed_log_std: True
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ONE
|
||||
discriminator: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
# AMP memory (reference motion dataset)
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
motion_dataset:
|
||||
class: RandomMemory
|
||||
memory_size: 200000
|
||||
|
||||
# AMP memory (preventing discriminator overfitting)
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
reply_buffer:
|
||||
class: RandomMemory
|
||||
memory_size: 1000000
|
||||
|
||||
|
||||
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
|
||||
agent:
|
||||
class: AMP
|
||||
rollouts: 16
|
||||
learning_epochs: 6
|
||||
mini_batches: 2
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 5.0e-05
|
||||
learning_rate_scheduler: null
|
||||
learning_rate_scheduler_kwargs: null
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
amp_state_preprocessor: RunningStandardScaler
|
||||
amp_state_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 0.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.5
|
||||
discriminator_loss_scale: 5.0
|
||||
amp_batch_size: 512
|
||||
task_reward_weight: 0.0
|
||||
style_reward_weight: 1.0
|
||||
discriminator_batch_size: 4096
|
||||
discriminator_reward_scale: 2.0
|
||||
discriminator_logit_regularization_scale: 0.05
|
||||
discriminator_gradient_penalty_scale: 5.0
|
||||
discriminator_weight_decay_scale: 1.0e-04
|
||||
# rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "humanoid_amp_run"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 80000
|
||||
environment_info: log
|
||||
@@ -0,0 +1,80 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: False
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
|
||||
agent:
|
||||
class: IPPO
|
||||
rollouts: 16
|
||||
learning_epochs: 8
|
||||
mini_batches: 1
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 3.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cart_double_pendulum_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,82 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: True
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
|
||||
agent:
|
||||
class: MAPPO
|
||||
rollouts: 16
|
||||
learning_epochs: 8
|
||||
mini_batches: 1
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 3.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
shared_state_preprocessor: RunningStandardScaler
|
||||
shared_state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cart_double_pendulum_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,80 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: False
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
|
||||
agent:
|
||||
class: PPO
|
||||
rollouts: 32
|
||||
learning_epochs: 8
|
||||
mini_batches: 8
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 5.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 0.1
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cartpole_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
"""This sub-module contains the functions that are specific to the environment."""
|
||||
|
||||
from isaaclab.envs.mdp import * # noqa: F401, F403
|
||||
|
||||
from .rewards import * # noqa: F401, F403
|
||||
from .terminations import * # noqa: F401, F403
|
||||
from .gripper import * # noqa: F401, F403
|
||||
from .curriculums import * # noqa: F401, F403
|
||||
@@ -0,0 +1,50 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
    import torch
    from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
|
||||
from isaaclab.envs.mdp import modify_term_cfg
|
||||
|
||||
def annealing_std(
|
||||
env: ManagerBasedRLEnv,
|
||||
env_ids: torch.Tensor,
|
||||
current_value: float,
|
||||
start_step: int,
|
||||
end_step: int,
|
||||
start_std: float,
|
||||
end_std: float
|
||||
):
|
||||
"""
|
||||
根据步数线性退火 std 值。
|
||||
|
||||
Args:
|
||||
current_value: 当前的参数值 (系统自动传入)
|
||||
start_step: 开始退火的步数
|
||||
end_step: 结束退火的步数
|
||||
start_std: 初始 std
|
||||
end_std: 最终 std
|
||||
"""
|
||||
current_step = env.common_step_counter
|
||||
|
||||
    # 1. Before the start step -> keep the initial value
    if current_step < start_step:
        # force start_std in case the current value has not been set to it yet
        return start_std
    # 2. Past the end step -> keep the final value
    elif current_step >= end_step:
        return end_std
    # 3. In between -> linear interpolation
    else:
        ratio = (current_step - start_step) / (end_step - start_step)
        new_std = start_std + (end_std - start_std) * ratio
        return new_std
|
||||
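# Wiring sketch (an assumption, mirroring the commented-out CurriculumCfg in the env
# config): annealing_std is meant to be used as the `modify_fn` of mdp.modify_term_cfg,
# addressed at a reward term's `std` parameter. The reward-term name below is illustrative.
#
# from isaaclab.managers import CurriculumTermCfg as CurrTerm
#
# position_std_scheduler = CurrTerm(
#     func=modify_term_cfg,
#     params={
#         "address": "rewards.gripper_handle_position_alignment.params.std",
#         "modify_fn": annealing_std,
#         "modify_params": {"start_step": 0, "end_step": 5_000, "start_std": 0.3, "end_std": 0.05},
#     },
# )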
@@ -0,0 +1,54 @@
|
||||
# Assumed to live in the task's mdp module
|
||||
|
||||
from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
|
||||
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
|
||||
from isaaclab.utils import configclass
|
||||
import torch
|
||||
|
||||
|
||||
class CoupledJointPositionAction(JointPositionAction):
|
||||
def __init__(self, cfg: 'CoupledJointPositionActionCfg', env):
|
||||
super().__init__(cfg, env)
|
||||
|
||||
@property
|
||||
def action_dim(self) -> int:
|
||||
"""强制 ActionManager 认为只需要 1D 输入。"""
|
||||
return 1
|
||||
|
||||
"""
|
||||
这是运行时被实例化的类。它继承自 JointPositionAction。
|
||||
"""
|
||||
def process_actions(self, actions: torch.Tensor):
|
||||
scale = self.cfg.scale
|
||||
offset = self.cfg.offset
|
||||
# store the raw actions
|
||||
self._raw_actions[:] = torch.clamp(actions, -1, 1)
|
||||
# apply the affine transformations
|
||||
target_position_interval = self._raw_actions * scale + offset
|
||||
right_cmd = target_position_interval
|
||||
left_cmd = -target_position_interval
|
||||
# import pdb; pdb.set_trace()
|
||||
self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)
|
||||
# print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}")
|
||||
# clip actions
|
||||
if self.cfg.clip is not None:
|
||||
self._processed_actions = torch.clamp(
|
||||
self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
|
||||
)
|
||||
|
||||
|
||||
@configclass
|
||||
class CoupledJointPositionActionCfg(JointPositionActionCfg):
|
||||
"""
|
||||
配置类。关键在于设置 class_type 指向上面的实现类。
|
||||
"""
|
||||
# Key point: tell the ActionManager to instantiate CoupledJointPositionAction for this config.
|
||||
class_type = CoupledJointPositionAction
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
# This check is fine here: the ActionManager calls __post_init__ during setup.
|
||||
if len(self.joint_names) != 2:
|
||||
raise ValueError("Requires exactly two joint names.")
|
||||
super().__post_init__()
|
||||
|
||||
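# Usage sketch (an assumption, not part of this commit): the coupled 1-D action could
# replace the binary gripper term in ActionsCfg. Joint names follow the ones used
# elsewhere in this commit; the scale/offset values are illustrative.
#
# grippers_position = CoupledJointPositionActionCfg(
#     asset_name="Mindbot",
#     joint_names=["right_hand_joint_left", "right_hand_joint_right"],
#     scale=0.015,   # clamped [-1, 1] action maps to +/-1.5 cm of travel
#     offset=0.015,
# )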
@@ -0,0 +1,881 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import torch
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from isaaclab.assets import Articulation, RigidObject
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
|
||||
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
|
||||
"""Penalize joint position deviation from a target value."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
asset: Articulation = env.scene[asset_cfg.name]
|
||||
# wrap the joint positions to (-pi, pi)
|
||||
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
|
||||
# compute the reward
|
||||
return torch.sum(torch.square(joint_pos - target), dim=1)
|
||||
|
||||
|
||||
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
|
||||
"""Reward the agent for lifting the lid above the minimal height."""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
|
||||
height = lid.data.root_pos_w[:, 2]
|
||||
return torch.where(height > minimal_height, 1.0, 0.0)
|
||||
|
||||
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Reward the agent for reaching the lid using tanh-kernel."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# Coerce std to a Python float so it cannot broadcast into a (3,) vector
|
||||
if not isinstance(std, float):
|
||||
if torch.is_tensor(std):
|
||||
std = std.item()
|
||||
else:
|
||||
std = float(std)
|
||||
|
||||
# Target lid position: (num_envs, 3)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# Gripper position: (num_envs, 3)
|
||||
# body_idx = robot.find_bodies(gripper_body_name)[0]
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
|
||||
# Distance of the gripper to the lid: (num_envs,)
|
||||
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
|
||||
|
||||
return 1 - torch.tanh(distance / std)
|
||||
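# Shape of the tanh kernel above (illustrative values):
#   distance = 0        -> reward = 1 - tanh(0)  = 1.0
#   distance = std      -> reward = 1 - tanh(1) ~= 0.24
#   distance = 3 * std  -> reward = 1 - tanh(3) ~= 0.005
# so std sets the length scale over which the reaching reward decays.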
|
||||
|
||||
def lid_grasped(env: ManagerBasedRLEnv,
|
||||
distance_threshold: float = 0.05,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Reward the agent for grasping the lid (close and lifted)."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# Target lid position: (num_envs, 3)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# Gripper position: (num_envs, 3)
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
|
||||
|
||||
# Distance of the gripper to the lid: (num_envs,)
|
||||
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
|
||||
|
||||
# Check if close and lifted
|
||||
is_close = distance < distance_threshold
|
||||
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
|
||||
|
||||
return torch.where(is_close & is_lifted, 1.0, 0.0)
|
||||
|
||||
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Combined reward for reaching the lid AND lifting it."""
|
||||
# Get reaching reward
|
||||
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
|
||||
# Get lifting reward
|
||||
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
|
||||
# Combine rewards multiplicatively
|
||||
return reach_reward * lift_reward
|
||||
|
||||
|
||||
|
||||
# def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# left_gripper_name: str = "left_hand__l",
|
||||
# right_gripper_name: str = "left_hand_r",
|
||||
# height_offset: float = 0.1,
|
||||
# std: float = 0.1) -> torch.Tensor:
|
||||
# """奖励函数:夹爪相对于lid的位置对齐。
|
||||
|
||||
# Args:
|
||||
# env: 环境实例
|
||||
# lid_cfg: lid的配置
|
||||
# robot_cfg: 机器人的配置
|
||||
# left_gripper_name: 左侧夹爪body名称
|
||||
# right_gripper_name: 右侧夹爪body名称
|
||||
# side_distance: Y方向的期望距离
|
||||
# height_offset: Z方向的期望高度偏移
|
||||
# std: tanh核函数的标准差
|
||||
|
||||
# Returns:
|
||||
# 位置对齐奖励 (num_envs,)
|
||||
# """
|
||||
# lid: RigidObject = env.scene[lid_cfg.name]
|
||||
# robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# # 获取两个夹爪的位置
|
||||
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
|
||||
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
|
||||
|
||||
# # 计算夹爪中心位置
|
||||
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
|
||||
|
||||
# # 计算相对位置(夹爪中心相对于 lid 中心)
|
||||
# rel_pos = gripper_center - lid_pos_w # (num_envs, 3)
|
||||
|
||||
# # X 方向位置:应该对齐(接近 0)
|
||||
# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6
|
||||
# x_reward = 1.0 - torch.tanh(x_dist / std)
|
||||
|
||||
# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance)
|
||||
# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6
|
||||
# # y_error = torch.abs(y_dist - side_distance)
|
||||
# # y_reward = 1.0 - torch.tanh(y_error / std)
|
||||
# y_reward = 1.0 - torch.tanh(y_dist / std)
|
||||
|
||||
# # Z 方向位置:应该在 lid 上方 height_offset 处
|
||||
# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6
|
||||
# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6
|
||||
# z_reward = 1.0 - torch.tanh(z_error / std)
|
||||
|
||||
# # 组合位置奖励
|
||||
# position_reward = x_reward * y_reward * z_reward
|
||||
|
||||
# return position_reward
|
||||
|
||||
|
||||
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
height_offset: float = 0.02,
|
||||
std: float = 0.1) -> torch.Tensor:
|
||||
"""奖励函数:夹爪相对于lid的位置对齐(简化版)。
|
||||
|
||||
计算夹爪中心到目标点(lid上方height_offset处)的距离奖励。
|
||||
|
||||
Args:
|
||||
env: 环境实例
|
||||
lid_cfg: lid的配置
|
||||
robot_cfg: 机器人的配置
|
||||
left_gripper_name: 左侧夹爪body名称
|
||||
right_gripper_name: 右侧夹爪body名称
|
||||
height_offset: Z方向的期望高度偏移(目标点在lid上方)
|
||||
std: tanh核函数的标准差
|
||||
|
||||
Returns:
|
||||
位置对齐奖励 (num_envs,)
|
||||
"""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# 目标点:lid位置 + height_offset(Z方向)
|
||||
target_pos = lid_pos_w.clone()
|
||||
# print(target_pos[0])
|
||||
target_pos[:, 2] += height_offset # 在lid上方height_offset处
|
||||
# print(target_pos[0])
|
||||
# 获取两个夹爪的位置
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
|
||||
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
|
||||
|
||||
# 计算夹爪中心位置
|
||||
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
|
||||
# print(gripper_center[0])
|
||||
# 计算夹爪中心到目标点的3D距离
|
||||
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# 距离奖励:距离越小,奖励越大
|
||||
position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
return position_reward
|
||||
|
||||
|
||||
# ... (保留原有的 import)
|
||||
|
||||
# ==============================================================================
|
||||
# New: direct distance penalty (linear guidance)
|
||||
# ==============================================================================
|
||||
def gripper_lid_distance_penalty(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
height_offset: float = 0.07,
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
计算夹爪中心到目标点的欧式距离。
|
||||
返回的是正的距离值,我们会在 config 里给它设置负权重 (比如 weight=-1.0)。
|
||||
这样:距离越远 -> 惩罚越大 (Total Reward 越低)。
|
||||
"""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# 1. Target point (lid center + height offset)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
lid_pos_w[:, 2] += height_offset
|
||||
|
||||
# 2. Gripper center
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
left_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
|
||||
right_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
|
||||
gripper_center = (left_pos + right_pos) / 2.0
|
||||
|
||||
# 3. Distance
|
||||
distance = torch.norm(gripper_center - lid_pos_w, dim=1)
|
||||
|
||||
return distance
|
||||
|
||||
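# Wiring sketch (an assumption): this term is intended to be registered with a
# negative weight in RewardsCfg, using the same body names as the defaults above.
#
# reach_penalty = RewTerm(
#     func=mdp.gripper_lid_distance_penalty,
#     params={
#         "lid_cfg": SceneEntityCfg("lid"),
#         "robot_cfg": SceneEntityCfg("Mindbot"),
#         "left_gripper_name": "left_hand__l",
#         "right_gripper_name": "left_hand_r",
#         "height_offset": 0.07,
#     },
#     weight=-1.0,  # distance enters negatively, so farther = more penalty
# )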
# ==============================================================================
|
||||
# Tuned: position alignment (redefines gripper_lid_position_alignment above; this version takes effect)
|
||||
# ==============================================================================
|
||||
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
height_offset: float = 0.02,
|
||||
std: float = 0.1) -> torch.Tensor:
|
||||
"""奖励函数:基于 tanh 核函数的距离奖励 (靠近得越多,分数涨得越快)"""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
target_pos = lid_pos_w.clone()
|
||||
target_pos[:, 2] += height_offset
|
||||
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
|
||||
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
|
||||
|
||||
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
|
||||
|
||||
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# 这里保持不变,关键在于我们在 config 里怎么设置 std 和 weight
|
||||
position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
return position_reward
|
||||
|
||||
|
||||
# def gripper_lid_orientation_alignment(
|
||||
# env,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# gripper_body_name: str = "left_hand_body",
|
||||
# gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z
|
||||
# gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行)
|
||||
# lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# # 1. 获取对象
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 2. 获取姿态
|
||||
# lid_quat_w = lid.data.root_quat_w
|
||||
# body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
# gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
|
||||
|
||||
# # 3. 垂直对齐 (Vertical Alignment)
|
||||
# # 目标:夹爪 +Y 指向 World -Z
|
||||
# grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
|
||||
# # print(grip_fwd_world[0])
|
||||
# world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
|
||||
# dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) #add
|
||||
# # 映射 [-1, 1] -> [0, 1],且全程有梯度
|
||||
# # 即使是朝天 (+1),只要稍微往下转一点,分数就会增加
|
||||
# # rew_vertical = (dot_vertical + 1.0) / 2.0
|
||||
# val_vertical = torch.clamp((dot_vertical + 1.0) / 2.0, 0.0, 1.0)
|
||||
# rew_vertical = torch.pow(val_vertical, 2) + 1e-4
|
||||
# # nan -> 0
|
||||
# rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
|
||||
|
||||
# # 4. 偏航对齐 (Yaw Alignment)
|
||||
# # 目标:夹爪 +Z 平行于 Lid +Y
|
||||
# grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
|
||||
|
||||
# lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
|
||||
|
||||
# dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
|
||||
# rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
|
||||
# # nan -> 0
|
||||
# rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
|
||||
|
||||
# # 5. 组合
|
||||
# total_reward = rew_vertical * rew_yaw
|
||||
|
||||
# total_reward=torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
|
||||
|
||||
# # debug
|
||||
# if not torch.isfinite(total_reward).all():
|
||||
# print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
|
||||
# print(f"rew_vertical: {rew_vertical}")
|
||||
# print(f"rew_yaw: {rew_yaw}")
|
||||
|
||||
# return total_reward
|
||||
|
||||
|
||||
def gripper_lid_orientation_alignment(
|
||||
env,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body",
|
||||
gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z
|
||||
gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行)
|
||||
lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y
|
||||
max_angle_deg: float = 60.0, # 允许的最大角度偏差(度)
|
||||
) -> torch.Tensor:
|
||||
|
||||
# 1. 获取对象
|
||||
lid = env.scene[lid_cfg.name]
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 2. 获取姿态
|
||||
lid_quat_w = lid.data.root_quat_w
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
|
||||
|
||||
# 3. 垂直对齐 (Vertical Alignment)
|
||||
# 目标:夹爪前向轴指向 World -Z(向下)
|
||||
grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
|
||||
world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# 计算点积(归一化后,点积 = cos(角度))
|
||||
dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
|
||||
dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0)
|
||||
|
||||
# 计算角度(弧度)
|
||||
angle_rad = torch.acos(torch.clamp(dot_vertical, -1.0, 1.0))
|
||||
max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device)
|
||||
|
||||
# 如果角度 <= max_angle_deg,给予奖励
|
||||
# 奖励从角度=0时的1.0平滑衰减到角度=max_angle时的0.0
|
||||
angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
|
||||
rew_vertical = 1.0 - angle_normalized # 角度越小,奖励越大
|
||||
|
||||
# 如果角度超过60度,奖励为0
|
||||
rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical))
|
||||
|
||||
# nan -> 0
|
||||
rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
|
||||
|
||||
# 4. 偏航对齐 (Yaw Alignment)
|
||||
# 目标:夹爪 +Z 平行于 Lid +Y
|
||||
grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
|
||||
|
||||
lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
|
||||
|
||||
dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
|
||||
rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
|
||||
# nan -> 0
|
||||
rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
|
||||
|
||||
# 5. 组合
|
||||
total_reward = rew_vertical * rew_yaw
|
||||
|
||||
total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
|
||||
|
||||
# debug
|
||||
if not torch.isfinite(total_reward).all():
|
||||
print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
|
||||
print(f"rew_vertical: {rew_vertical}")
|
||||
print(f"rew_yaw: {rew_yaw}")
|
||||
|
||||
return total_reward
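# --- Editor's summary (reference only, not original code) of the reward above ---
# With theta = acos(clamp(fwd_world . (-z_world), -1, 1)) and theta_max = max_angle_deg in radians:
#   rew_vertical = 1 - theta / theta_max   if theta <= theta_max, else 0
#   rew_yaw      = (width_world . handle_world)^2 + 1e-4
#   total_reward = rew_vertical * rew_yaw
# Example: theta = 30 deg with max_angle_deg = 60 gives rew_vertical = 0.5.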
|
||||
|
||||
|
||||
# def gripper_open_close_reward(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# left_gripper_name: str = "left_hand_body", # 用于计算距离的 body
|
||||
# # 注意:确保 joint_names 只包含那两个夹爪关节
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# close_threshold: float = 0.05, # 5cm 以内视为“近”,需要闭合
|
||||
# target_open_pos: float = 0.0, # 【修正】张开是 0.0
|
||||
# target_close_pos: float = 0.03 # 【修正】闭合是 0.03
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 逻辑很简单:
|
||||
# 1. 如果 距离 < close_threshold:目标关节位置 = 0.03 (闭合)
|
||||
# 2. 如果 距离 >= close_threshold:目标关节位置 = 0.0 (张开)
|
||||
# 3. 返回 -abs(当前关节 - 目标关节),即惩罚误差。
|
||||
# """
|
||||
|
||||
# # 1. 获取对象
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 2. 计算距离 (末端 vs Lid中心)
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
|
||||
# # distance: (num_envs,)
|
||||
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
|
||||
|
||||
# # 3. 动态确定目标关节值
|
||||
# # 如果距离小于阈值,目标设为 close_pos,否则设为 open_pos
|
||||
# # target_val: (num_envs, 1)
|
||||
# # print(distance)
|
||||
# target_val = torch.where(
|
||||
# distance < close_threshold,
|
||||
# torch.tensor(target_close_pos, device=env.device),
|
||||
# torch.tensor(target_open_pos, device=env.device)
|
||||
# ).unsqueeze(1)
|
||||
|
||||
# # 4. 获取当前关节位置
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # joint_pos: (num_envs, 2)
|
||||
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
|
||||
|
||||
# # 取绝对值(防止左指是负数的情况,虽然你的配置里范围看起来都是正的,加上abs更保险)
|
||||
# current_joint_pos_abs = torch.abs(current_joint_pos)
|
||||
|
||||
# # 5. 计算误差
|
||||
# # error: (num_envs,) -> 两个手指误差的平均值
|
||||
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
|
||||
|
||||
# # # 6. 返回负奖励 (惩罚)
|
||||
# # # 误差越大,惩罚越大。完全符合要求时奖励为 0。
|
||||
# # return -error
|
||||
# # =====================================================
|
||||
# # 🚀 核心修复:安全输出
|
||||
# # =====================================================
|
||||
# # 1. 使用 tanh 限制数值范围在 [-1, 0]
|
||||
# reward = -torch.tanh(error / 0.01)
|
||||
|
||||
# # 2. 过滤 NaN!这一步能救命!
|
||||
# # 如果物理引擎算出 NaN,这里会把它变成 0.0,防止训练崩溃
|
||||
# reward = torch.nan_to_num(reward, nan=0.0, posinf=0.0, neginf=-1.0)
|
||||
|
||||
# return reward
|
||||
|
||||
|
||||
# def gripper_open_close_reward(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# left_gripper_name: str = "left_hand_body",
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# close_threshold: float = 0.05,
|
||||
# target_open_pos: float = 0.0,
|
||||
# target_close_pos: float = 0.03,
|
||||
# height_offset: float = 0.06, # 新增:与位置奖励保持一致
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 逻辑:
|
||||
# 1. 计算夹爪到【目标抓取点】(Lid上方 height_offset 处) 的距离。
|
||||
# 2. 如果距离 < close_threshold:目标关节位置 = 闭合。
|
||||
# 3. 否则:目标关节位置 = 张开。
|
||||
# """
|
||||
|
||||
# # 1. 获取对象
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 2. 计算目标抓取点位置 (Lid位置 + Z轴偏移)
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
# lid_pos_w[:, 2] += height_offset # 修正为抓取点位置
|
||||
|
||||
# # 获取夹爪位置
|
||||
# body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
|
||||
# # 3. 计算距离 (夹爪 vs 目标抓取点)
|
||||
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
|
||||
|
||||
# # 4. 动态确定目标关节值
|
||||
# # 靠近抓取点 -> 闭合;远离 -> 张开
|
||||
# target_val = torch.where(
|
||||
# distance < close_threshold,
|
||||
# torch.tensor(target_close_pos, device=env.device),
|
||||
# torch.tensor(target_open_pos, device=env.device)
|
||||
# ).unsqueeze(1)
|
||||
|
||||
# # 5. 获取当前关节位置
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
|
||||
# current_joint_pos_abs = torch.abs(current_joint_pos)
|
||||
|
||||
# # 6. 计算误差并返回奖励
|
||||
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
|
||||
|
||||
# # 使用 tanh 限制数值范围
|
||||
# reward = 1.0 - torch.tanh(error / 0.01) # 误差越小奖励越大
|
||||
|
||||
# return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
def _is_grasped(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg,
|
||||
robot_cfg: SceneEntityCfg,
|
||||
left_gripper_name: str,
|
||||
right_gripper_name: str,
|
||||
joint_names: list,
|
||||
height_offset: float,
|
||||
grasp_radius: float,
|
||||
target_close_pos: float
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
内部辅助函数:判定是否成功抓取。
|
||||
条件:
|
||||
1. 夹爪中心在把手目标点附近 (Sphere Check)
|
||||
2. 夹爪处于闭合发力状态 (Joint Effort Check)
|
||||
3. (关键) 夹爪Z轴高度合适,不能压在盖子上面 (Z-Axis Check)
|
||||
"""
|
||||
# 1. 获取对象
|
||||
lid = env.scene[lid_cfg.name]
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 2. 目标点位置 (把手中心)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
lid_pos_w[:, 2] += height_offset
|
||||
|
||||
# 3. 夹爪位置
|
||||
l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
gripper_center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# 4. 条件一:距离判定 (在把手球范围内)
|
||||
dist_to_handle = torch.norm(gripper_center - lid_pos_w, dim=-1)
|
||||
is_near = dist_to_handle < grasp_radius
|
||||
|
||||
# 5. 条件二:Z轴高度判定 (防止压在盖子上)
|
||||
# 夹爪中心必须在把手目标点附近,允许上下微小浮动,但不能太高
|
||||
# 假设 height_offset 是把手几何中心,那么夹爪不应该比它高太多
|
||||
z_diff = gripper_center[:, 2] - lid_pos_w[:, 2]
|
||||
is_z_valid = torch.abs(z_diff) < 0.03 # 3cm 容差
|
||||
|
||||
# 6. 条件三:闭合判定 (正在尝试闭合)
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
# 获取关节位置 (绝对值)
|
||||
joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
|
||||
# === 修改闭合判定 ===
|
||||
|
||||
# 1. 关节位置判定 (Effort Check)
|
||||
# 只要用力了就行,去掉上限判定,防止夹太紧被误判
|
||||
# 只要两个手指关节都有明显位移(> 0.005)就视为在发力闭合
|
||||
is_effort_closing = torch.all(joint_pos > 0.005, dim=1)
|
||||
|
||||
# 2. 几何排斥判定 (Geometry Check)
|
||||
# 如果真的夹住了东西,左右指尖的距离应该被把手撑开,不会太小
|
||||
# 获取指尖位置
|
||||
dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
|
||||
|
||||
# 假设把手厚度是 1cm (0.01m)
|
||||
# 如果指尖距离 < 0.005,说明两个手指已经碰到一起了(空夹),没夹住东西
|
||||
# 如果指尖距离 >= 0.005,说明中间有东西挡着
|
||||
is_not_empty = dist_fingertips > 0.005
|
||||
|
||||
# 综合判定:
|
||||
# 1. 在把手附近 (is_near)
|
||||
# 2. 高度合适 (is_z_valid)
|
||||
# 3. 在用力闭合 (is_effort_closing)
|
||||
# 4. 指尖没贴死 (is_not_empty) -> 这就证明夹住东西了!
|
||||
return is_near & is_z_valid & is_effort_closing & is_not_empty
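# --- Editor's note: the grasp check above is the AND of four heuristic gates ---
# is_near (gripper center within grasp_radius of the handle point), is_z_valid (|dz| < 3 cm),
# is_effort_closing (both finger joints displaced by more than 0.005 in joint units), and
# is_not_empty (fingertip gap > 0.005 m, i.e. the fingers did not close on air).
# These thresholds are tuned to this gripper/lid geometry and would need re-tuning for other assets.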
|
||||
|
||||
|
||||
def gripper_close_when_near(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
target_close_pos: float = 0.03,
|
||||
height_offset: float = 0.02,
|
||||
grasp_radius: float = 0.05
|
||||
) -> torch.Tensor:
|
||||
|
||||
# 1. 判定基础抓取条件是否满足
|
||||
is_grasped = _is_grasped(
|
||||
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
# 2. 计算夹紧程度 (Clamping Intensity)
|
||||
robot = env.scene[robot_cfg.name]
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
# 获取当前关节位置绝对值 (num_envs, num_joints)
|
||||
current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# 计算两个指关节的平均闭合深度
|
||||
mean_joint_pos = torch.mean(current_joint_pos, dim=1)
|
||||
|
||||
# 计算奖励系数:当前位置 / 目标闭合位置
|
||||
# 这样当关节越接近 0.03 时,奖励越接近 1.0
|
||||
# 强制让 Agent 产生“压满”动作的冲动
|
||||
clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
|
||||
|
||||
# 3. 只有在满足抓取判定时,才发放带有权重的夹紧奖励
|
||||
# 如果没对准或者没夹到,奖励依然是 0
|
||||
return torch.where(is_grasped, clamping_factor, 0.0)
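# --- Editor's example (sketch, not original code): behaviour of the clamping reward ---
# With target_close_pos = 0.03, a mean finger displacement of 0.015 gives
# clamping_factor = 0.015 / 0.03 = 0.5, and 0.03 or more saturates at 1.0;
# the factor is only paid out while _is_grasped() holds, otherwise the term is 0.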
|
||||
|
||||
# def gripper_close_when_near(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
|
||||
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
|
||||
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# target_close_pos: float = 0.03,
|
||||
# height_offset: float = 0.03,
|
||||
# grasp_radius: float = 0.02 # 球面半径 2cm
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# # 1. 获取位置
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
# lid_pos_w[:, 2] += height_offset # 把手中心
|
||||
|
||||
# # 获取左右指尖位置
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
|
||||
# # 计算夹爪中心
|
||||
# pos_center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# # 2. 距离判定 (Is Inside Sphere?)
|
||||
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
|
||||
# is_in_sphere = (dist_center < grasp_radius).float()
|
||||
|
||||
# # 3. "在中间"判定 (Is Between?)
|
||||
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
|
||||
# vec_LR = pos_R - pos_L # 左指指向右指
|
||||
# vec_LO = lid_pos_w - pos_L # 左指指向物体
|
||||
|
||||
# # 计算投影比例 t
|
||||
# # P_proj = P_L + t * (P_R - P_L)
|
||||
# # t = (vec_LO . vec_LR) / |vec_LR|^2
|
||||
# # 如果 0 < t < 1,说明投影在两个指尖之间
|
||||
|
||||
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
|
||||
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
|
||||
# t = dot / len_sq
|
||||
|
||||
# is_between = (t > 0.0) & (t < 1.0)
|
||||
# is_between = is_between.float()
|
||||
|
||||
# # 4. 闭合判定
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # 注意:如果是 Binary Action,可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
|
||||
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
|
||||
# # 只有当非常接近闭合目标时才给分
|
||||
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
|
||||
|
||||
# # 5. 最终奖励
|
||||
# # 只有三者同时满足才给 1.0 分
|
||||
# reward = is_in_sphere * is_between * is_closing
|
||||
|
||||
# return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
# def lid_is_lifted(
|
||||
# env:ManagerBasedRLEnv,
|
||||
# minimal_height:float,
|
||||
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# lid_height = lid.data.root_pos_w[:, 2]
|
||||
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
|
||||
# reward=torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
# return reward
|
||||
|
||||
|
||||
def lid_is_lifted(
|
||||
env: ManagerBasedRLEnv,
|
||||
minimal_height: float = 0.05, # 相对提升阈值
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
height_offset: float = 0.03,
|
||||
grasp_radius: float = 0.1,
|
||||
target_close_pos: float = 0.03,
|
||||
) -> torch.Tensor:
|
||||
|
||||
is_grasped = _is_grasped(
|
||||
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
lid = env.scene[lid_cfg.name]
|
||||
|
||||
# 1. 获取高度
|
||||
current_height = lid.data.root_pos_w[:, 2]
|
||||
# 自动获取初始高度
|
||||
initial_height = lid.data.default_root_state[:, 2]
|
||||
|
||||
# 2. 计算提升量
|
||||
lift_height = torch.clamp(current_height - initial_height, min=0.0)
|
||||
|
||||
# 3. 速度检查 (防撞飞)
|
||||
# 如果速度 > 1.0 m/s,视为被撞飞,不给分
|
||||
lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
|
||||
is_stable = lid_vel < 1.0
|
||||
|
||||
# 4. 计算奖励
|
||||
# 基础分:高度越高分越高
|
||||
shaping_reward = lift_height * 2.0
|
||||
# 成功分:超过阈值
|
||||
success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
|
||||
|
||||
# 组合
|
||||
# 必须 is_grasped AND is_stable
|
||||
reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
|
||||
|
||||
return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
|
||||
def lid_lift_success_reward(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand_body",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# 关键参数
|
||||
settled_height: float = 0.75, # 盖子落在超声仪上的稳定高度 (需根据仿真实际情况微调)
|
||||
target_lift_height: float = 0.05, # 目标提升高度 (5cm)
|
||||
grasp_dist_threshold: float = 0.05, # 抓取判定距离
|
||||
closed_pos: float = 0.03 # 夹爪闭合阈值
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
提升奖励:只有在【夹稳】的前提下,将盖子【相对】提升达到目标高度才给高分。
|
||||
"""
|
||||
# 1. 获取数据
|
||||
lid = env.scene[lid_cfg.name]
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 盖子实时高度
|
||||
lid_z = lid.data.root_pos_w[:, 2]
|
||||
lid_pos = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# 夹爪位置
|
||||
body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
|
||||
# 2. 【条件一:是否夹稳 (Is Grasped?)】
|
||||
# 距离检查
|
||||
distance = torch.norm(gripper_pos - lid_pos, dim=-1)
|
||||
is_close = distance < grasp_dist_threshold
|
||||
|
||||
# 闭合检查
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# 假设 > 80% 的闭合度算抓紧了
|
||||
is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)
|
||||
|
||||
is_grasped = is_close & is_closed
|
||||
|
||||
# 3. 【条件二:计算相对提升 (Relative Lift)】
|
||||
# 当前高度 - 初始稳定高度
|
||||
current_lift = lid_z - settled_height
|
||||
|
||||
# 4. 计算奖励
|
||||
lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
|
||||
|
||||
# 基础提升分 (Shaping Reward)
|
||||
lift_reward = lift_progress
|
||||
|
||||
# 成功大奖 (Success Bonus)
|
||||
# 如果提升超过目标高度 (比如 > 95% 的 5cm),给额外大奖
|
||||
success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
|
||||
|
||||
# 组合:只有在 is_grasped 为 True 时才发放提升奖励
|
||||
total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))
|
||||
|
||||
# 5. 安全输出
|
||||
return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
|
||||
def lid_lift_progress_reward(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
initial_height: float = 0.8,
|
||||
target_height_lift: float = 0.15,
|
||||
height_offset: float = 0.02, # 必须与抓取奖励保持一致
|
||||
grasp_radius: float = 0.1, # 提升时可以稍微放宽一点半径,防止抖动丢分
|
||||
target_close_pos: float = 0.03,
|
||||
std: float = 0.05 # 标准差
|
||||
) -> torch.Tensor:
|
||||
|
||||
# 1. 判定是否夹住
|
||||
is_grasped = _is_grasped(
|
||||
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
# 2. 计算高度
|
||||
lid = env.scene[lid_cfg.name]
|
||||
current_height = lid.data.root_pos_w[:, 2]
|
||||
lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
|
||||
# print(current_height)
|
||||
# print(lift_height)
|
||||
# 3. 计算奖励
|
||||
# 只有在 is_grasped 为 True 时,才发放高度奖励
|
||||
# 这样彻底杜绝了 "砸飞/撞飞" 拿分的情况
|
||||
progress = torch.tanh(lift_height / std)
|
||||
|
||||
reward = torch.where(is_grasped, progress, 0.0)
|
||||
|
||||
return reward
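# --- Editor's example (sketch, not original code): scale of the lift-progress term ---
# progress = tanh(lift_height / std); with the defaults std = 0.05 and target_height_lift = 0.15,
# a 5 cm lift gives tanh(1) ~ 0.76 and the 15 cm clamp limit gives tanh(3) ~ 0.995, so most of the
# signal arrives within the first few centimetres of lift, and only while is_grasped holds.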
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
from __future__ import annotations
|
||||
import torch
|
||||
from typing import TYPE_CHECKING
|
||||
from isaaclab.assets import Articulation, RigidObject
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
def lid_dropped(env: ManagerBasedRLEnv,
|
||||
minimum_height: float = -0.05,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
return lid.data.root_pos_w[:, 2] < minimum_height
|
||||
|
||||
def lid_successfully_grasped(env: ManagerBasedRLEnv,
|
||||
distance_threshold: float = 0.03,
|
||||
height_threshold: float = 0.2,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
body_idx = robot.find_bodies(gripper_body_name)[0][0]
gripper_pos_w = robot.data.body_pos_w[:, body_idx, :]
|
||||
distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
|
||||
is_close = distance < distance_threshold
|
||||
is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
|
||||
return is_close & is_lifted
|
||||
|
||||
def gripper_at_lid_side(env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l", # 改为两个下划线
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
side_distance: float = 0.05,
|
||||
side_tolerance: float = 0.01,
|
||||
alignment_tolerance: float = 0.02,
|
||||
height_offset: float = 0.1,
|
||||
height_tolerance: float = 0.02) -> torch.Tensor:
|
||||
"""Terminate when gripper center is positioned on the side of the lid at specified height.
|
||||
|
||||
坐标系说明:
|
||||
- X 方向:两个夹爪朝中心合并的方向
|
||||
- Y 方向:夹爪间空隙的方向,和 lid 的把手方向一致
|
||||
- Z 方向:高度方向
|
||||
"""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# 获取两个夹爪的位置
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
|
||||
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
|
||||
|
||||
# 计算夹爪中心位置
|
||||
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
|
||||
rel_pos = gripper_center - lid_pos_w
|
||||
|
||||
# Y 方向:应该在 lid 的 Y 方向两侧(距离 side_distance)
|
||||
y_dist = torch.abs(rel_pos[:, 1])
|
||||
y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
|
||||
|
||||
# X 方向:应该对齐(接近 0)
|
||||
x_dist = torch.abs(rel_pos[:, 0])
|
||||
x_ok = x_dist < alignment_tolerance
|
||||
|
||||
# Z 方向:应该在 lid 上方 height_offset 处
|
||||
z_error = torch.abs(rel_pos[:, 2] - height_offset)
|
||||
z_ok = z_error < height_tolerance
|
||||
|
||||
# 所有条件都要满足
|
||||
return x_ok & y_ok & z_ok
|
||||
|
||||
|
||||
def base_height_failure(env: ManagerBasedRLEnv,
|
||||
minimum_height: float | None = None,
|
||||
maximum_height: float | None = None,
|
||||
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
|
||||
"""Terminate when the robot's base height is outside the specified range."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
asset: Articulation = env.scene[asset_cfg.name]
|
||||
root_pos_z = asset.data.root_pos_w[:, 2]
|
||||
|
||||
# check if height is outside the range
|
||||
out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool)
|
||||
if minimum_height is not None:
|
||||
out_of_bounds |= root_pos_z < minimum_height
|
||||
if maximum_height is not None:
|
||||
out_of_bounds |= root_pos_z > maximum_height
|
||||
|
||||
return out_of_bounds
|
||||
|
||||
@@ -0,0 +1,578 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import math
|
||||
|
||||
import torch
|
||||
from isaaclab.actuators import ImplicitActuatorCfg
|
||||
import isaaclab.sim as sim_utils
|
||||
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
|
||||
from isaaclab.envs import ManagerBasedRLEnvCfg
|
||||
from isaaclab.managers import EventTermCfg as EventTerm
|
||||
from isaaclab.managers import ObservationGroupCfg as ObsGroup
|
||||
from isaaclab.managers import ObservationTermCfg as ObsTerm
|
||||
from isaaclab.managers import RewardTermCfg as RewTerm
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
from isaaclab.managers import TerminationTermCfg as DoneTerm
|
||||
from isaaclab.managers import CurriculumTermCfg as CurrTerm
|
||||
from isaaclab.scene import InteractiveSceneCfg
|
||||
from isaaclab.utils import configclass
|
||||
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
|
||||
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
|
||||
from isaaclab.sensors import CameraCfg
|
||||
|
||||
from . import mdp
|
||||
|
||||
##
|
||||
# Pre-defined configs
|
||||
##
|
||||
|
||||
from mindbot.robot.mindbot import MINDBOT_CFG
|
||||
|
||||
# ====== 其他物体配置 ======
|
||||
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
|
||||
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
|
||||
|
||||
|
||||
TABLE_CFG=RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Table",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.95, -0.3, 0.005],
|
||||
rot=[0.7071, 0.0, 0.0, 0.7071],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
|
||||
rigid_props=RigidBodyPropertiesCfg(
|
||||
rigid_body_enabled= True,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
disable_gravity=False,
|
||||
),
|
||||
collision_props=CollisionPropertiesCfg(
|
||||
collision_enabled=True,
|
||||
contact_offset=0.0005,#original 0.02
|
||||
rest_offset=0
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
LID_CFG = RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Lid",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.2, 0.65, 0.9],
|
||||
rot=[1.0, 0.0, 0.0, 0.0],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
|
||||
# scale=(0.2, 0.2, 0.2),
|
||||
rigid_props=RigidBodyPropertiesCfg(
|
||||
rigid_body_enabled= True,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
max_angular_velocity=1000.0,
|
||||
max_linear_velocity=1000.0,
|
||||
max_depenetration_velocity=0.5,#original 5.0
|
||||
linear_damping=5.0, #original 0.5
|
||||
angular_damping=5.0, #original 0.5
|
||||
disable_gravity=False,
|
||||
),
|
||||
collision_props=CollisionPropertiesCfg(
|
||||
collision_enabled=True,
|
||||
contact_offset=0.0005,#original 0.02
|
||||
rest_offset=0
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
ROOM_CFG = AssetBaseCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Room",
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
|
||||
),
|
||||
)
|
||||
|
||||
# ROOM_CFG = RigidObjectCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Room",
|
||||
# init_state=RigidObjectCfg.InitialStateCfg(
|
||||
# pos=[0.0, 0.0, 0.0],
|
||||
# rot=[1.0, 0.0, 0.0, 0.0],
|
||||
# lin_vel=[0.0, 0.0, 0.0],
|
||||
# ang_vel=[0.0, 0.0, 0.0],
|
||||
# ),
|
||||
# spawn=UsdFileCfg(
|
||||
# usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
|
||||
# # scale=(0.2, 0.2, 0.2),
|
||||
# rigid_props=RigidBodyPropertiesCfg(
|
||||
# rigid_body_enabled= True,
|
||||
# solver_position_iteration_count=32,
|
||||
# solver_velocity_iteration_count=16,
|
||||
# max_angular_velocity=1000.0,
|
||||
# max_linear_velocity=1000.0,
|
||||
# max_depenetration_velocity=0.5,#original 5.0
|
||||
# linear_damping=5.0, #original 0.5
|
||||
# angular_damping=5.0, #original 0.5
|
||||
# disable_gravity=False,
|
||||
# ),
|
||||
# collision_props=CollisionPropertiesCfg(
|
||||
# collision_enabled=True,
|
||||
# contact_offset=0.0005,#original 0.02
|
||||
# rest_offset=0
|
||||
# ),
|
||||
# ),
|
||||
# )
|
||||
|
||||
|
||||
|
||||
ULTRASOUND_CFG = ArticulationCfg(
|
||||
spawn=sim_utils.UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
|
||||
rigid_props=sim_utils.RigidBodyPropertiesCfg(
|
||||
disable_gravity=False,
|
||||
max_depenetration_velocity=1.0
|
||||
),
|
||||
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
|
||||
enabled_self_collisions=False,#
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
stabilization_threshold=1e-6,
|
||||
# fix_root_link=True,
|
||||
),
|
||||
),
|
||||
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)),
|
||||
# actuators={}
|
||||
actuators={
|
||||
"passive_damper": ImplicitActuatorCfg(
|
||||
# ".*" 表示匹配该USD文件内的所有关节(无论是轮子、屏幕转轴还是其他)
|
||||
joint_names_expr=[".*"],
|
||||
|
||||
stiffness=0.0,
|
||||
damping=1000.0,
|
||||
effort_limit_sim=10000.0,
|
||||
velocity_limit_sim=100.0,
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
##
|
||||
# Scene definition
|
||||
##
|
||||
|
||||
|
||||
@configclass
|
||||
class MindbotSceneCfg(InteractiveSceneCfg):
|
||||
"""Configuration for a cart-pole scene."""
|
||||
|
||||
# ground plane
|
||||
ground = AssetBaseCfg(
|
||||
prim_path="/World/ground",
|
||||
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
|
||||
)
|
||||
|
||||
# robot
|
||||
Room: AssetBaseCfg = ROOM_CFG.replace(prim_path="{ENV_REGEX_NS}/Room")
|
||||
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
|
||||
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
|
||||
ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
|
||||
lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
|
||||
# lights
|
||||
dome_light = AssetBaseCfg(
|
||||
prim_path="/World/DomeLight",
|
||||
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
|
||||
)
|
||||
# head_camera=CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
# update_period=1/120,
|
||||
# height=480,
|
||||
# width=640,
|
||||
# data_type=["rgb"],
|
||||
# spawn=None,
|
||||
# )
|
||||
left_hand_camera=CameraCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
|
||||
update_period=1/120,
|
||||
height=480,
|
||||
width=640,
|
||||
data_types=["rgb"],
|
||||
spawn=None,
|
||||
)
|
||||
right_hand_camera=CameraCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
|
||||
update_period=1/120,
|
||||
height=480,
|
||||
width=640,
|
||||
data_types=["rgb"],
|
||||
spawn=None,
|
||||
)
|
||||
head_camera=CameraCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
update_period=1/120,
|
||||
height=480,
|
||||
width=640,
|
||||
data_types=["rgb"],
|
||||
spawn=None,
|
||||
)
|
||||
chest_camera=CameraCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
|
||||
update_period=1/120,
|
||||
height=480,
|
||||
width=640,
|
||||
data_types=["rgb"],
|
||||
spawn=None,
|
||||
)
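# --- Editor's note (assumption about sensor timing): with sim.dt = 1/120 set below in
# MindbotEnvCfg.__post_init__, update_period = 1/120 requests a new frame from every camera
# each physics step; raising the period (e.g. to 1/30) is a simple lever if rendering four
# RGB cameras per env becomes the bottleneck.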
|
||||
##
|
||||
# MDP settings
|
||||
##
|
||||
|
||||
def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
|
||||
asset = env.scene[asset_cfg.name]
|
||||
body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
|
||||
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
|
||||
return pos_w
|
||||
|
||||
|
||||
@configclass
|
||||
class ActionsCfg:
|
||||
"""Action specifications for the MDP."""
|
||||
|
||||
# 使用任务空间控制器:策略输出末端执行器位置+姿态增量(6维)
|
||||
left_arm_ee = DifferentialInverseKinematicsActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["l_joint[1-6]"], # 左臂的6个关节
|
||||
body_name="left_hand_body", # 末端执行器body名称
|
||||
controller=DifferentialIKControllerCfg(
|
||||
command_type="pose", # 控制位置+姿态
|
||||
use_relative_mode=True, # 相对模式:动作是增量
|
||||
ik_method="dls", # Damped Least Squares方法
|
||||
),
|
||||
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
|
||||
)
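# --- Editor's note (sketch, not original code): with use_relative_mode=True the 6-D action is
# interpreted as (dx, dy, dz, droll, dpitch, dyaw) increments on the end-effector pose. If the
# policy output is roughly in [-1, 1], the scale above corresponds to at most ~1 cm of position
# and ~0.025 rad of orientation change per control step (about 0.3 m/s at a 30 Hz control rate).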
|
||||
|
||||
# right_arm_fixed = mdp.JointPositionActionCfg(
|
||||
# asset_name="Mindbot",
|
||||
# joint_names=["r_joint[1-6]"], # 对应 l_joint1 到 l_joint6
|
||||
|
||||
# # 1. 缩放设为 0:这让 RL 策略无法控制它,它不会动
|
||||
# scale=0.0,
|
||||
|
||||
# # 2. 偏移设为你的目标角度(弧度):这告诉电机“永远停在这里”
|
||||
# # 对应 (135, -70, -90, 90, 90, -70)
|
||||
# offset={
|
||||
# "r_joint1": 2.3562,
|
||||
# "r_joint2": -1.2217,
|
||||
# "r_joint3": -1.5708,
|
||||
# "r_joint4": 1.5708,
|
||||
# "r_joint5": 1.5708,
|
||||
# "r_joint6": -1.2217,
|
||||
# },
|
||||
# )
|
||||
|
||||
grippers_position = mdp.BinaryJointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["left_hand_joint_left", "left_hand_joint_right"],
|
||||
|
||||
# 修正:使用字典格式
|
||||
# open_command_expr={"关节名正则": 值}
|
||||
open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
|
||||
|
||||
# close_command_expr={"关节名正则": 值}
|
||||
close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
|
||||
)
|
||||
|
||||
|
||||
trunk_position = mdp.JointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["PrismaticJoint"],
|
||||
scale=0.00,
|
||||
offset=0.3,
|
||||
clip=None
|
||||
)
|
||||
|
||||
|
||||
@configclass
|
||||
class ObservationsCfg:
|
||||
"""Observation specifications for the MDP."""
|
||||
|
||||
@configclass
|
||||
class PolicyCfg(ObsGroup):
|
||||
"""Observations for policy group."""
|
||||
|
||||
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
|
||||
left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
|
||||
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
|
||||
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
|
||||
ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
self.enable_corruption = False
|
||||
self.concatenate_terms = True
|
||||
|
||||
# observation groups
|
||||
policy: PolicyCfg = PolicyCfg()
|
||||
|
||||
|
||||
@configclass
|
||||
class EventCfg:
|
||||
"""Configuration for events."""
|
||||
|
||||
# 重置所有关节到 init_state(无偏移)must be
|
||||
reset_mindbot_all_joints = EventTerm(
|
||||
func=mdp.reset_joints_by_offset,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot"),
|
||||
"position_range": (0.0, 0.0),
|
||||
"velocity_range": (0.0, 0.0),
|
||||
},
|
||||
)
|
||||
# 2. 底座安装误差 (建议缩小到 2cm)
|
||||
reset_mindbot_base = EventTerm(
|
||||
func=mdp.reset_root_state_uniform,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot"),
|
||||
"pose_range": {
|
||||
"x": (-0.05, 0.05),
|
||||
"y": (-0.05, 0.05),
|
||||
"z": (0.74, 0.75),
|
||||
"yaw": (-0.1, 0.1),
|
||||
},
|
||||
"velocity_range": {"x": (0.0, 0.0), "y": (0.0, 0.0)},
|
||||
},
|
||||
)
|
||||
# 1. 机械臂关节微小偏移 (0.01 弧度约 0.6 度,很合适)
|
||||
reset_mindbot_left_arm = EventTerm(
|
||||
func=mdp.reset_joints_by_offset,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
|
||||
"position_range": (-0.01, 0.01),
|
||||
"velocity_range": (0.0, 0.0),
|
||||
},
|
||||
)
|
||||
|
||||
reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
|
||||
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
reset_ultrasound = EventTerm(
|
||||
func=mdp.reset_root_state_uniform,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("ultrasound"),
|
||||
"pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)},
|
||||
"velocity_range": {"x": (0.0, 0.0)}
|
||||
}
|
||||
)
|
||||
reset_lid = EventTerm(
|
||||
func=mdp.reset_root_state_uniform,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("lid"),
|
||||
"pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)},
|
||||
"velocity_range": {"x": (0.0, 0.0)}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@configclass
|
||||
class RewardsCfg:
|
||||
"""Reward terms for the MDP."""
|
||||
|
||||
gripper_lid_orientation_alignment = RewTerm(
|
||||
func=mdp.gripper_lid_orientation_alignment,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"gripper_body_name": "left_hand_body",
|
||||
"gripper_forward_axis": (0.0, 0.0, 1.0),
|
||||
"gripper_width_axis": (0.0, 1.0, 0.0),
|
||||
"lid_handle_axis": (0.0, 1.0, 0.0),
|
||||
"max_angle_deg": 85.0, # 允许60度内的偏差
|
||||
},
|
||||
weight=5.0 #original 5.0
|
||||
)
|
||||
|
||||
# stage 2
|
||||
# 位置对齐奖励:鼓励夹爪位于 lid 两侧,且 Z 方向在 lid 上方 0.1m
|
||||
gripper_lid_position_alignment = RewTerm(
|
||||
func=mdp.gripper_lid_position_alignment,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"height_offset": 0.07, # Z方向:lid 上方 0.1m
|
||||
"std": 0.3, # 位置对齐的容忍度
|
||||
},
|
||||
weight=3.0 #original 3.0
|
||||
)
|
||||
approach_lid_penalty = RewTerm(
|
||||
func=mdp.gripper_lid_distance_penalty,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"height_offset": 0.07,
|
||||
},
|
||||
# weight 为负数表示惩罚。
|
||||
# 假设距离 0.5m,weight=-1.0,惩罚就是 0.5 * (-1.0) = -0.5 分。
|
||||
weight=-1.0
|
||||
)
|
||||
# 【新增】精细对齐 (引导进入 2cm 圈)
|
||||
gripper_lid_fine_alignment = RewTerm(
|
||||
func=mdp.gripper_lid_position_alignment,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"height_offset": 0.07, # 【注意】这个高度必须非常准,和 interaction 判定高度一致
|
||||
"std": 0.05, # 非常陡峭的梯度,只在 5-10cm 内有效
|
||||
},
|
||||
weight=10.0 # 高权重
|
||||
)
|
||||
|
||||
gripper_close_interaction = RewTerm(
|
||||
func=mdp.gripper_close_when_near,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"joint_names": ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
"target_close_pos": 0.03,
|
||||
"height_offset": 0.04,
|
||||
"grasp_radius": 0.03,
|
||||
},
|
||||
weight=10.0
|
||||
)
|
||||
|
||||
lid_lifted_reward = RewTerm(
|
||||
func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
|
||||
},
|
||||
weight=10.0 # 给一个足够大的权重
|
||||
)
|
||||
|
||||
lid_lifting_reward = RewTerm(
|
||||
func=mdp.lid_lift_progress_reward,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
|
||||
"target_height_lift": 0.2,
|
||||
"height_offset": 0.07, # 与其他奖励保持一致
|
||||
"std": 0.1
|
||||
},
|
||||
# 权重设大一点,告诉它这是最终目标
|
||||
weight=10.0
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
||||
@configclass
|
||||
class TerminationsCfg:
|
||||
"""Termination terms for the MDP."""
|
||||
|
||||
time_out = DoneTerm(func=mdp.time_out, time_out=True)
|
||||
|
||||
mindbot_fly_away = DoneTerm(
|
||||
func=mdp.base_height_failure,
|
||||
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
|
||||
)
|
||||
|
||||
lid_fly_away = DoneTerm(
|
||||
func=mdp.base_height_failure,
|
||||
params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
|
||||
)
|
||||
|
||||
# 新增:盖子掉落判定
|
||||
# 如果盖子掉回桌面(高度接近初始高度),且已经运行了一段时间(避开刚开始重置的瞬间)
|
||||
lid_dropped = DoneTerm(
|
||||
func=mdp.base_height_failure, # 复用高度判定函数
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("lid"),
|
||||
"minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
|
||||
##
|
||||
# Environment configuration
|
||||
##
|
||||
|
||||
@configclass
|
||||
class CurriculumCfg:
|
||||
pass
|
||||
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
|
||||
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
|
||||
|
||||
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
|
||||
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
|
||||
|
||||
# position_std_scheduler = CurrTerm(
|
||||
# func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类
|
||||
# params={
|
||||
# # 路径:rewards管理器 -> 你的奖励项名 -> params字典 -> std键
|
||||
# "address": "rewards.gripper_lid_position_alignment.params.std",
|
||||
|
||||
# # 修改逻辑:使用我们刚才写的函数
|
||||
# "modify_fn": mdp.annealing_std,
|
||||
|
||||
# # 传给函数的参数
|
||||
# "modify_params": {
|
||||
# "start_step": 00, # 约 600 轮
|
||||
# "end_step": 5_000, # 约 1200 轮
|
||||
# "start_std": 0.3,
|
||||
# "end_std": 0.05,
|
||||
# }
|
||||
# }
|
||||
# )
|
||||
|
||||
@configclass
|
||||
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
|
||||
# Scene settings
|
||||
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=5.0)
|
||||
# Basic settings
|
||||
observations: ObservationsCfg = ObservationsCfg()
|
||||
actions: ActionsCfg = ActionsCfg()
|
||||
events: EventCfg = EventCfg()
|
||||
# MDP settings
|
||||
rewards: RewardsCfg = RewardsCfg()
|
||||
terminations: TerminationsCfg = TerminationsCfg()
|
||||
# curriculum: CurriculumCfg = CurriculumCfg()
|
||||
|
||||
# Post initialization
|
||||
def __post_init__(self) -> None:
|
||||
"""Post initialization."""
|
||||
# general settings
|
||||
self.decimation = 4 # original 2;配合 sim.dt = 1/120,控制频率为 120/4 = 30Hz
|
||||
self.episode_length_s = 10
|
||||
# viewer settings
|
||||
self.viewer.eye = (0, 2.5, 2.6)#(3.5, 0.0, 3.2)
|
||||
# simulation settings
|
||||
self.sim.dt = 1 / 120 #original 1 / 60
|
||||
self.sim.render_interval = self.decimation
|
||||
# # 1. 基础堆内存
|
||||
self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256MB
|
||||
self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
|
||||
self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
|
||||
self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024 # 约 1677 万个接触点
|
||||
self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024 # 约 105 万个 patch
|
||||
|
||||
# # 5. 聚合对容量 (针对复杂的 Articulation)
|
||||
self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
|
||||
self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
|
||||
self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024
|
||||
282
test.py
Normal file
@@ -0,0 +1,282 @@
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
|
||||
import imageio
|
||||
import numpy as np
|
||||
import h5py
|
||||
import torch
|
||||
|
||||
# 先引入 AppLauncher,并尽早实例化 SimulationApp,避免 pxr 未加载
|
||||
from isaaclab.app import AppLauncher
|
||||
import cli_args # isort: skip
|
||||
|
||||
# CLI
|
||||
parser = argparse.ArgumentParser(description="Play an RL agent with multi-cam recording.")
|
||||
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
|
||||
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
|
||||
parser.add_argument("--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations.")
|
||||
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
|
||||
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
|
||||
parser.add_argument("--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point.")
|
||||
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
|
||||
parser.add_argument("--use_pretrained_checkpoint", action="store_true", help="Use the pre-trained checkpoint from Nucleus.")
|
||||
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
|
||||
parser.add_argument("--max_steps", type=int, default=None, help="最大步数,达到后提前退出")
|
||||
cli_args.add_rsl_rl_args(parser)
|
||||
AppLauncher.add_app_launcher_args(parser)
|
||||
args_cli, hydra_args = parser.parse_known_args()
|
||||
if args_cli.video:
|
||||
args_cli.enable_cameras = True
|
||||
# 将 hydra 剩余参数传递
|
||||
sys.argv = [sys.argv[0]] + hydra_args
|
||||
|
||||
# ==== 先实例化 SimulationApp ====
|
||||
app_launcher = AppLauncher(args_cli)
|
||||
simulation_app = app_launcher.app
|
||||
|
||||
# ==== 之后再 import 依赖 isaac/pxr 的模块 ====
|
||||
import gymnasium as gym
|
||||
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
|
||||
from isaaclab.envs import DirectMARLEnv, DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg, multi_agent_to_single_agent
|
||||
from isaaclab.utils.assets import retrieve_file_path
|
||||
from isaaclab.utils.dict import print_dict
|
||||
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
|
||||
from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
|
||||
|
||||
import isaaclab_tasks # noqa: F401
|
||||
from isaaclab_tasks.utils import get_checkpoint_path
|
||||
from isaaclab_tasks.utils.hydra import hydra_task_config
|
||||
import mindbot.tasks # noqa: F401
|
||||
|
||||
CAM_NAMES = ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand", "cam_top", "cam_side"]
|
||||
|
||||
|
||||
@hydra_task_config(args_cli.task, args_cli.agent)
|
||||
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
|
||||
task_name = args_cli.task.split(":")[-1]
|
||||
train_task_name = task_name.replace("-Play", "")
|
||||
|
||||
agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
|
||||
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
|
||||
env_cfg.seed = agent_cfg.seed
|
||||
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
|
||||
|
||||
log_root_path = os.path.abspath(os.path.join("logs", "rsl_rl", agent_cfg.experiment_name))
|
||||
print(f"[INFO] Loading experiment from directory: {log_root_path}")
|
||||
if args_cli.use_pretrained_checkpoint:
|
||||
resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
|
||||
if not resume_path:
|
||||
print("[INFO] No pre-trained checkpoint for this task.")
|
||||
return
|
||||
elif args_cli.checkpoint:
|
||||
resume_path = retrieve_file_path(args_cli.checkpoint)
|
||||
else:
|
||||
resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
|
||||
|
||||
log_dir = os.path.dirname(resume_path)
|
||||
env_cfg.log_dir = log_dir
|
||||
|
||||
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
|
||||
if isinstance(env.unwrapped, DirectMARLEnv):
|
||||
env = multi_agent_to_single_agent(env)
|
||||
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
|
||||
|
||||
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
|
||||
if agent_cfg.class_name == "OnPolicyRunner":
|
||||
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
|
||||
elif agent_cfg.class_name == "DistillationRunner":
|
||||
runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
|
||||
else:
|
||||
raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
|
||||
runner.load(resume_path)
|
||||
|
||||
policy = runner.get_inference_policy(device=env.unwrapped.device)
|
||||
try:
|
||||
policy_nn = runner.alg.policy
|
||||
except AttributeError:
|
||||
policy_nn = runner.alg.actor_critic
|
||||
|
||||
# 导出模型
|
||||
export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
|
||||
normalizer = getattr(policy_nn, "actor_obs_normalizer", getattr(policy_nn, "student_obs_normalizer", None))
|
||||
export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
|
||||
export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
|
||||
|
||||
# dt = env.unwrapped.step_dt
|
||||
|
||||
# # 录制缓冲
|
||||
# cam_buffers = {n: [] for n in CAM_NAMES}
|
||||
# joint_log, joint_vel_log, action_log, ts_log = [], [], [], []
|
||||
# t0 = time.time()
|
||||
|
||||
# obs = env.get_observations()
|
||||
# timestep = 0
|
||||
# step_count = 0
|
||||
# try:
|
||||
# while simulation_app.is_running():
|
||||
# start_time = time.time()
|
||||
# with torch.inference_mode():
|
||||
# actions = policy(obs)
|
||||
# obs, _, dones, _ = env.step(actions)
|
||||
# policy_nn.reset(dones)
|
||||
|
||||
# # 相机帧(取 env0,如需全部 env 去掉 [0])
|
||||
# for name in CAM_NAMES:
|
||||
# if name not in env.unwrapped.scene.sensors:
|
||||
# continue
|
||||
# cam = env.unwrapped.scene.sensors[name]
|
||||
# rgba = cam.data.output.get("rgba", cam.data.output.get("rgb"))
|
||||
# if rgba is None:
|
||||
# continue
|
||||
# frame = rgba[0].cpu().numpy()
|
||||
# if frame.shape[-1] == 4:
|
||||
# frame = frame[..., :3]
|
||||
# cam_buffers[name].append(frame)
|
||||
|
||||
# # 关节 / 速度 / 动作
|
||||
# robot = env.unwrapped.scene["Mindbot"]
|
||||
# joint_log.append(robot.data.joint_pos.cpu().numpy())
|
||||
# joint_vel_log.append(robot.data.joint_vel.cpu().numpy())
|
||||
# action_log.append(actions.cpu().numpy())
|
||||
# ts_log.append(time.time() - t0)
|
||||
|
||||
# step_count += 1
|
||||
# if args_cli.max_steps and step_count >= args_cli.max_steps:
|
||||
# break
|
||||
|
||||
# if args_cli.video:
|
||||
# timestep += 1
|
||||
# if timestep == args_cli.video_length:
|
||||
# break
|
||||
|
||||
# sleep_time = dt - (time.time() - start_time)
|
||||
# if args_cli.real_time and sleep_time > 0:
|
||||
# time.sleep(sleep_time)
|
||||
# finally:
|
||||
# # 保存 HDF5
|
||||
# h5_path = os.path.join(log_dir, "rollout_multi_cam.h5")
|
||||
# with h5py.File(h5_path, "w") as f:
|
||||
# f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip")
|
||||
# f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip")
|
||||
# f.create_dataset("actions", data=np.stack(action_log), compression="gzip")
|
||||
# f.create_dataset("timestamps", data=np.array(ts_log))
|
||||
# for name, frames in cam_buffers.items():
|
||||
# if not frames:
|
||||
# continue
|
||||
# dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip")
|
||||
# if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]:
|
||||
# fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05
|
||||
# else:
|
||||
# fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295
|
||||
# dset.attrs["fx"] = fx
|
||||
# dset.attrs["fy"] = fy
|
||||
# dset.attrs["cx"] = cx
|
||||
# dset.attrs["cy"] = cy
|
||||
# dset.attrs["focal_length"] = 1.93
|
||||
# print(f"[INFO] Saved HDF5 to {h5_path}")
|
||||
|
||||
# # 可选:单路 MP4
|
||||
# head_frames = cam_buffers["cam_head"]
|
||||
# if head_frames:
|
||||
# fps = int(round(1.0 / dt))
|
||||
# video_path = os.path.join(log_dir, "cam_head.mp4")
|
||||
# imageio.mimsave(video_path, head_frames, fps=fps)
|
||||
# print(f"[INFO] Saved video to {video_path}")
|
||||
|
||||
# env.close()
|
||||
dt = env.unwrapped.step_dt
|
||||
|
||||
# 录制缓冲
|
||||
cam_buffers = {n: [] for n in CAM_NAMES}
|
||||
joint_log, joint_vel_log, action_log, ts_log = [], [], [], []
|
||||
t0 = time.time()
|
||||
|
||||
obs = env.get_observations()
|
||||
timestep = 0
|
||||
while simulation_app.is_running():
|
||||
start_time = time.time()
|
||||
with torch.inference_mode():
|
||||
actions = policy(obs)
|
||||
obs, _, dones, _ = env.step(actions)
|
||||
policy_nn.reset(dones)
|
||||
|
||||
# 相机帧(取 env0,如需全部 env 去掉 [0])
|
||||
for name in CAM_NAMES:
|
||||
if name not in env.unwrapped.scene.sensors:
|
||||
continue
|
||||
cam = env.unwrapped.scene.sensors[name]
|
||||
rgba = cam.data.output.get("rgba", cam.data.output.get("rgb"))
|
||||
if rgba is None:
|
||||
continue
|
||||
frame = rgba[0].cpu().numpy()
|
||||
if frame.shape[-1] == 4:
|
||||
frame = frame[..., :3]
|
||||
cam_buffers[name].append(frame)
|
||||
|
||||
# 关节 / 速度 / 动作
|
||||
robot = env.unwrapped.scene["Mindbot"]
|
||||
joint_log.append(robot.data.joint_pos.cpu().numpy())
|
||||
joint_vel_log.append(robot.data.joint_vel.cpu().numpy())
|
||||
action_log.append(actions.cpu().numpy())
|
||||
ts_log.append(time.time() - t0)
|
||||
|
||||
if args_cli.video:
|
||||
timestep += 1
|
||||
if timestep == args_cli.video_length:
|
||||
break
|
||||
|
||||
sleep_time = dt - (time.time() - start_time)
|
||||
if args_cli.real_time and sleep_time > 0:
|
||||
time.sleep(sleep_time)
|
||||
|
||||

    # Save HDF5
    h5_path = os.path.join(log_dir, "rollout_multi_cam.h5")
    with h5py.File(h5_path, "w") as f:
        f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip")
        f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip")
        f.create_dataset("actions", data=np.stack(action_log), compression="gzip")
        f.create_dataset("timestamps", data=np.array(ts_log))
        for name, frames in cam_buffers.items():
            if not frames:
                continue
            dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip")
            # Intrinsics: set these to match your camera configuration
            if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]:
                fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05
            else:
                fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295
            dset.attrs["fx"] = fx
            dset.attrs["fy"] = fy
            dset.attrs["cx"] = cx
            dset.attrs["cy"] = cy
            dset.attrs["focal_length"] = 1.93
    print(f"[INFO] Saved HDF5 to {h5_path}")

    # Optional: single-camera MP4
    head_frames = cam_buffers["cam_head"]
    if head_frames:
        fps = int(round(1.0 / dt))
        video_path = os.path.join(log_dir, "cam_head.mp4")
        imageio.mimsave(video_path, head_frames, fps=fps)
        print(f"[INFO] Saved video to {video_path}")
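
    # A sketch (not in the original commit) extending the same pattern to write one MP4
    # per recorded camera instead of only cam_head:
    #
    #     fps = int(round(1.0 / dt))
    #     for name, frames in cam_buffers.items():
    #         if frames:
    #             imageio.mimsave(os.path.join(log_dir, f"{name}.mp4"), frames, fps=fps)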

    env.close()


if __name__ == "__main__":
    main()
    simulation_app.close()


# Quick HDF5 inspection snippet (commented out): lists every dataset with its shape and dtype.
# import h5py
#
# h5_path = r"C:\Users\PC\workpalce\mindbot\logs\rsl_rl\mindbot_grasp\2026-01-15_11-51-00\rollout_multi_cam.h5"
#
# with h5py.File(h5_path, "r") as f:
#     def walk(name, obj):
#         if isinstance(obj, h5py.Dataset):
#             print(f"{name}: shape={obj.shape}, dtype={obj.dtype}")
#         else:
#             print(f"{name}/")
#     f.visititems(walk)
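
# A follow-on sketch (not part of the original script): rebuild the 3x3 pinhole
# intrinsics matrix K from the fx/fy/cx/cy attributes stored on each
# "cams/<name>/rgb" dataset, reusing the example h5_path above.
#
# import numpy as np
#
# with h5py.File(h5_path, "r") as f:
#     rgb = f["cams/cam_head/rgb"]          # (T, H, W, 3) frames
#     K = np.array([
#         [rgb.attrs["fx"], 0.0, rgb.attrs["cx"]],
#         [0.0, rgb.attrs["fy"], rgb.attrs["cy"]],
#         [0.0, 0.0, 1.0],
#     ])
#     print("cam_head frames:", rgb.shape)
#     print("intrinsics K:\n", K)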

73	test_sence.py	Normal file
@@ -0,0 +1,73 @@
from isaaclab.app import AppLauncher
import argparse

# -----------------------------------------------------------------------------
# 1. Launch Isaac Sim
# -----------------------------------------------------------------------------
parser = argparse.ArgumentParser()
AppLauncher.add_app_launcher_args(parser)
args = parser.parse_args()

app_launcher = AppLauncher(args)
simulation_app = app_launcher.app

# -----------------------------------------------------------------------------
# 2. The SimulationContext must be created first (PhysX is initialized here)
# -----------------------------------------------------------------------------
import isaaclab.sim as sim_utils

sim_cfg = sim_utils.SimulationCfg(
    dt=1 / 120,
    device=args.device,
)
sim = sim_utils.SimulationContext(sim_cfg)

# (Optional) set the viewer camera
sim.set_camera_view(
    eye=[3.0, 0.0, 2.0],
    target=[0.0, 0.0, 0.5],
)

# -----------------------------------------------------------------------------
# 3. Then play the timeline (this step is critical)
# -----------------------------------------------------------------------------
import omni.timeline

timeline = omni.timeline.get_timeline_interface()
timeline.play()

# -----------------------------------------------------------------------------
# 4. Only now is it safe to create the Env
# -----------------------------------------------------------------------------
import torch
from isaaclab.envs import ManagerBasedRLEnv
from mindbot.tasks.manager_based.pull.pull_env_cfg import PullEnvCfg


def main():
    env_cfg = PullEnvCfg()
    env_cfg.scene.num_envs = 1

    env = ManagerBasedRLEnv(cfg=env_cfg)

    obs, _ = env.reset()
    print("[OK] Env reset")

    for step in range(100000):
        actions = torch.zeros(
            env.num_envs,
            env.action_manager.total_action_dim,
            device=env.device,
        )
        obs, rew, done, trunc, info = env.step(actions)

        if step % 100 == 0:
            root_pos = env.scene["Mindbot"].data.root_pos_w[0]
            print(f"step {step}, root z = {root_pos[2].item():.3f}")

    print("[DONE]")


if __name__ == "__main__":
    main()
    simulation_app.close()
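
# Hedged usage note (not part of the commit): AppLauncher.add_app_launcher_args()
# exposes the standard Isaac Lab launcher flags, so this smoke test is typically run
# with something like:
#
#     python test_sence.py --headless --device cuda:0
#
# The exact flag set depends on the installed Isaac Lab version.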