Local 4090 machine: code commit

This commit is contained in:
2026-01-28 19:51:03 +08:00
parent eddcdd428f
commit 196378f2d3
41 changed files with 8447 additions and 450 deletions


@@ -9,6 +9,12 @@
import argparse
import sys
import os
import torch
import torchvision
import gymnasium as gym
import time
from datetime import datetime
from isaaclab.app import AppLauncher
@@ -18,7 +24,7 @@ import cli_args # isort: skip
# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument("--video_length", type=int, default=2000, help="Length of the recorded video (in steps).")
parser.add_argument(
"--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
@@ -53,11 +59,6 @@ simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
import os
import time
import torch
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from isaaclab.envs import (
@@ -79,6 +80,215 @@ from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks # noqa: F401
# ==============================================================================
# [Update] Multi-camera, multi-environment recorder class
# ==============================================================================
# 1. Make sure the following imports are present at the top of the file
import imageio
import numpy as np
"""use gpu compute to record video"""
class MultiCameraRecorder:
def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
self.env = env
self.camera_names = camera_names
self.env_indices = env_indices
self.output_dir = output_dir
self.fps = fps
self.frames = {cam_name: {env_idx: [] for env_idx in env_indices} for cam_name in camera_names}
os.makedirs(self.output_dir, exist_ok=True)
self.cameras = {}
for name in camera_names:
if name in self.env.unwrapped.scene.keys():
self.cameras[name] = self.env.unwrapped.scene[name]
print(f"[INFO] Camera {name} linked.")
def record_step(self):
"""保持在 GPU 上克隆数据"""
for cam_name, camera_obj in self.cameras.items():
# Grab the latest RGB output; cloning below keeps it from being overwritten by the render backend
rgb_data = camera_obj.data.output["rgb"]
for env_idx in self.env_indices:
# .clone() keeps the frame on the GPU, but watch GPU memory usage on long recordings
self.frames[cam_name][env_idx].append(rgb_data[env_idx].clone())
def save_videos(self, filename_suffix=""):
print(f"[INFO] Saving videos from GPU to Disk...")
for cam_name, env_dict in self.frames.items():
for env_idx, frame_list in env_dict.items():
if not frame_list: continue
# Stack into (T, H, W, C), the layout torchvision.io.write_video expects
video_tensor = torch.stack(frame_list)
if video_tensor.shape[-1] == 4: # RGBA -> RGB
video_tensor = video_tensor[..., :3]
# Move to the CPU and save
video_cpu = video_tensor.cpu()
output_path = os.path.join(self.output_dir, f"{cam_name}_env{env_idx}_{filename_suffix}.mp4")
# Write with torchvision as (T, H, W, C); the tensor must be uint8
torchvision.io.write_video(output_path, video_cpu, fps=self.fps)
# [Key] free GPU memory immediately after saving
del video_tensor
del video_cpu
frame_list.clear()
torch.cuda.empty_cache()
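torchvision.io.write_video expects a uint8 tensor laid out as (T, H, W, C); Isaac Lab's "rgb" camera output is normally uint8 already, but if a camera ever returns float frames in [0, 1] the write can fail or produce a black video. A minimal guard one could place before the write_video call above (a sketch only, not part of this commit):

    def _to_uint8_thwc(video_tensor: torch.Tensor) -> torch.Tensor:
        """Return a (T, H, W, C) uint8 tensor suitable for torchvision.io.write_video."""
        if video_tensor.dtype == torch.uint8:
            return video_tensor
        # assumes float frames normalized to [0, 1]
        return (video_tensor.clamp(0.0, 1.0) * 255).to(torch.uint8)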
"""use cpu compute to record video"""
# # 2. Modified MultiCameraRecorder class (numpy/CPU variant)
# class MultiCameraRecorder:
# def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
# self.env = env
# self.camera_names = camera_names
# self.env_indices = env_indices
# self.output_dir = output_dir
# self.fps = fps
# self.frames = {cam_name: {env_idx: [] for env_idx in env_indices} for cam_name in camera_names}
# os.makedirs(self.output_dir, exist_ok=True)
# self.cameras = {}
# for name in camera_names:
# try:
# self.cameras[name] = self.env.unwrapped.scene[name]
# print(f"[INFO][MultiCameraRecorder] Found camera: {name}")
# except KeyError:
# print(f"[WARN][MultiCameraRecorder] Camera '{name}' not found!")
# def record_step(self):
# """在每个仿真步调用"""
# for cam_name, camera_obj in self.cameras.items():
# # [Key change] make sure the data is synced before reading it
# # This avoids reading memory that is still being rendered, which can cause an access violation
# rgb_data = camera_obj.data.output["rgb"]
# for env_idx in self.env_indices:
# if env_idx >= rgb_data.shape[0]: continue
# # Convert to numpy on the CPU; this is usually more robust than stacking tensors for torchvision
# frame = rgb_data[env_idx].clone().detach().cpu().numpy()
# self.frames[cam_name][env_idx].append(frame)
# def save_videos(self, filename_suffix=""):
# """循环结束后调用"""
# print(f"[INFO][MultiCameraRecorder] Saving videos...")
# for cam_name, env_dict in self.frames.items():
# for env_idx, frame_list in env_dict.items():
# if not frame_list: continue
# print(f" -> Saving {cam_name} (Env {env_idx})...")
# # Normalize the format and save with imageio
# processed_frames = []
# for img in frame_list:
# # [0, 1] -> [0, 255]
# if img.dtype != np.uint8:
# if img.max() <= 1.01: img = (img * 255).astype(np.uint8)
# else: img = img.astype(np.uint8)
# # Drop the alpha channel
# if img.shape[-1] == 4: img = img[:, :, :3]
# processed_frames.append(img)
# fname = f"{cam_name}_env{env_idx}_{filename_suffix}.mp4"
# output_path = os.path.join(self.output_dir, fname)
# try:
# # Write the video with imageio
# imageio.mimsave(output_path, processed_frames, fps=self.fps)
# print(f" Saved: {output_path}")
# except Exception as e:
# print(f" [ERROR] Failed to save {fname}: {e}")
# class MultiCameraRecorder:
# """
# Records image data from multiple cameras in an Isaac Lab environment, with support for multiple environment instances.
# """
# def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
# self.env = env
# self.camera_names = camera_names
# self.env_indices = env_indices # indices of the environments to record, e.g. [0, 1]
# self.output_dir = output_dir
# self.fps = fps
# # Data layout: self.frames[camera_name][env_idx] = [list of tensors]
# self.frames = {
# cam_name: {env_idx: [] for env_idx in env_indices}
# for cam_name in camera_names
# }
# os.makedirs(self.output_dir, exist_ok=True)
# # Look up the camera objects in the scene
# self.cameras = {}
# for name in camera_names:
# try:
# # Try to get the camera handle
# self.cameras[name] = self.env.unwrapped.scene[name]
# print(f"[INFO][MultiCameraRecorder] Found camera: {name}")
# except KeyError:
# print(f"[WARN][MultiCameraRecorder] Camera '{name}' not found in scene! Skipping.")
# def record_step(self):
# """在每个仿真步调用"""
# for cam_name, camera_obj in self.cameras.items():
# # Get the RGB data (shape: [num_envs, H, W, C])
# # Note: the output may be RGBA; that is handled later
# rgb_data = camera_obj.data.output["rgb"]
# for env_idx in self.env_indices:
# # Bounds check
# if env_idx >= rgb_data.shape[0]:
# continue
# # [Key] clone immediately and move to the CPU.
# # Without the CPU copy, 4 cameras over a long recording quickly exhaust GPU memory.
# frame = rgb_data[env_idx].clone().detach().cpu()
# self.frames[cam_name][env_idx].append(frame)
# def save_videos(self, filename_suffix=""):
# """循环结束后调用,保存所有视频"""
# print(f"[INFO][MultiCameraRecorder] Saving videos for {len(self.cameras)} cameras...")
# for cam_name, env_dict in self.frames.items():
# for env_idx, frame_list in env_dict.items():
# if not frame_list:
# continue
# print(f" -> Processing {cam_name} (Env {env_idx}) with {len(frame_list)} frames...")
# # Stack into (T, H, W, C)
# video_tensor = torch.stack(frame_list)
# # Drop the alpha channel
# if video_tensor.shape[-1] == 4:
# video_tensor = video_tensor[..., :3]
# # (T, H, W, C) -> (T, C, H, W)
# video_tensor = video_tensor.permute(0, 3, 1, 2)
# # Make sure the dtype is uint8
# if video_tensor.dtype != torch.uint8:
# if video_tensor.max() <= 1.0:
# video_tensor = (video_tensor * 255).to(torch.uint8)
# else:
# video_tensor = video_tensor.to(torch.uint8)
# # Filename pattern: left_hand_camera_env0_2024-xx-xx.mp4
# fname = f"{cam_name}_env{env_idx}_{filename_suffix}.mp4"
# output_path = os.path.join(self.output_dir, fname)
# torchvision.io.write_video(output_path, video_tensor, fps=self.fps)
# print(f" Saved: {output_path}")
# ==============================================================================
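The GPU-resident recorder above keeps every frame in VRAM until save_videos() is called, which is exactly what its comments warn about for long runs. One way to bound memory (a sketch only, not part of this commit; flush_every is a made-up parameter) is to save and clear the buffers periodically, since save_videos() already clears the frame lists and empties the CUDA cache:

    def record_with_flush(recorder: MultiCameraRecorder, step: int, flush_every: int = 500):
        """Record one step, then flush buffered frames to disk every `flush_every` steps."""
        recorder.record_step()
        if (step + 1) % flush_every == 0:
            # each flush writes a separate mp4 segment and frees the GPU buffers
            recorder.save_videos(filename_suffix=f"part{(step + 1) // flush_every:03d}")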
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
@@ -92,7 +302,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
# set the environment seed
# note: certain randomizations occur in the environment initialization so we set the seed here
env_cfg.seed = agent_cfg.seed
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
@@ -122,7 +331,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
if isinstance(env.unwrapped, DirectMARLEnv):
env = multi_agent_to_single_agent(env)
# wrap for video recording
# wrap for video recording (standard Gym recording - Viewport)
if args_cli.video:
video_kwargs = {
"video_folder": os.path.join(log_dir, "videos", "play"),
@@ -151,12 +360,9 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
policy = runner.get_inference_policy(device=env.unwrapped.device)
# extract the neural network module
# we do this in a try-except to maintain backwards compatibility.
try:
# version 2.3 onwards
policy_nn = runner.alg.policy
except AttributeError:
# version 2.2 and below
policy_nn = runner.alg.actor_critic
# extract the normalizer
@@ -174,9 +380,42 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
dt = env.unwrapped.step_dt
# ==========================================================================
# Initialize the multi-camera recorder
# ==========================================================================
recorder = None
if args_cli.video:
# 1. Cameras to record (names must match those defined in mindbot_env_cfg.py)
target_cameras = [
"left_hand_camera",
"right_hand_camera",
"head_camera",
"chest_camera"
]
# 2. Environment indices to record
# Note: recording many environments makes video writing slow and memory-hungry; the first four are recorded here.
envs_to_record = [0, 1, 2, 3]
# To record every environment (not recommended unless num_envs is small):
# envs_to_record = list(range(env.unwrapped.num_envs))
save_dir = os.path.join(log_dir, "robot_camera_recordings")
recorder = MultiCameraRecorder(
env=env,
camera_names=target_cameras,
env_indices=envs_to_record,
output_dir=save_dir,
fps=int(1/dt)
)
# reset environment
obs = env.get_observations()
timestep = 0
print("[INFO] Starting simulation loop...")
# simulate environment
while simulation_app.is_running():
start_time = time.time()
@@ -186,12 +425,26 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
actions = policy(obs)
# env stepping
obs, _, dones, _ = env.step(actions)
# ==================================================================
# Record camera data every frame
# ==================================================================
if recorder:
recorder.record_step()
# reset recurrent states for episodes that have terminated
policy_nn.reset(dones)
if args_cli.video:
timestep += 1
# Exit the play loop after recording one video
if timestep == args_cli.video_length:
# ==============================================================
# Save all videos when the loop ends
# ==============================================================
if recorder:
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
recorder.save_videos(filename_suffix=timestamp)
break
# time delay for real-time evaluation
@@ -208,3 +461,564 @@ if __name__ == "__main__":
main()
# close sim app
simulation_app.close()
# # Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# # All rights reserved.
# #
# # SPDX-License-Identifier: BSD-3-Clause
# """Script to play a checkpoint if an RL agent from RSL-RL."""
# """Launch Isaac Sim Simulator first."""
# import argparse
# import sys
# import os
# import torch
# import torchvision
# import imageio
# import numpy as np
# import gymnasium as gym
# import time
# from datetime import datetime
# from isaaclab.app import AppLauncher
# # local imports
# import cli_args # isort: skip
# # add argparse arguments
# parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
# parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
# parser.add_argument("--video_length", type=int, default=2000, help="Length of the recorded video (in steps).")
# parser.add_argument(
# "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
# )
# parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
# parser.add_argument("--task", type=str, default=None, help="Name of the task.")
# parser.add_argument(
# "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
# )
# parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
# parser.add_argument(
# "--use_pretrained_checkpoint",
# action="store_true",
# help="Use the pre-trained checkpoint from Nucleus.",
# )
# parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
# # append RSL-RL cli arguments
# cli_args.add_rsl_rl_args(parser)
# # append AppLauncher cli args
# AppLauncher.add_app_launcher_args(parser)
# # parse the arguments
# args_cli, hydra_args = parser.parse_known_args()
# # always enable cameras to record video
# if args_cli.video:
# args_cli.enable_cameras = True
# # clear out sys.argv for Hydra
# sys.argv = [sys.argv[0]] + hydra_args
# # launch omniverse app
# app_launcher = AppLauncher(args_cli)
# simulation_app = app_launcher.app
# """Rest everything follows."""
# from rsl_rl.runners import DistillationRunner, OnPolicyRunner
# from isaaclab.envs import (
# DirectMARLEnv,
# DirectMARLEnvCfg,
# DirectRLEnvCfg,
# ManagerBasedRLEnvCfg,
# multi_agent_to_single_agent,
# )
# from isaaclab.utils.assets import retrieve_file_path
# from isaaclab.utils.dict import print_dict
# from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
# from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
# import isaaclab_tasks # noqa: F401
# from isaaclab_tasks.utils import get_checkpoint_path
# from isaaclab_tasks.utils.hydra import hydra_task_config
# import mindbot.tasks # noqa: F401
# # ==============================================================================
# # New: custom camera recorder class
# # ==============================================================================
# class CameraRecorder:
# """
# Records image data from a given camera in an Isaac Lab environment and saves it as a video.
# """
# def __init__(self, env, camera_name: str, output_dir: str, fps: int = 30):
# self.env = env
# self.camera_name = camera_name
# self.output_dir = output_dir
# self.fps = fps
# self.frames = [] # one tensor per recorded frame
# # Make sure the output directory exists
# os.makedirs(self.output_dir, exist_ok=True)
# # Try to get the camera object from the scene
# # env.unwrapped gives access to the underlying ManagerBasedRLEnv
# try:
# self.camera = self.env.unwrapped.scene[camera_name]
# print(f"[INFO][CameraRecorder] Successfully found camera: {camera_name}")
# except KeyError:
# print(f"[ERROR][CameraRecorder] Camera '{camera_name}' not found in scene entities!")
# print(f"Available entities: {self.env.unwrapped.scene.keys()}")
# raise
# def record_step(self):
# """在每个仿真步调用,获取图像数据"""
# # 获取 RGB 数据。注意Isaac Lab 的输出通常是 (num_envs, H, W, C)
# # 我们这里只取第一个环境 (env_index=0)
# # output["rgb"] 可能是 RGBA 或 RGB
# rgb_tensor = self.camera.data.output["rgb"][0]
# # 如果数据在 GPU 上,克隆一份并 detach防止被后续步骤覆盖
# # 此时先不转 CPU以免拖慢仿真循环最后保存时再转
# self.frames.append(rgb_tensor.clone().detach())
# def save_video(self, filename="robot_camera_video.mp4"):
# """循环结束后调用,将内存中的帧保存为视频"""
# if not self.frames:
# print("[WARN][CameraRecorder] No frames recorded.")
# return
# print(f"[INFO][CameraRecorder] Saving {len(self.frames)} frames to video...")
# # Convert the tensors to numpy
# video_frames = []
# for frame in self.frames:
# # Convert to a numpy array on the CPU (H, W, C)
# img = frame.cpu().numpy()
# # Handle normalized data: [0, 1] -> [0, 255]
# if img.dtype != np.uint8:
# if img.max() <= 1.01:
# img = (img * 255).astype(np.uint8)
# else:
# img = img.astype(np.uint8)
# # Convert RGBA -> RGB
# if img.shape[-1] == 4:
# img = img[:, :, :3]
# video_frames.append(img)
# output_path = os.path.join(self.output_dir, filename)
# # Write the video with imageio; this tends to be more stable on Windows
# try:
# imageio.mimsave(output_path, video_frames, fps=self.fps)
# print(f"[INFO][CameraRecorder] Video saved to: {output_path}")
# except Exception as e:
# print(f"[ERROR][CameraRecorder] Failed to save video using imageio: {e}")
# # If imageio also fails, try one last fallback
# print("[INFO] Attempting to save frames as images instead...")
# # ... (optional: save as a sequence of images)
# # def save_video(self, filename="robot_camera_video.mp4"):
# # """循环结束后调用,将内存中的帧保存为视频"""
# # if not self.frames:
# # print("[WARN][CameraRecorder] No frames recorded.")
# # return
# # print(f"[INFO][CameraRecorder] Saving {len(self.frames)} frames to video...")
# # # Stack frames -> (T, H, W, C)
# # video_tensor = torch.stack(self.frames)
# # # Handle the alpha channel: if RGBA (4 channels), keep only the 3 RGB channels
# # if video_tensor.shape[-1] == 4:
# # video_tensor = video_tensor[..., :3]
# # # Convert to (T, C, H, W); note that torchvision.io.write_video actually expects (T, H, W, C)
# # # Original: (T, H, W, C) -> permute -> (T, C, H, W)
# # video_tensor = video_tensor.permute(0, 3, 1, 2)
# # # Make sure it is uint8 in [0, 255]
# # if video_tensor.dtype != torch.uint8:
# # # If it is float in [0, 1], multiply by 255 and cast to uint8
# # if video_tensor.max() <= 1.0:
# # video_tensor = (video_tensor * 255).to(torch.uint8)
# # else:
# # video_tensor = video_tensor.to(torch.uint8)
# # # Move to the CPU
# # video_tensor = video_tensor.cpu()
# # output_path = os.path.join(self.output_dir, filename)
# # # Save the video with torchvision
# # torchvision.io.write_video(output_path, video_tensor, fps=self.fps)
# # print(f"[INFO][CameraRecorder] Video saved to: {output_path}")
# # ==============================================================================
# @hydra_task_config(args_cli.task, args_cli.agent)
# def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
# """Play with RSL-RL agent."""
# # grab task name for checkpoint path
# task_name = args_cli.task.split(":")[-1]
# train_task_name = task_name.replace("-Play", "")
# # override configurations with non-hydra CLI arguments
# agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
# env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
# # set the environment seed
# env_cfg.seed = agent_cfg.seed
# env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# # specify directory for logging experiments
# log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
# log_root_path = os.path.abspath(log_root_path)
# print(f"[INFO] Loading experiment from directory: {log_root_path}")
# if args_cli.use_pretrained_checkpoint:
# resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
# if not resume_path:
# print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
# return
# elif args_cli.checkpoint:
# resume_path = retrieve_file_path(args_cli.checkpoint)
# else:
# resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
# log_dir = os.path.dirname(resume_path)
# # set the log directory for the environment (works for all environment types)
# env_cfg.log_dir = log_dir
# # create isaac environment
# env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# # convert to single-agent instance if required by the RL algorithm
# if isinstance(env.unwrapped, DirectMARLEnv):
# env = multi_agent_to_single_agent(env)
# # wrap for video recording (standard Gym recording)
# # Note: this only records the viewport, not the robot's onboard cameras
# if args_cli.video:
# video_kwargs = {
# "video_folder": os.path.join(log_dir, "videos", "play"),
# "step_trigger": lambda step: step == 0,
# "video_length": args_cli.video_length,
# "disable_logger": True,
# }
# print("[INFO] Recording videos during training.")
# print_dict(video_kwargs, nesting=4)
# env = gym.wrappers.RecordVideo(env, **video_kwargs)
# # wrap around environment for rsl-rl
# env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
# print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# # load previously trained model
# if agent_cfg.class_name == "OnPolicyRunner":
# runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
# elif agent_cfg.class_name == "DistillationRunner":
# runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
# else:
# raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
# runner.load(resume_path)
# # obtain the trained policy for inference
# policy = runner.get_inference_policy(device=env.unwrapped.device)
# # extract the neural network module
# try:
# policy_nn = runner.alg.policy
# except AttributeError:
# policy_nn = runner.alg.actor_critic
# # extract the normalizer
# if hasattr(policy_nn, "actor_obs_normalizer"):
# normalizer = policy_nn.actor_obs_normalizer
# elif hasattr(policy_nn, "student_obs_normalizer"):
# normalizer = policy_nn.student_obs_normalizer
# else:
# normalizer = None
# # export policy to onnx/jit
# export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
# export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
# export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
# dt = env.unwrapped.step_dt
# # ==========================================================================
# # Initialize the custom camera recorder
# # ==========================================================================
# recorder = None
# if args_cli.video:
# # output_dir is set to hand_camera_videos under the log directory
# save_dir = os.path.join(log_dir, "hand_camera_videos")
# # 这里的 "left_hand_camera" 必须对应 mindbot_env_cfg.py 中定义的名称
# recorder = CameraRecorder(env, "left_hand_camera", save_dir, fps=int(1/dt))
# # reset environment
# obs = env.get_observations()
# timestep = 0
# print("[INFO] Starting simulation loop...")
# # simulate environment
# while simulation_app.is_running():
# start_time = time.time()
# # run everything in inference mode
# with torch.inference_mode():
# # agent stepping
# actions = policy(obs)
# # env stepping
# obs, _, dones, _ = env.step(actions)
# # ==================================================================
# # Record camera data every frame
# # ==================================================================
# if recorder:
# recorder.record_step()
# # reset recurrent states for episodes that have terminated
# policy_nn.reset(dones)
# if args_cli.video:
# timestep += 1
# # Exit the play loop after recording one video
# if timestep == args_cli.video_length:
# # ==============================================================
# # Save the video when the loop ends
# # ==============================================================
# if recorder:
# timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# recorder.save_video(filename=f"hand_cam_{timestamp}.mp4")
# break
# # time delay for real-time evaluation
# sleep_time = dt - (time.time() - start_time)
# if args_cli.real_time and sleep_time > 0:
# time.sleep(sleep_time)
# # close the simulator
# env.close()
# if __name__ == "__main__":
# # run the main function
# main()
# # close sim app
# simulation_app.close()
# # # Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# # # All rights reserved.
# # #
# # # SPDX-License-Identifier: BSD-3-Clause
# # """Script to play a checkpoint if an RL agent from RSL-RL."""
# # """Launch Isaac Sim Simulator first."""
# # import argparse
# # import sys
# # from isaaclab.app import AppLauncher
# # # local imports
# # import cli_args # isort: skip
# # # add argparse arguments
# # parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
# # parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
# # parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
# # parser.add_argument(
# # "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
# # )
# # parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
# # parser.add_argument("--task", type=str, default=None, help="Name of the task.")
# # parser.add_argument(
# # "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
# # )
# # parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
# # parser.add_argument(
# # "--use_pretrained_checkpoint",
# # action="store_true",
# # help="Use the pre-trained checkpoint from Nucleus.",
# # )
# # parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
# # # append RSL-RL cli arguments
# # cli_args.add_rsl_rl_args(parser)
# # # append AppLauncher cli args
# # AppLauncher.add_app_launcher_args(parser)
# # # parse the arguments
# # args_cli, hydra_args = parser.parse_known_args()
# # # always enable cameras to record video
# # if args_cli.video:
# # args_cli.enable_cameras = True
# # # clear out sys.argv for Hydra
# # sys.argv = [sys.argv[0]] + hydra_args
# # # launch omniverse app
# # app_launcher = AppLauncher(args_cli)
# # simulation_app = app_launcher.app
# # """Rest everything follows."""
# # import gymnasium as gym
# # import os
# # import time
# # import torch
# # from rsl_rl.runners import DistillationRunner, OnPolicyRunner
# # from isaaclab.envs import (
# # DirectMARLEnv,
# # DirectMARLEnvCfg,
# # DirectRLEnvCfg,
# # ManagerBasedRLEnvCfg,
# # multi_agent_to_single_agent,
# # )
# # from isaaclab.utils.assets import retrieve_file_path
# # from isaaclab.utils.dict import print_dict
# # from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
# # from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
# # import isaaclab_tasks # noqa: F401
# # from isaaclab_tasks.utils import get_checkpoint_path
# # from isaaclab_tasks.utils.hydra import hydra_task_config
# # import mindbot.tasks # noqa: F401
# # @hydra_task_config(args_cli.task, args_cli.agent)
# # def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
# # """Play with RSL-RL agent."""
# # # grab task name for checkpoint path
# # task_name = args_cli.task.split(":")[-1]
# # train_task_name = task_name.replace("-Play", "")
# # # override configurations with non-hydra CLI arguments
# # agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
# # env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
# # # set the environment seed
# # # note: certain randomizations occur in the environment initialization so we set the seed here
# # env_cfg.seed = agent_cfg.seed
# # env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# # # specify directory for logging experiments
# # log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
# # log_root_path = os.path.abspath(log_root_path)
# # print(f"[INFO] Loading experiment from directory: {log_root_path}")
# # if args_cli.use_pretrained_checkpoint:
# # resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
# # if not resume_path:
# # print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
# # return
# # elif args_cli.checkpoint:
# # resume_path = retrieve_file_path(args_cli.checkpoint)
# # else:
# # resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
# # log_dir = os.path.dirname(resume_path)
# # # set the log directory for the environment (works for all environment types)
# # env_cfg.log_dir = log_dir
# # # create isaac environment
# # env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# # # convert to single-agent instance if required by the RL algorithm
# # if isinstance(env.unwrapped, DirectMARLEnv):
# # env = multi_agent_to_single_agent(env)
# # # wrap for video recording
# # if args_cli.video:
# # video_kwargs = {
# # "video_folder": os.path.join(log_dir, "videos", "play"),
# # "step_trigger": lambda step: step == 0,
# # "video_length": args_cli.video_length,
# # "disable_logger": True,
# # }
# # print("[INFO] Recording videos during training.")
# # print_dict(video_kwargs, nesting=4)
# # env = gym.wrappers.RecordVideo(env, **video_kwargs)
# # # wrap around environment for rsl-rl
# # env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
# # print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# # # load previously trained model
# # if agent_cfg.class_name == "OnPolicyRunner":
# # runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
# # elif agent_cfg.class_name == "DistillationRunner":
# # runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
# # else:
# # raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
# # runner.load(resume_path)
# # # obtain the trained policy for inference
# # policy = runner.get_inference_policy(device=env.unwrapped.device)
# # # extract the neural network module
# # # we do this in a try-except to maintain backwards compatibility.
# # try:
# # # version 2.3 onwards
# # policy_nn = runner.alg.policy
# # except AttributeError:
# # # version 2.2 and below
# # policy_nn = runner.alg.actor_critic
# # # extract the normalizer
# # if hasattr(policy_nn, "actor_obs_normalizer"):
# # normalizer = policy_nn.actor_obs_normalizer
# # elif hasattr(policy_nn, "student_obs_normalizer"):
# # normalizer = policy_nn.student_obs_normalizer
# # else:
# # normalizer = None
# # # export policy to onnx/jit
# # export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
# # export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
# # export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
# # dt = env.unwrapped.step_dt
# # # reset environment
# # obs = env.get_observations()
# # timestep = 0
# # # simulate environment
# # while simulation_app.is_running():
# # start_time = time.time()
# # # run everything in inference mode
# # with torch.inference_mode():
# # # agent stepping
# # actions = policy(obs)
# # # env stepping
# # obs, _, dones, _ = env.step(actions)
# # # reset recurrent states for episodes that have terminated
# # policy_nn.reset(dones)
# # if args_cli.video:
# # timestep += 1
# # # Exit the play loop after recording one video
# # if timestep == args_cli.video_length:
# # break
# # # time delay for real-time evaluation
# # sleep_time = dt - (time.time() - start_time)
# # if args_cli.real_time and sleep_time > 0:
# # time.sleep(sleep_time)
# # # close the simulator
# # env.close()
# # if __name__ == "__main__":
# # # run the main function
# # main()
# # # close sim app
# # simulation_app.close()

210
scripts/rsl_rl/play.py.bak Normal file

@@ -0,0 +1,210 @@
# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Script to play a checkpoint if an RL agent from RSL-RL."""
"""Launch Isaac Sim Simulator first."""
import argparse
import sys
from isaaclab.app import AppLauncher
# local imports
import cli_args # isort: skip
# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument(
"--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument(
"--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
)
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument(
"--use_pretrained_checkpoint",
action="store_true",
help="Use the pre-trained checkpoint from Nucleus.",
)
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
# append RSL-RL cli arguments
cli_args.add_rsl_rl_args(parser)
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli, hydra_args = parser.parse_known_args()
# always enable cameras to record video
if args_cli.video:
args_cli.enable_cameras = True
# clear out sys.argv for Hydra
sys.argv = [sys.argv[0]] + hydra_args
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
import os
import time
import torch
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from isaaclab.envs import (
DirectMARLEnv,
DirectMARLEnvCfg,
DirectRLEnvCfg,
ManagerBasedRLEnvCfg,
multi_agent_to_single_agent,
)
from isaaclab.utils.assets import retrieve_file_path
from isaaclab.utils.dict import print_dict
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks # noqa: F401
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
"""Play with RSL-RL agent."""
# grab task name for checkpoint path
task_name = args_cli.task.split(":")[-1]
train_task_name = task_name.replace("-Play", "")
# override configurations with non-hydra CLI arguments
agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
# set the environment seed
# note: certain randomizations occur in the environment initialization so we set the seed here
env_cfg.seed = agent_cfg.seed
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# specify directory for logging experiments
log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
log_root_path = os.path.abspath(log_root_path)
print(f"[INFO] Loading experiment from directory: {log_root_path}")
if args_cli.use_pretrained_checkpoint:
resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
if not resume_path:
print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
return
elif args_cli.checkpoint:
resume_path = retrieve_file_path(args_cli.checkpoint)
else:
resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
log_dir = os.path.dirname(resume_path)
# set the log directory for the environment (works for all environment types)
env_cfg.log_dir = log_dir
# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# convert to single-agent instance if required by the RL algorithm
if isinstance(env.unwrapped, DirectMARLEnv):
env = multi_agent_to_single_agent(env)
# wrap for video recording
if args_cli.video:
video_kwargs = {
"video_folder": os.path.join(log_dir, "videos", "play"),
"step_trigger": lambda step: step == 0,
"video_length": args_cli.video_length,
"disable_logger": True,
}
print("[INFO] Recording videos during training.")
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)
# wrap around environment for rsl-rl
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# load previously trained model
if agent_cfg.class_name == "OnPolicyRunner":
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
elif agent_cfg.class_name == "DistillationRunner":
runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
else:
raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
runner.load(resume_path)
# obtain the trained policy for inference
policy = runner.get_inference_policy(device=env.unwrapped.device)
# extract the neural network module
# we do this in a try-except to maintain backwards compatibility.
try:
# version 2.3 onwards
policy_nn = runner.alg.policy
except AttributeError:
# version 2.2 and below
policy_nn = runner.alg.actor_critic
# extract the normalizer
if hasattr(policy_nn, "actor_obs_normalizer"):
normalizer = policy_nn.actor_obs_normalizer
elif hasattr(policy_nn, "student_obs_normalizer"):
normalizer = policy_nn.student_obs_normalizer
else:
normalizer = None
# export policy to onnx/jit
export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
dt = env.unwrapped.step_dt
# reset environment
obs = env.get_observations()
timestep = 0
# simulate environment
while simulation_app.is_running():
start_time = time.time()
# run everything in inference mode
with torch.inference_mode():
# agent stepping
actions = policy(obs)
# env stepping
obs, _, dones, _ = env.step(actions)
# reset recurrent states for episodes that have terminated
policy_nn.reset(dones)
if args_cli.video:
timestep += 1
# Exit the play loop after recording one video
if timestep == args_cli.video_length:
break
# time delay for real-time evaluation
sleep_time = dt - (time.time() - start_time)
if args_cli.real_time and sleep_time > 0:
time.sleep(sleep_time)
# close the simulator
env.close()
if __name__ == "__main__":
# run the main function
main()
# close sim app
simulation_app.close()

270
scripts/rsl_rl/play.py.bak1 Normal file

@@ -0,0 +1,270 @@
import argparse
import sys
import os
import time
import imageio
import numpy as np
import h5py
import torch
# Import AppLauncher first and instantiate SimulationApp as early as possible, so pxr is loaded before anything needs it
from isaaclab.app import AppLauncher
import cli_args # isort: skip
# CLI
parser = argparse.ArgumentParser(description="Play an RL agent with multi-cam recording.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument("--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations.")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument("--use_pretrained_checkpoint", action="store_true", help="Use the pre-trained checkpoint from Nucleus.")
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
parser.add_argument("--max_steps", type=int, default=None, help="最大步数,达到后提前退出")
cli_args.add_rsl_rl_args(parser)
AppLauncher.add_app_launcher_args(parser)
args_cli, hydra_args = parser.parse_known_args()
if args_cli.video:
args_cli.enable_cameras = True
# Pass the remaining arguments through to Hydra
sys.argv = [sys.argv[0]] + hydra_args
# ==== Instantiate SimulationApp first ====
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
# ==== Only then import modules that depend on isaac/pxr ====
import gymnasium as gym
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from isaaclab.envs import DirectMARLEnv, DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg, multi_agent_to_single_agent
from isaaclab.utils.assets import retrieve_file_path
from isaaclab.utils.dict import print_dict
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks # noqa: F401
CAM_NAMES = ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand", "cam_top", "cam_side"]
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
task_name = args_cli.task.split(":")[-1]
train_task_name = task_name.replace("-Play", "")
agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
env_cfg.seed = agent_cfg.seed
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
log_root_path = os.path.abspath(os.path.join("logs", "rsl_rl", agent_cfg.experiment_name))
print(f"[INFO] Loading experiment from directory: {log_root_path}")
if args_cli.use_pretrained_checkpoint:
resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
if not resume_path:
print("[INFO] No pre-trained checkpoint for this task.")
return
elif args_cli.checkpoint:
resume_path = retrieve_file_path(args_cli.checkpoint)
else:
resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
log_dir = os.path.dirname(resume_path)
env_cfg.log_dir = log_dir
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
if isinstance(env.unwrapped, DirectMARLEnv):
env = multi_agent_to_single_agent(env)
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
if agent_cfg.class_name == "OnPolicyRunner":
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
elif agent_cfg.class_name == "DistillationRunner":
runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
else:
raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
runner.load(resume_path)
policy = runner.get_inference_policy(device=env.unwrapped.device)
try:
policy_nn = runner.alg.policy
except AttributeError:
policy_nn = runner.alg.actor_critic
# Export the model
export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
normalizer = getattr(policy_nn, "actor_obs_normalizer", getattr(policy_nn, "student_obs_normalizer", None))
export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
# dt = env.unwrapped.step_dt
# # Recording buffers
# cam_buffers = {n: [] for n in CAM_NAMES}
# joint_log, joint_vel_log, action_log, ts_log = [], [], [], []
# t0 = time.time()
# obs = env.get_observations()
# timestep = 0
# step_count = 0
# try:
# while simulation_app.is_running():
# start_time = time.time()
# with torch.inference_mode():
# actions = policy(obs)
# obs, _, dones, _ = env.step(actions)
# policy_nn.reset(dones)
# # Camera frames (env 0 only; drop the [0] index to record all envs)
# for name in CAM_NAMES:
# if name not in env.unwrapped.scene.sensors:
# continue
# cam = env.unwrapped.scene.sensors[name]
# rgba = cam.data.output.get("rgba", cam.data.output.get("rgb"))
# if rgba is None:
# continue
# frame = rgba[0].cpu().numpy()
# if frame.shape[-1] == 4:
# frame = frame[..., :3]
# cam_buffers[name].append(frame)
# # Joint positions / velocities / actions
# robot = env.unwrapped.scene["Mindbot"]
# joint_log.append(robot.data.joint_pos.cpu().numpy())
# joint_vel_log.append(robot.data.joint_vel.cpu().numpy())
# action_log.append(actions.cpu().numpy())
# ts_log.append(time.time() - t0)
# step_count += 1
# if args_cli.max_steps and step_count >= args_cli.max_steps:
# break
# if args_cli.video:
# timestep += 1
# if timestep == args_cli.video_length:
# break
# sleep_time = dt - (time.time() - start_time)
# if args_cli.real_time and sleep_time > 0:
# time.sleep(sleep_time)
# finally:
# # Save to HDF5
# h5_path = os.path.join(log_dir, "rollout_multi_cam.h5")
# with h5py.File(h5_path, "w") as f:
# f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip")
# f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip")
# f.create_dataset("actions", data=np.stack(action_log), compression="gzip")
# f.create_dataset("timestamps", data=np.array(ts_log))
# for name, frames in cam_buffers.items():
# if not frames:
# continue
# dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip")
# if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]:
# fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05
# else:
# fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295
# dset.attrs["fx"] = fx
# dset.attrs["fy"] = fy
# dset.attrs["cx"] = cx
# dset.attrs["cy"] = cy
# dset.attrs["focal_length"] = 1.93
# print(f"[INFO] Saved HDF5 to {h5_path}")
# # Optional: single-camera MP4
# head_frames = cam_buffers["cam_head"]
# if head_frames:
# fps = int(round(1.0 / dt))
# video_path = os.path.join(log_dir, "cam_head.mp4")
# imageio.mimsave(video_path, head_frames, fps=fps)
# print(f"[INFO] Saved video to {video_path}")
# env.close()
dt = env.unwrapped.step_dt
# Recording buffers
cam_buffers = {n: [] for n in CAM_NAMES}
joint_log, joint_vel_log, action_log, ts_log = [], [], [], []
t0 = time.time()
obs = env.get_observations()
timestep = 0
while simulation_app.is_running():
start_time = time.time()
with torch.inference_mode():
actions = policy(obs)
obs, _, dones, _ = env.step(actions)
policy_nn.reset(dones)
# Camera frames (env 0 only; drop the [0] index to record all envs)
for name in CAM_NAMES:
if name not in env.unwrapped.scene.sensors:
continue
cam = env.unwrapped.scene.sensors[name]
rgba = cam.data.output.get("rgba", cam.data.output.get("rgb"))
if rgba is None:
continue
frame = rgba[0].cpu().numpy()
if frame.shape[-1] == 4:
frame = frame[..., :3]
cam_buffers[name].append(frame)
# Joint positions / velocities / actions
robot = env.unwrapped.scene["Mindbot"]
joint_log.append(robot.data.joint_pos.cpu().numpy())
joint_vel_log.append(robot.data.joint_vel.cpu().numpy())
action_log.append(actions.cpu().numpy())
ts_log.append(time.time() - t0)
if args_cli.video:
timestep += 1
if timestep == args_cli.video_length:
break
sleep_time = dt - (time.time() - start_time)
if args_cli.real_time and sleep_time > 0:
time.sleep(sleep_time)
# Save to HDF5
h5_path = os.path.join(log_dir, "rollout_multi_cam.h5")
with h5py.File(h5_path, "w") as f:
f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip")
f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip")
f.create_dataset("actions", data=np.stack(action_log), compression="gzip")
f.create_dataset("timestamps", data=np.array(ts_log))
for name, frames in cam_buffers.items():
if not frames:
continue
dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip")
# Intrinsics: set these to match your camera configuration
if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]:
fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05
else:
fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295
dset.attrs["fx"] = fx
dset.attrs["fy"] = fy
dset.attrs["cx"] = cx
dset.attrs["cy"] = cy
dset.attrs["focal_length"] = 1.93
print(f"[INFO] Saved HDF5 to {h5_path}")
# Optional: single-camera MP4
head_frames = cam_buffers["cam_head"]
if head_frames:
fps = int(round(1.0 / dt))
video_path = os.path.join(log_dir, "cam_head.mp4")
imageio.mimsave(video_path, head_frames, fps=fps)
print(f"[INFO] Saved video to {video_path}")
env.close()
if __name__ == "__main__":
main()
simulation_app.close()
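For reference, the HDF5 file written by play.py.bak1 above contains joint_pos/joint_vel/actions/timestamps datasets plus one cams/<name>/rgb dataset per camera, with the pinhole intrinsics attached as attributes. A short read-back sketch (not part of this commit; the path is whatever log_dir the run used):

    import h5py
    import numpy as np

    with h5py.File("rollout_multi_cam.h5", "r") as f:
        joint_pos = f["joint_pos"][:]          # (T, num_envs, num_joints)
        head_rgb = f["cams/cam_head/rgb"][:]   # (T, H, W, 3)
        a = f["cams/cam_head/rgb"].attrs
        # rebuild the 3x3 pinhole intrinsics matrix from the stored attributes
        K = np.array([[a["fx"], 0.0, a["cx"]],
                      [0.0, a["fy"], a["cy"]],
                      [0.0, 0.0, 1.0]])
    print(joint_pos.shape, head_rgb.shape)
    print(K)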