From 196378f2d3a4906d1b29dee554c19ba38c4dacbd Mon Sep 17 00:00:00 2001 From: hangX Date: Wed, 28 Jan 2026 19:51:03 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9C=AC=E5=9C=B04090=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E6=8F=90=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 5 + scripts/rsl_rl/play.py | 836 ++++++++++++++- scripts/rsl_rl/play.py.bak | 210 ++++ scripts/rsl_rl/play.py.bak1 | 270 +++++ source/mindbot/mindbot/robot/mindbot.py | 52 +- .../manager_based/centrifuge/__init__.py | 30 + .../centrifuge/agents/__init__.py | 4 + .../centrifuge/agents/rl_games_ppo_cfg.yaml | 78 ++ .../centrifuge/agents/rsl_rl_ppo_cfg.py | 72 ++ .../centrifuge/agents/sb3_ppo_cfg.yaml | 20 + .../centrifuge/agents/skrl_amp_cfg.yaml | 111 ++ .../centrifuge/agents/skrl_ippo_cfg.yaml | 80 ++ .../centrifuge/agents/skrl_mappo_cfg.yaml | 82 ++ .../centrifuge/agents/skrl_ppo_cfg.yaml | 80 ++ .../centrifuge/centrifuge_env_cfg.py | 636 ++++++++++++ .../manager_based/centrifuge/mdp/__init__.py | 13 + .../centrifuge/mdp/curriculums.py | 50 + .../manager_based/centrifuge/mdp/gripper.py | 54 + .../manager_based/centrifuge/mdp/rewards.py | 881 ++++++++++++++++ .../centrifuge/mdp/terminations.py | 97 ++ .../tasks/manager_based/pull/mdp/rewards.py | 657 ++++++------ .../manager_based/pull/mdp/rewards.py.bak1 | 967 ++++++++++++++++++ .../tasks/manager_based/pull/pull_env_cfg.py | 347 +++++-- .../manager_based/pull/pull_env_cfg.py.bak | 681 ++++++++++++ .../pullUltrasoundLidUp/__init__.py | 29 + .../pullUltrasoundLidUp/agents/__init__.py | 4 + .../agents/rl_games_ppo_cfg.yaml | 78 ++ .../agents/rsl_rl_ppo_cfg.py | 72 ++ .../agents/sb3_ppo_cfg.yaml | 20 + .../agents/skrl_amp_cfg.yaml | 111 ++ .../agents/skrl_ippo_cfg.yaml | 80 ++ .../agents/skrl_mappo_cfg.yaml | 82 ++ .../agents/skrl_ppo_cfg.yaml | 80 ++ .../pullUltrasoundLidUp/mdp/__init__.py | 13 + .../pullUltrasoundLidUp/mdp/curriculums.py | 50 + .../pullUltrasoundLidUp/mdp/gripper.py | 54 + .../pullUltrasoundLidUp/mdp/rewards.py | 881 ++++++++++++++++ .../pullUltrasoundLidUp/mdp/terminations.py | 97 ++ .../pullUltrasoundLidUp/mindbot_env_cfg.py | 578 +++++++++++ test.py | 282 +++++ test_sence.py | 73 ++ 41 files changed, 8447 insertions(+), 450 deletions(-) create mode 100644 scripts/rsl_rl/play.py.bak create mode 100644 scripts/rsl_rl/play.py.bak1 create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/__init__.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/__init__.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/rl_games_ppo_cfg.yaml create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/rsl_rl_ppo_cfg.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/sb3_ppo_cfg.yaml create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_amp_cfg.yaml create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_ippo_cfg.yaml create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_mappo_cfg.yaml create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_ppo_cfg.yaml create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/centrifuge_env_cfg.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/__init__.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/curriculums.py create mode 100644 
source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/gripper.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/rewards.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/terminations.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py.bak1 create mode 100644 source/mindbot/mindbot/tasks/manager_based/pull/pull_env_cfg.py.bak create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/__init__.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/__init__.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/rl_games_ppo_cfg.yaml create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/rsl_rl_ppo_cfg.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/sb3_ppo_cfg.yaml create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_amp_cfg.yaml create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_ippo_cfg.yaml create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_mappo_cfg.yaml create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_ppo_cfg.yaml create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/__init__.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/curriculums.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/gripper.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/rewards.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/terminations.py create mode 100644 source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mindbot_env_cfg.py create mode 100644 test.py create mode 100644 test_sence.py diff --git a/.gitignore b/.gitignore index 08d2e8d..29b8e0c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +logs/ +outputs/ +.venv/ + + # C++ **/cmake-build*/ **/build*/ diff --git a/scripts/rsl_rl/play.py b/scripts/rsl_rl/play.py index 50f28e9..baef6c2 100644 --- a/scripts/rsl_rl/play.py +++ b/scripts/rsl_rl/play.py @@ -9,6 +9,12 @@ import argparse import sys +import os +import torch +import torchvision +import gymnasium as gym +import time +from datetime import datetime from isaaclab.app import AppLauncher @@ -18,7 +24,7 @@ import cli_args # isort: skip # add argparse arguments parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") -parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") +parser.add_argument("--video_length", type=int, default=2000, help="Length of the recorded video (in steps).") parser.add_argument( "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." 
)
@@ -53,11 +59,6 @@ simulation_app = app_launcher.app
 
 """Rest everything follows."""
 
-import gymnasium as gym
-import os
-import time
-import torch
-
 from rsl_rl.runners import DistillationRunner, OnPolicyRunner
 
 from isaaclab.envs import (
@@ -79,6 +80,215 @@ from isaaclab_tasks.utils.hydra import hydra_task_config
 
 import mindbot.tasks  # noqa: F401
 
+# ==============================================================================
+# [Update] Multi-camera, multi-environment recorder class
+# ==============================================================================
+# 1. Make sure the following imports are present at the top of the file
+import imageio
+import numpy as np
+
+"""use gpu compute to record video"""
+
+class MultiCameraRecorder:
+    def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
+        self.env = env
+        self.camera_names = camera_names
+        self.env_indices = env_indices
+        self.output_dir = output_dir
+        self.fps = fps
+        self.frames = {cam_name: {env_idx: [] for env_idx in env_indices} for cam_name in camera_names}
+        os.makedirs(self.output_dir, exist_ok=True)
+
+        self.cameras = {}
+        for name in camera_names:
+            if name in self.env.unwrapped.scene.keys():
+                self.cameras[name] = self.env.unwrapped.scene[name]
+                print(f"[INFO] Camera {name} linked.")
+
+    def record_step(self):
+        """Clone the frame data while keeping it on the GPU."""
+        for cam_name, camera_obj in self.cameras.items():
+            # Force a sync once before fetching the data (prevents the backend from losing it)
+            rgb_data = camera_obj.data.output["rgb"]
+
+            for env_idx in self.env_indices:
+                # .clone() keeps the frame on the GPU; mind the VRAM usage
+                self.frames[cam_name][env_idx].append(rgb_data[env_idx].clone())
+
+    def save_videos(self, filename_suffix=""):
+        print(f"[INFO] Saving videos from GPU to Disk...")
+        for cam_name, env_dict in self.frames.items():
+            for env_idx, frame_list in env_dict.items():
+                if not frame_list: continue
+
+                # Stack into a (T, H, W, C) tensor for torchvision
+                video_tensor = torch.stack(frame_list)
+                if video_tensor.shape[-1] == 4:  # RGBA -> RGB
+                    video_tensor = video_tensor[..., :3]
+
+                # Move to CPU and save
+                video_cpu = video_tensor.cpu()
+                output_path = os.path.join(self.output_dir, f"{cam_name}_env{env_idx}_{filename_suffix}.mp4")
+
+                # Write with torchvision, which expects (T, H, W, C)
+                torchvision.io.write_video(output_path, video_cpu, fps=self.fps)
+
+                # [Key] Free the GPU memory immediately after saving
+                del video_tensor
+                del video_cpu
+                frame_list.clear()
+
+        torch.cuda.empty_cache()
+
+"""use cpu compute to record video"""
+# # 2.
Modify the MultiCameraRecorder class
+# class MultiCameraRecorder:
+#     def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
+#         self.env = env
+#         self.camera_names = camera_names
+#         self.env_indices = env_indices
+#         self.output_dir = output_dir
+#         self.fps = fps
+#         self.frames = {cam_name: {env_idx: [] for env_idx in env_indices} for cam_name in camera_names}
+#         os.makedirs(self.output_dir, exist_ok=True)
+#         self.cameras = {}
+#         for name in camera_names:
+#             try:
+#                 self.cameras[name] = self.env.unwrapped.scene[name]
+#                 print(f"[INFO][MultiCameraRecorder] Found camera: {name}")
+#             except KeyError:
+#                 print(f"[WARN][MultiCameraRecorder] Camera '{name}' not found!")
+
+#     def record_step(self):
+#         """Called at every simulation step."""
+#         for cam_name, camera_obj in self.cameras.items():
+#             # [Key change] Make sure the data has been synced before reading it
+#             # This prevents reading memory that is still being rendered, which causes an access violation
+#             rgb_data = camera_obj.data.output["rgb"]
+
+#             for env_idx in self.env_indices:
+#                 if env_idx >= rgb_data.shape[0]: continue
+
+#                 # Convert to a numpy array on the CPU; usually more robust than stacking tensors for torchvision
+#                 frame = rgb_data[env_idx].clone().detach().cpu().numpy()
+#                 self.frames[cam_name][env_idx].append(frame)
+
+#     def save_videos(self, filename_suffix=""):
+#         """Called after the loop finishes."""
+#         print(f"[INFO][MultiCameraRecorder] Saving videos...")
+#         for cam_name, env_dict in self.frames.items():
+#             for env_idx, frame_list in env_dict.items():
+#                 if not frame_list: continue
+
+#                 print(f"  -> Saving {cam_name} (Env {env_idx})...")
+
+#                 # Normalize the format and save with imageio
+#                 processed_frames = []
+#                 for img in frame_list:
+#                     # [0, 1] -> [0, 255]
+#                     if img.dtype != np.uint8:
+#                         if img.max() <= 1.01: img = (img * 255).astype(np.uint8)
+#                         else: img = img.astype(np.uint8)
+#                     # Drop the alpha channel
+#                     if img.shape[-1] == 4: img = img[:, :, :3]
+#                     processed_frames.append(img)
+
+#                 fname = f"{cam_name}_env{env_idx}_{filename_suffix}.mp4"
+#                 output_path = os.path.join(self.output_dir, fname)
+
+#                 try:
+#                     # Write the video with imageio
+#                     imageio.mimsave(output_path, processed_frames, fps=self.fps)
+#                     print(f"     Saved: {output_path}")
+#                 except Exception as e:
+#                     print(f"     [ERROR] Failed to save {fname}: {e}")
+
+
+
+# class MultiCameraRecorder:
+#     """
+#     Records image data from multiple cameras in an Isaac Lab environment, with support for multiple environment instances.
+#     """
+#     def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
+#         self.env = env
+#         self.camera_names = camera_names
+#         self.env_indices = env_indices  # list of environment indices to record, e.g. [0, 1]
+#         self.output_dir = output_dir
+#         self.fps = fps
+
+#         # Data structure: self.frames[camera_name][env_idx] = [list of tensors]
+#         self.frames = {
+#             cam_name: {env_idx: [] for env_idx in env_indices}
+#             for cam_name in camera_names
+#         }
+
+#         os.makedirs(self.output_dir, exist_ok=True)
+
+#         # Get the camera objects from the scene
+#         self.cameras = {}
+#         for name in camera_names:
+#             try:
+#                 # Try to get the camera handle
+#                 self.cameras[name] = self.env.unwrapped.scene[name]
+#                 print(f"[INFO][MultiCameraRecorder] Found camera: {name}")
+#             except KeyError:
+#                 print(f"[WARN][MultiCameraRecorder] Camera '{name}' not found in scene!
Skipping.") + +# def record_step(self): +# """在每个仿真步调用""" +# for cam_name, camera_obj in self.cameras.items(): +# # 获取 RGB 数据 (Shape: [num_envs, H, W, C]) +# # 注意:如果数据类型是 RGBA,后面处理 +# rgb_data = camera_obj.data.output["rgb"] + +# for env_idx in self.env_indices: +# # 边界检查 +# if env_idx >= rgb_data.shape[0]: +# continue + +# # [关键] 立即 clone 并转到 CPU。 +# # 如果不转 CPU,4个相机 * 长时间录制会迅速耗尽 GPU 显存。 +# frame = rgb_data[env_idx].clone().detach().cpu() + +# self.frames[cam_name][env_idx].append(frame) + +# def save_videos(self, filename_suffix=""): +# """循环结束后调用,保存所有视频""" +# print(f"[INFO][MultiCameraRecorder] Saving videos for {len(self.cameras)} cameras...") + +# for cam_name, env_dict in self.frames.items(): +# for env_idx, frame_list in env_dict.items(): +# if not frame_list: +# continue + +# print(f" -> Processing {cam_name} (Env {env_idx}) with {len(frame_list)} frames...") + +# # 堆叠 (T, H, W, C) +# video_tensor = torch.stack(frame_list) + +# # 去除 Alpha 通道 +# if video_tensor.shape[-1] == 4: +# video_tensor = video_tensor[..., :3] + +# # (T, H, W, C) -> (T, C, H, W) +# video_tensor = video_tensor.permute(0, 3, 1, 2) + +# # 确保是 uint8 +# if video_tensor.dtype != torch.uint8: +# if video_tensor.max() <= 1.0: +# video_tensor = (video_tensor * 255).to(torch.uint8) +# else: +# video_tensor = video_tensor.to(torch.uint8) + +# # 文件名: left_hand_camera_env0_2024-xx-xx.mp4 +# fname = f"{cam_name}_env{env_idx}_{filename_suffix}.mp4" +# output_path = os.path.join(self.output_dir, fname) + +# torchvision.io.write_video(output_path, video_tensor, fps=self.fps) +# print(f" Saved: {output_path}") + +# ============================================================================== + @hydra_task_config(args_cli.task, args_cli.agent) def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): @@ -92,7 +302,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs # set the environment seed - # note: certain randomizations occur in the environment initialization so we set the seed here env_cfg.seed = agent_cfg.seed env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device @@ -122,7 +331,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen if isinstance(env.unwrapped, DirectMARLEnv): env = multi_agent_to_single_agent(env) - # wrap for video recording + # wrap for video recording (standard Gym recording - Viewport) if args_cli.video: video_kwargs = { "video_folder": os.path.join(log_dir, "videos", "play"), @@ -151,12 +360,9 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen policy = runner.get_inference_policy(device=env.unwrapped.device) # extract the neural network module - # we do this in a try-except to maintain backwards compatibility. try: - # version 2.3 onwards policy_nn = runner.alg.policy except AttributeError: - # version 2.2 and below policy_nn = runner.alg.actor_critic # extract the normalizer @@ -174,9 +380,42 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen dt = env.unwrapped.step_dt + # ========================================================================== + # 初始化多相机录制器 + # ========================================================================== + recorder = None + if args_cli.video: + # 1. 
Define the list of cameras to record (the names must match those in mindbot_env_cfg.py)
+        target_cameras = [
+            "left_hand_camera",
+            "right_hand_camera",
+            "head_camera",
+            "chest_camera"
+        ]
+
+        # 2. Define the environment indices to record (env_nums)
+        # By default only env 0 is recorded; to record the first two, change this to [0, 1]
+        # Note: do not record too many environments, otherwise writing the videos is very slow and memory-hungry
+        envs_to_record = [0,1,2,3]
+        # If you want to record all environments (not recommended unless num_envs is small):
+        # envs_to_record = list(range(env.unwrapped.num_envs))
+
+        save_dir = os.path.join(log_dir, "robot_camera_recordings")
+
+        recorder = MultiCameraRecorder(
+            env=env,
+            camera_names=target_cameras,
+            env_indices=envs_to_record,
+            output_dir=save_dir,
+            fps=int(1/dt)
+        )
+
     # reset environment
     obs = env.get_observations()
     timestep = 0
+
+    print("[INFO] Starting simulation loop...")
+
     # simulate environment
     while simulation_app.is_running():
         start_time = time.time()
@@ -186,12 +425,26 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
             actions = policy(obs)
             # env stepping
            obs, _, dones, _ = env.step(actions)
+
+            # ==================================================================
+            # Record the camera data every frame
+            # ==================================================================
+            if recorder:
+                recorder.record_step()
+
             # reset recurrent states for episodes that have terminated
             policy_nn.reset(dones)
+
         if args_cli.video:
             timestep += 1
             # Exit the play loop after recording one video
             if timestep == args_cli.video_length:
+                # ==============================================================
+                # Save all recorded videos once the loop ends
+                # ==============================================================
+                if recorder:
+                    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+                    recorder.save_videos(filename_suffix=timestamp)
                 break
 
         # time delay for real-time evaluation
@@ -208,3 +461,564 @@ if __name__ == "__main__":
     main()
     # close sim app
     simulation_app.close()
+
+# # Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# # All rights reserved.
+# #
+# # SPDX-License-Identifier: BSD-3-Clause
+
+# """Script to play a checkpoint if an RL agent from RSL-RL."""
+
+# """Launch Isaac Sim Simulator first."""
+
+# import argparse
+# import sys
+# import os
+# import torch
+# import torchvision
+# import imageio
+# import numpy as np
+# import gymnasium as gym
+# import time
+# from datetime import datetime
+
+# from isaaclab.app import AppLauncher
+
+# # local imports
+# import cli_args  # isort: skip
+
+# # add argparse arguments
+# parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
+# parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
+# parser.add_argument("--video_length", type=int, default=2000, help="Length of the recorded video (in steps).")
+# parser.add_argument(
+#     "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
+# )
+# parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
+# parser.add_argument("--task", type=str, default=None, help="Name of the task.")
+# parser.add_argument(
+#     "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
+# ) +# parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +# parser.add_argument( +# "--use_pretrained_checkpoint", +# action="store_true", +# help="Use the pre-trained checkpoint from Nucleus.", +# ) +# parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") +# # append RSL-RL cli arguments +# cli_args.add_rsl_rl_args(parser) +# # append AppLauncher cli args +# AppLauncher.add_app_launcher_args(parser) +# # parse the arguments +# args_cli, hydra_args = parser.parse_known_args() +# # always enable cameras to record video +# if args_cli.video: +# args_cli.enable_cameras = True + +# # clear out sys.argv for Hydra +# sys.argv = [sys.argv[0]] + hydra_args + +# # launch omniverse app +# app_launcher = AppLauncher(args_cli) +# simulation_app = app_launcher.app + +# """Rest everything follows.""" + +# from rsl_rl.runners import DistillationRunner, OnPolicyRunner + +# from isaaclab.envs import ( +# DirectMARLEnv, +# DirectMARLEnvCfg, +# DirectRLEnvCfg, +# ManagerBasedRLEnvCfg, +# multi_agent_to_single_agent, +# ) +# from isaaclab.utils.assets import retrieve_file_path +# from isaaclab.utils.dict import print_dict + +# from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx +# from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint + +# import isaaclab_tasks # noqa: F401 +# from isaaclab_tasks.utils import get_checkpoint_path +# from isaaclab_tasks.utils.hydra import hydra_task_config + +# import mindbot.tasks # noqa: F401 + +# # ============================================================================== +# # 新增:自定义相机录制类 +# # ============================================================================== +# class CameraRecorder: +# """ +# 用于从 Isaac Lab 环境中录制指定相机的图像数据并保存为视频。 +# """ +# def __init__(self, env, camera_name: str, output_dir: str, fps: int = 30): +# self.env = env +# self.camera_name = camera_name +# self.output_dir = output_dir +# self.fps = fps +# self.frames = [] # 用于存储每一帧的 Tensor + +# # 确保输出目录存在 +# os.makedirs(self.output_dir, exist_ok=True) + +# # 尝试从场景中获取相机对象 +# # env.unwrapped 访问原始的 ManagerBasedRLEnv +# try: +# self.camera = self.env.unwrapped.scene[camera_name] +# print(f"[INFO][CameraRecorder] Successfully found camera: {camera_name}") +# except KeyError: +# print(f"[ERROR][CameraRecorder] Camera '{camera_name}' not found in scene entities!") +# print(f"Available entities: {self.env.unwrapped.scene.keys()}") +# raise + +# def record_step(self): +# """在每个仿真步调用,获取图像数据""" +# # 获取 RGB 数据。注意:Isaac Lab 的输出通常是 (num_envs, H, W, C) +# # 我们这里只取第一个环境 (env_index=0) +# # output["rgb"] 可能是 RGBA 或 RGB +# rgb_tensor = self.camera.data.output["rgb"][0] + +# # 如果数据在 GPU 上,克隆一份并 detach,防止被后续步骤覆盖 +# # 此时先不转 CPU,以免拖慢仿真循环,最后保存时再转 +# self.frames.append(rgb_tensor.clone().detach()) + +# def save_video(self, filename="robot_camera_video.mp4"): +# """循环结束后调用,将内存中的帧保存为视频""" +# if not self.frames: +# print("[WARN][CameraRecorder] No frames recorded.") +# return + +# print(f"[INFO][CameraRecorder] Saving {len(self.frames)} frames to video...") + +# # 将 tensor 转换为 numpy 格式 +# video_frames = [] +# for frame in self.frames: +# # 转换为 CPU 上的 numpy 数组 (H, W, C) +# img = frame.cpu().numpy() +# # 处理归一化数据 [0, 1] -> [0, 255] +# if img.dtype != np.uint8: +# if img.max() <= 1.01: +# img = (img * 255).astype(np.uint8) +# else: +# img = img.astype(np.uint8) +# # 处理 RGBA -> RGB +# if img.shape[-1] == 4: +# img = img[:, :, :3] +# 
video_frames.append(img) + +# output_path = os.path.join(self.output_dir, filename) + +# # 使用 imageio 写入视频,这种方式在 Windows 上更稳定 +# try: +# imageio.mimsave(output_path, video_frames, fps=self.fps) +# print(f"[INFO][CameraRecorder] Video saved to: {output_path}") +# except Exception as e: +# print(f"[ERROR][CameraRecorder] Failed to save video using imageio: {e}") +# # 如果 imageio 也失败,尝试最后的备选方案 +# print("[INFO] Attempting to save frames as images instead...") +# # ... (可选:保存为一系列图片) +# # def save_video(self, filename="robot_camera_video.mp4"): +# # """循环结束后调用,将内存中的帧保存为视频""" +# # if not self.frames: +# # print("[WARN][CameraRecorder] No frames recorded.") +# # return + +# # print(f"[INFO][CameraRecorder] Saving {len(self.frames)} frames to video...") + +# # # 堆叠帧 -> (T, H, W, C) +# # video_tensor = torch.stack(self.frames) + +# # # 处理 Alpha 通道:如果是 RGBA (4通道),只取前3个 RGB +# # if video_tensor.shape[-1] == 4: +# # video_tensor = video_tensor[..., :3] + +# # # 转换为 (T, C, H, W) 以符合 torchvision 的输入要求 +# # # 原始: (T, H, W, C) -> Permute -> (T, C, H, W) +# # video_tensor = video_tensor.permute(0, 3, 1, 2) + +# # # 确保是 uint8 类型 [0, 255] +# # if video_tensor.dtype != torch.uint8: +# # # 如果是 float [0,1],则 * 255 并转 uint8 +# # if video_tensor.max() <= 1.0: +# # video_tensor = (video_tensor * 255).to(torch.uint8) +# # else: +# # video_tensor = video_tensor.to(torch.uint8) + +# # # 移动到 CPU +# # video_tensor = video_tensor.cpu() + +# # output_path = os.path.join(self.output_dir, filename) + +# # # 使用 torchvision 保存视频 +# # torchvision.io.write_video(output_path, video_tensor, fps=self.fps) +# # print(f"[INFO][CameraRecorder] Video saved to: {output_path}") + +# # ============================================================================== + + +# @hydra_task_config(args_cli.task, args_cli.agent) +# def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): +# """Play with RSL-RL agent.""" +# # grab task name for checkpoint path +# task_name = args_cli.task.split(":")[-1] +# train_task_name = task_name.replace("-Play", "") + +# # override configurations with non-hydra CLI arguments +# agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) +# env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + +# # set the environment seed +# env_cfg.seed = agent_cfg.seed +# env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + +# # specify directory for logging experiments +# log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) +# log_root_path = os.path.abspath(log_root_path) +# print(f"[INFO] Loading experiment from directory: {log_root_path}") +# if args_cli.use_pretrained_checkpoint: +# resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name) +# if not resume_path: +# print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.") +# return +# elif args_cli.checkpoint: +# resume_path = retrieve_file_path(args_cli.checkpoint) +# else: +# resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) + +# log_dir = os.path.dirname(resume_path) + +# # set the log directory for the environment (works for all environment types) +# env_cfg.log_dir = log_dir + +# # create isaac environment +# env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + +# # convert to single-agent instance if required by the RL algorithm +# if 
isinstance(env.unwrapped, DirectMARLEnv): +# env = multi_agent_to_single_agent(env) + +# # wrap for video recording (standard Gym recording) +# # 注意:这只是录制 Viewport 的画面,不是你机器人相机的画面 +# if args_cli.video: +# video_kwargs = { +# "video_folder": os.path.join(log_dir, "videos", "play"), +# "step_trigger": lambda step: step == 0, +# "video_length": args_cli.video_length, +# "disable_logger": True, +# } +# print("[INFO] Recording videos during training.") +# print_dict(video_kwargs, nesting=4) +# env = gym.wrappers.RecordVideo(env, **video_kwargs) + +# # wrap around environment for rsl-rl +# env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) + +# print(f"[INFO]: Loading model checkpoint from: {resume_path}") +# # load previously trained model +# if agent_cfg.class_name == "OnPolicyRunner": +# runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) +# elif agent_cfg.class_name == "DistillationRunner": +# runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) +# else: +# raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") +# runner.load(resume_path) + +# # obtain the trained policy for inference +# policy = runner.get_inference_policy(device=env.unwrapped.device) + +# # extract the neural network module +# try: +# policy_nn = runner.alg.policy +# except AttributeError: +# policy_nn = runner.alg.actor_critic + +# # extract the normalizer +# if hasattr(policy_nn, "actor_obs_normalizer"): +# normalizer = policy_nn.actor_obs_normalizer +# elif hasattr(policy_nn, "student_obs_normalizer"): +# normalizer = policy_nn.student_obs_normalizer +# else: +# normalizer = None + +# # export policy to onnx/jit +# export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") +# export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt") +# export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx") + +# dt = env.unwrapped.step_dt + +# # ========================================================================== +# # 初始化自定义相机录制器 +# # ========================================================================== +# recorder = None +# if args_cli.video: +# # 这里的 output_dir 设为日志目录下的 hand_camera_videos +# save_dir = os.path.join(log_dir, "hand_camera_videos") +# # 这里的 "left_hand_camera" 必须对应 mindbot_env_cfg.py 中定义的名称 +# recorder = CameraRecorder(env, "left_hand_camera", save_dir, fps=int(1/dt)) + +# # reset environment +# obs = env.get_observations() +# timestep = 0 + +# print("[INFO] Starting simulation loop...") + +# # simulate environment +# while simulation_app.is_running(): +# start_time = time.time() +# # run everything in inference mode +# with torch.inference_mode(): +# # agent stepping +# actions = policy(obs) +# # env stepping +# obs, _, dones, _ = env.step(actions) + +# # ================================================================== +# # 每一帧录制数据 +# # ================================================================== +# if recorder: +# recorder.record_step() + +# # reset recurrent states for episodes that have terminated +# policy_nn.reset(dones) + +# if args_cli.video: +# timestep += 1 +# # Exit the play loop after recording one video +# if timestep == args_cli.video_length: +# # ============================================================== +# # 循环结束时保存视频 +# # ============================================================== +# if recorder: +# timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") +# 
recorder.save_video(filename=f"hand_cam_{timestamp}.mp4") +# break + +# # time delay for real-time evaluation +# sleep_time = dt - (time.time() - start_time) +# if args_cli.real_time and sleep_time > 0: +# time.sleep(sleep_time) + +# # close the simulator +# env.close() + + +# if __name__ == "__main__": +# # run the main function +# main() +# # close sim app +# simulation_app.close() + + +# # # Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# # # All rights reserved. +# # # +# # # SPDX-License-Identifier: BSD-3-Clause + +# # """Script to play a checkpoint if an RL agent from RSL-RL.""" + +# # """Launch Isaac Sim Simulator first.""" + +# # import argparse +# # import sys + +# # from isaaclab.app import AppLauncher + +# # # local imports +# # import cli_args # isort: skip + +# # # add argparse arguments +# # parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") +# # parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +# # parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") +# # parser.add_argument( +# # "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." +# # ) +# # parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +# # parser.add_argument("--task", type=str, default=None, help="Name of the task.") +# # parser.add_argument( +# # "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point." +# # ) +# # parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +# # parser.add_argument( +# # "--use_pretrained_checkpoint", +# # action="store_true", +# # help="Use the pre-trained checkpoint from Nucleus.", +# # ) +# # parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") +# # # append RSL-RL cli arguments +# # cli_args.add_rsl_rl_args(parser) +# # # append AppLauncher cli args +# # AppLauncher.add_app_launcher_args(parser) +# # # parse the arguments +# # args_cli, hydra_args = parser.parse_known_args() +# # # always enable cameras to record video +# # if args_cli.video: +# # args_cli.enable_cameras = True + +# # # clear out sys.argv for Hydra +# # sys.argv = [sys.argv[0]] + hydra_args + +# # # launch omniverse app +# # app_launcher = AppLauncher(args_cli) +# # simulation_app = app_launcher.app + +# # """Rest everything follows.""" + +# # import gymnasium as gym +# # import os +# # import time +# # import torch + +# # from rsl_rl.runners import DistillationRunner, OnPolicyRunner + +# # from isaaclab.envs import ( +# # DirectMARLEnv, +# # DirectMARLEnvCfg, +# # DirectRLEnvCfg, +# # ManagerBasedRLEnvCfg, +# # multi_agent_to_single_agent, +# # ) +# # from isaaclab.utils.assets import retrieve_file_path +# # from isaaclab.utils.dict import print_dict + +# # from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx +# # from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint + +# # import isaaclab_tasks # noqa: F401 +# # from isaaclab_tasks.utils import get_checkpoint_path +# # from isaaclab_tasks.utils.hydra import hydra_task_config + +# # import mindbot.tasks # noqa: F401 + + +# # @hydra_task_config(args_cli.task, args_cli.agent) +# # def main(env_cfg: 
ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): +# # """Play with RSL-RL agent.""" +# # # grab task name for checkpoint path +# # task_name = args_cli.task.split(":")[-1] +# # train_task_name = task_name.replace("-Play", "") + +# # # override configurations with non-hydra CLI arguments +# # agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) +# # env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + +# # # set the environment seed +# # # note: certain randomizations occur in the environment initialization so we set the seed here +# # env_cfg.seed = agent_cfg.seed +# # env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + +# # # specify directory for logging experiments +# # log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) +# # log_root_path = os.path.abspath(log_root_path) +# # print(f"[INFO] Loading experiment from directory: {log_root_path}") +# # if args_cli.use_pretrained_checkpoint: +# # resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name) +# # if not resume_path: +# # print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.") +# # return +# # elif args_cli.checkpoint: +# # resume_path = retrieve_file_path(args_cli.checkpoint) +# # else: +# # resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) + +# # log_dir = os.path.dirname(resume_path) + +# # # set the log directory for the environment (works for all environment types) +# # env_cfg.log_dir = log_dir + +# # # create isaac environment +# # env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + +# # # convert to single-agent instance if required by the RL algorithm +# # if isinstance(env.unwrapped, DirectMARLEnv): +# # env = multi_agent_to_single_agent(env) + +# # # wrap for video recording +# # if args_cli.video: +# # video_kwargs = { +# # "video_folder": os.path.join(log_dir, "videos", "play"), +# # "step_trigger": lambda step: step == 0, +# # "video_length": args_cli.video_length, +# # "disable_logger": True, +# # } +# # print("[INFO] Recording videos during training.") +# # print_dict(video_kwargs, nesting=4) +# # env = gym.wrappers.RecordVideo(env, **video_kwargs) + +# # # wrap around environment for rsl-rl +# # env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) + +# # print(f"[INFO]: Loading model checkpoint from: {resume_path}") +# # # load previously trained model +# # if agent_cfg.class_name == "OnPolicyRunner": +# # runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) +# # elif agent_cfg.class_name == "DistillationRunner": +# # runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) +# # else: +# # raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") +# # runner.load(resume_path) + +# # # obtain the trained policy for inference +# # policy = runner.get_inference_policy(device=env.unwrapped.device) + +# # # extract the neural network module +# # # we do this in a try-except to maintain backwards compatibility. 
+# # try: +# # # version 2.3 onwards +# # policy_nn = runner.alg.policy +# # except AttributeError: +# # # version 2.2 and below +# # policy_nn = runner.alg.actor_critic + +# # # extract the normalizer +# # if hasattr(policy_nn, "actor_obs_normalizer"): +# # normalizer = policy_nn.actor_obs_normalizer +# # elif hasattr(policy_nn, "student_obs_normalizer"): +# # normalizer = policy_nn.student_obs_normalizer +# # else: +# # normalizer = None + +# # # export policy to onnx/jit +# # export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") +# # export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt") +# # export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx") + +# # dt = env.unwrapped.step_dt + +# # # reset environment +# # obs = env.get_observations() +# # timestep = 0 +# # # simulate environment +# # while simulation_app.is_running(): +# # start_time = time.time() +# # # run everything in inference mode +# # with torch.inference_mode(): +# # # agent stepping +# # actions = policy(obs) +# # # env stepping +# # obs, _, dones, _ = env.step(actions) +# # # reset recurrent states for episodes that have terminated +# # policy_nn.reset(dones) +# # if args_cli.video: +# # timestep += 1 +# # # Exit the play loop after recording one video +# # if timestep == args_cli.video_length: +# # break + +# # # time delay for real-time evaluation +# # sleep_time = dt - (time.time() - start_time) +# # if args_cli.real_time and sleep_time > 0: +# # time.sleep(sleep_time) + +# # # close the simulator +# # env.close() + + +# # if __name__ == "__main__": +# # # run the main function +# # main() +# # # close sim app +# # simulation_app.close() diff --git a/scripts/rsl_rl/play.py.bak b/scripts/rsl_rl/play.py.bak new file mode 100644 index 0000000..50f28e9 --- /dev/null +++ b/scripts/rsl_rl/play.py.bak @@ -0,0 +1,210 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Script to play a checkpoint if an RL agent from RSL-RL.""" + +"""Launch Isaac Sim Simulator first.""" + +import argparse +import sys + +from isaaclab.app import AppLauncher + +# local imports +import cli_args # isort: skip + +# add argparse arguments +parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") +parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") +parser.add_argument( + "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." +) +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point." 
+) +parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +parser.add_argument( + "--use_pretrained_checkpoint", + action="store_true", + help="Use the pre-trained checkpoint from Nucleus.", +) +parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") +# append RSL-RL cli arguments +cli_args.add_rsl_rl_args(parser) +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +# parse the arguments +args_cli, hydra_args = parser.parse_known_args() +# always enable cameras to record video +if args_cli.video: + args_cli.enable_cameras = True + +# clear out sys.argv for Hydra +sys.argv = [sys.argv[0]] + hydra_args + +# launch omniverse app +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + +import gymnasium as gym +import os +import time +import torch + +from rsl_rl.runners import DistillationRunner, OnPolicyRunner + +from isaaclab.envs import ( + DirectMARLEnv, + DirectMARLEnvCfg, + DirectRLEnvCfg, + ManagerBasedRLEnvCfg, + multi_agent_to_single_agent, +) +from isaaclab.utils.assets import retrieve_file_path +from isaaclab.utils.dict import print_dict + +from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx +from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils import get_checkpoint_path +from isaaclab_tasks.utils.hydra import hydra_task_config + +import mindbot.tasks # noqa: F401 + + +@hydra_task_config(args_cli.task, args_cli.agent) +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): + """Play with RSL-RL agent.""" + # grab task name for checkpoint path + task_name = args_cli.task.split(":")[-1] + train_task_name = task_name.replace("-Play", "") + + # override configurations with non-hydra CLI arguments + agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) + env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + + # set the environment seed + # note: certain randomizations occur in the environment initialization so we set the seed here + env_cfg.seed = agent_cfg.seed + env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + + # specify directory for logging experiments + log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) + log_root_path = os.path.abspath(log_root_path) + print(f"[INFO] Loading experiment from directory: {log_root_path}") + if args_cli.use_pretrained_checkpoint: + resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name) + if not resume_path: + print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.") + return + elif args_cli.checkpoint: + resume_path = retrieve_file_path(args_cli.checkpoint) + else: + resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) + + log_dir = os.path.dirname(resume_path) + + # set the log directory for the environment (works for all environment types) + env_cfg.log_dir = log_dir + + # create isaac environment + env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv): + env = 
multi_agent_to_single_agent(env) + + # wrap for video recording + if args_cli.video: + video_kwargs = { + "video_folder": os.path.join(log_dir, "videos", "play"), + "step_trigger": lambda step: step == 0, + "video_length": args_cli.video_length, + "disable_logger": True, + } + print("[INFO] Recording videos during training.") + print_dict(video_kwargs, nesting=4) + env = gym.wrappers.RecordVideo(env, **video_kwargs) + + # wrap around environment for rsl-rl + env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) + + print(f"[INFO]: Loading model checkpoint from: {resume_path}") + # load previously trained model + if agent_cfg.class_name == "OnPolicyRunner": + runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + elif agent_cfg.class_name == "DistillationRunner": + runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + else: + raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") + runner.load(resume_path) + + # obtain the trained policy for inference + policy = runner.get_inference_policy(device=env.unwrapped.device) + + # extract the neural network module + # we do this in a try-except to maintain backwards compatibility. + try: + # version 2.3 onwards + policy_nn = runner.alg.policy + except AttributeError: + # version 2.2 and below + policy_nn = runner.alg.actor_critic + + # extract the normalizer + if hasattr(policy_nn, "actor_obs_normalizer"): + normalizer = policy_nn.actor_obs_normalizer + elif hasattr(policy_nn, "student_obs_normalizer"): + normalizer = policy_nn.student_obs_normalizer + else: + normalizer = None + + # export policy to onnx/jit + export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") + export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt") + export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx") + + dt = env.unwrapped.step_dt + + # reset environment + obs = env.get_observations() + timestep = 0 + # simulate environment + while simulation_app.is_running(): + start_time = time.time() + # run everything in inference mode + with torch.inference_mode(): + # agent stepping + actions = policy(obs) + # env stepping + obs, _, dones, _ = env.step(actions) + # reset recurrent states for episodes that have terminated + policy_nn.reset(dones) + if args_cli.video: + timestep += 1 + # Exit the play loop after recording one video + if timestep == args_cli.video_length: + break + + # time delay for real-time evaluation + sleep_time = dt - (time.time() - start_time) + if args_cli.real_time and sleep_time > 0: + time.sleep(sleep_time) + + # close the simulator + env.close() + + +if __name__ == "__main__": + # run the main function + main() + # close sim app + simulation_app.close() diff --git a/scripts/rsl_rl/play.py.bak1 b/scripts/rsl_rl/play.py.bak1 new file mode 100644 index 0000000..2a7025e --- /dev/null +++ b/scripts/rsl_rl/play.py.bak1 @@ -0,0 +1,270 @@ +import argparse +import sys +import os +import time + +import imageio +import numpy as np +import h5py +import torch + +# 先引入 AppLauncher,并尽早实例化 SimulationApp,避免 pxr 未加载 +from isaaclab.app import AppLauncher +import cli_args # isort: skip + +# CLI +parser = argparse.ArgumentParser(description="Play an RL agent with multi-cam recording.") +parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument("--video_length", type=int, default=200, help="Length 
of the recorded video (in steps).") +parser.add_argument("--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations.") +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument("--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point.") +parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +parser.add_argument("--use_pretrained_checkpoint", action="store_true", help="Use the pre-trained checkpoint from Nucleus.") +parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") +parser.add_argument("--max_steps", type=int, default=None, help="最大步数,达到后提前退出") +cli_args.add_rsl_rl_args(parser) +AppLauncher.add_app_launcher_args(parser) +args_cli, hydra_args = parser.parse_known_args() +if args_cli.video: + args_cli.enable_cameras = True +# 将 hydra 剩余参数传递 +sys.argv = [sys.argv[0]] + hydra_args + +# ==== 先实例化 SimulationApp ==== +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +# ==== 之后再 import 依赖 isaac/pxr 的模块 ==== +import gymnasium as gym +from rsl_rl.runners import DistillationRunner, OnPolicyRunner +from isaaclab.envs import DirectMARLEnv, DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg, multi_agent_to_single_agent +from isaaclab.utils.assets import retrieve_file_path +from isaaclab.utils.dict import print_dict +from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx +from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils import get_checkpoint_path +from isaaclab_tasks.utils.hydra import hydra_task_config +import mindbot.tasks # noqa: F401 + +CAM_NAMES = ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand", "cam_top", "cam_side"] + + +@hydra_task_config(args_cli.task, args_cli.agent) +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): + task_name = args_cli.task.split(":")[-1] + train_task_name = task_name.replace("-Play", "") + + agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) + env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + env_cfg.seed = agent_cfg.seed + env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + + log_root_path = os.path.abspath(os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)) + print(f"[INFO] Loading experiment from directory: {log_root_path}") + if args_cli.use_pretrained_checkpoint: + resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name) + if not resume_path: + print("[INFO] No pre-trained checkpoint for this task.") + return + elif args_cli.checkpoint: + resume_path = retrieve_file_path(args_cli.checkpoint) + else: + resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) + + log_dir = os.path.dirname(resume_path) + env_cfg.log_dir = log_dir + + env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) + + 
print(f"[INFO]: Loading model checkpoint from: {resume_path}") + if agent_cfg.class_name == "OnPolicyRunner": + runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + elif agent_cfg.class_name == "DistillationRunner": + runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + else: + raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") + runner.load(resume_path) + + policy = runner.get_inference_policy(device=env.unwrapped.device) + try: + policy_nn = runner.alg.policy + except AttributeError: + policy_nn = runner.alg.actor_critic + + # 导出模型 + export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") + normalizer = getattr(policy_nn, "actor_obs_normalizer", getattr(policy_nn, "student_obs_normalizer", None)) + export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt") + export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx") + + # dt = env.unwrapped.step_dt + + # # 录制缓冲 + # cam_buffers = {n: [] for n in CAM_NAMES} + # joint_log, joint_vel_log, action_log, ts_log = [], [], [], [] + # t0 = time.time() + + # obs = env.get_observations() + # timestep = 0 + # step_count = 0 + # try: + # while simulation_app.is_running(): + # start_time = time.time() + # with torch.inference_mode(): + # actions = policy(obs) + # obs, _, dones, _ = env.step(actions) + # policy_nn.reset(dones) + + # # 相机帧(取 env0,如需全部 env 去掉 [0]) + # for name in CAM_NAMES: + # if name not in env.unwrapped.scene.sensors: + # continue + # cam = env.unwrapped.scene.sensors[name] + # rgba = cam.data.output.get("rgba", cam.data.output.get("rgb")) + # if rgba is None: + # continue + # frame = rgba[0].cpu().numpy() + # if frame.shape[-1] == 4: + # frame = frame[..., :3] + # cam_buffers[name].append(frame) + + # # 关节 / 速度 / 动作 + # robot = env.unwrapped.scene["Mindbot"] + # joint_log.append(robot.data.joint_pos.cpu().numpy()) + # joint_vel_log.append(robot.data.joint_vel.cpu().numpy()) + # action_log.append(actions.cpu().numpy()) + # ts_log.append(time.time() - t0) + + # step_count += 1 + # if args_cli.max_steps and step_count >= args_cli.max_steps: + # break + + # if args_cli.video: + # timestep += 1 + # if timestep == args_cli.video_length: + # break + + # sleep_time = dt - (time.time() - start_time) + # if args_cli.real_time and sleep_time > 0: + # time.sleep(sleep_time) + # finally: + # # 保存 HDF5 + # h5_path = os.path.join(log_dir, "rollout_multi_cam.h5") + # with h5py.File(h5_path, "w") as f: + # f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip") + # f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip") + # f.create_dataset("actions", data=np.stack(action_log), compression="gzip") + # f.create_dataset("timestamps", data=np.array(ts_log)) + # for name, frames in cam_buffers.items(): + # if not frames: + # continue + # dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip") + # if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]: + # fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05 + # else: + # fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295 + # dset.attrs["fx"] = fx + # dset.attrs["fy"] = fy + # dset.attrs["cx"] = cx + # dset.attrs["cy"] = cy + # dset.attrs["focal_length"] = 1.93 + # print(f"[INFO] Saved HDF5 to {h5_path}") + + # # 可选:单路 MP4 + # head_frames = cam_buffers["cam_head"] + # if head_frames: + # fps = int(round(1.0 / dt)) + # 
video_path = os.path.join(log_dir, "cam_head.mp4") + # imageio.mimsave(video_path, head_frames, fps=fps) + # print(f"[INFO] Saved video to {video_path}") + + # env.close() + dt = env.unwrapped.step_dt + + # 录制缓冲 + cam_buffers = {n: [] for n in CAM_NAMES} + joint_log, joint_vel_log, action_log, ts_log = [], [], [], [] + t0 = time.time() + + obs = env.get_observations() + timestep = 0 + while simulation_app.is_running(): + start_time = time.time() + with torch.inference_mode(): + actions = policy(obs) + obs, _, dones, _ = env.step(actions) + policy_nn.reset(dones) + + # 相机帧(取 env0,如需全部 env 去掉 [0]) + for name in CAM_NAMES: + if name not in env.unwrapped.scene.sensors: + continue + cam = env.unwrapped.scene.sensors[name] + rgba = cam.data.output.get("rgba", cam.data.output.get("rgb")) + if rgba is None: + continue + frame = rgba[0].cpu().numpy() + if frame.shape[-1] == 4: + frame = frame[..., :3] + cam_buffers[name].append(frame) + + # 关节 / 速度 / 动作 + robot = env.unwrapped.scene["Mindbot"] + joint_log.append(robot.data.joint_pos.cpu().numpy()) + joint_vel_log.append(robot.data.joint_vel.cpu().numpy()) + action_log.append(actions.cpu().numpy()) + ts_log.append(time.time() - t0) + + if args_cli.video: + timestep += 1 + if timestep == args_cli.video_length: + break + + sleep_time = dt - (time.time() - start_time) + if args_cli.real_time and sleep_time > 0: + time.sleep(sleep_time) + + # 保存 HDF5 + h5_path = os.path.join(log_dir, "rollout_multi_cam.h5") + with h5py.File(h5_path, "w") as f: + f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip") + f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip") + f.create_dataset("actions", data=np.stack(action_log), compression="gzip") + f.create_dataset("timestamps", data=np.array(ts_log)) + for name, frames in cam_buffers.items(): + if not frames: + continue + dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip") + # 内参:按你相机 vector 设置 + if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]: + fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05 + else: + fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295 + dset.attrs["fx"] = fx + dset.attrs["fy"] = fy + dset.attrs["cx"] = cx + dset.attrs["cy"] = cy + dset.attrs["focal_length"] = 1.93 + print(f"[INFO] Saved HDF5 to {h5_path}") + + # 可选:单路 MP4 + head_frames = cam_buffers["cam_head"] + if head_frames: + fps = int(round(1.0 / dt)) + video_path = os.path.join(log_dir, "cam_head.mp4") + imageio.mimsave(video_path, head_frames, fps=fps) + print(f"[INFO] Saved video to {video_path}") + + env.close() + + +if __name__ == "__main__": + main() + simulation_app.close() \ No newline at end of file diff --git a/source/mindbot/mindbot/robot/mindbot.py b/source/mindbot/mindbot/robot/mindbot.py index 1ff6391..301d188 100644 --- a/source/mindbot/mindbot/robot/mindbot.py +++ b/source/mindbot/mindbot/robot/mindbot.py @@ -18,8 +18,9 @@ from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR MINDBOT_CFG = ArticulationCfg( spawn=sim_utils.UsdFileCfg( # 1. 
UPDATE THE USD PATH to your local file - usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot_cd.usd", - # usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot.usd", + # C:\Users\PC\workpalce\maic_usd_assets\twinlab\MIC_sim-3.0\pupet_arm_sim_product\sim_data_acquisition\usds\Collected_robot_2_3\robot_2_4.usd + usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot_cd2.usd", + # usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/pupet_arm_sim_product/sim_data_acquisition/usds/Collected_robot_2_3/robot_2_6.usd", rigid_props=sim_utils.RigidBodyPropertiesCfg( disable_gravity=False, max_depenetration_velocity=1.0, @@ -66,26 +67,26 @@ MINDBOT_CFG = ArticulationCfg( # "l_joint6": 0.0, # Right arm joints # -45° -> -0.7854 - # "r_joint1": -0.7854, - # # 70° -> 1.2217 - # "r_joint2": 1.2217, - # # 90° -> 1.5708 - # "r_joint3": 1.5708, - # # -90° -> -1.5708 - # "r_joint4": -1.5708, - # # 90° -> 1.5708 - # "r_joint5": 1.5708, - # # 70° -> 1.2217 - # "r_joint6": 1.2217, - # # Right arm joints - "r_joint1": 0.0, - "r_joint2": -1.5708, + "r_joint1": -0.7854, + # 70° -> 1.2217 + "r_joint2": 1.2217, + # 90° -> 1.5708 + "r_joint3": 1.5708, + # -90° -> -1.5708 + "r_joint4": -1.5708, + # 90° -> 1.5708 + "r_joint5": 1.5708, + # 70° -> 1.2217 + "r_joint6": 1.2217, + # Right arm joints + # "r_joint1": 0.0, + # # "r_joint2": -1.5708, # "r_joint2": 0.0, - "r_joint3": 0.0, - "r_joint4": 0.0, - "r_joint5": 0.0, - "r_joint6": 0.0, - # # left wheel + # "r_joint3": 0.0, + # "r_joint4": 0.0, + # "r_joint5": 0.0, + # "r_joint6": 0.0, + # # # left wheel # "left_b_revolute_Joint": 0.0, # "left_f_revolute_Joint": 0.0, # # right wheel @@ -102,7 +103,7 @@ MINDBOT_CFG = ArticulationCfg( # trunk "PrismaticJoint": 0.3, # head - "head_revoluteJoint": 0.5236 + "head_revoluteJoint": 0.0 #0.5236 }, # The initial (x, y, z) position of the robot's base in the world pos=(0, 0, 0.05), @@ -114,7 +115,7 @@ MINDBOT_CFG = ArticulationCfg( joint_names_expr=["l_joint[1-6]"], # This matches l_joint1, l_joint2, ..., l_joint6 effort_limit_sim=100.0, # Note: Tune this based on your robot's specs velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs - stiffness=0.0, #100000.0, # Note: Tune this for desired control performance + stiffness=10000.0, #10000.0, # Note: Tune this for desired control performance damping=1000.0, #10000.0, # Note: Tune this for desired control performance ), # Group for the 6 right arm joints using a regular expression @@ -122,7 +123,7 @@ MINDBOT_CFG = ArticulationCfg( joint_names_expr=["r_joint[1-6]"], # This matches r_joint1, r_joint2, ..., r_joint6 effort_limit_sim=100.0, # Note: Tune this based on your robot's specs velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs - stiffness=10000.0, #100000.0, # Note: Tune this for desired control performance1 + stiffness=0.0, #10000.0, # Note: Tune this for desired control performance1 damping=1000.0, #10000.0, # Note: Tune this for desired control performance ), "head": ImplicitActuatorCfg(joint_names_expr=["head_revoluteJoint"], stiffness=10000.0, damping=1000.0), @@ -142,9 +143,10 @@ MINDBOT_CFG = ArticulationCfg( ), "grippers": ImplicitActuatorCfg( joint_names_expr=[".*_hand_joint.*"], - stiffness=10000.0, + stiffness=10000.0, #10000.0 damping=1000.0, effort_limit_sim=50.0, # default is 10, gemini said it is too small, so I set it to 50. 
+ velocity_limit_sim=50.0, ), }, ) diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/__init__.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge/__init__.py new file mode 100644 index 0000000..e8cf117 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/__init__.py @@ -0,0 +1,30 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. +## + + +gym.register( + id="Template-centrifuge-lidup-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.centrifuge_env_cfg:MindbotEnvCfg", + # "env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg", + "skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml", + "skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml", + "skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", + }, +) \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/__init__.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/__init__.py new file mode 100644 index 0000000..a597dfa --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/rl_games_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/rl_games_ppo_cfg.yaml new file mode 100644 index 0000000..71216e6 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/rl_games_ppo_cfg.yaml @@ -0,0 +1,78 @@ +params: + seed: 42 + + # environment wrapper clipping + env: + # added to the wrapper + clip_observations: 5.0 + # can make custom wrapper? 
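The gym.register call above only records entry-point strings; nothing is instantiated until a training or play script resolves the id. A short sketch of inspecting those registered kwargs through gymnasium, assuming the mindbot tasks package (whose import runs the registration) is importable in the current environment:

```python
import gymnasium as gym

# Assumed import path: the package whose __init__ executes the gym.register(...)
# call above must be imported first so the id exists in the registry.
import mindbot.tasks  # noqa: F401

spec = gym.spec("Template-centrifuge-lidup-v0")
print(spec.entry_point)                       # isaaclab.envs:ManagerBasedRLEnv
print(spec.kwargs["env_cfg_entry_point"])     # ...centrifuge_env_cfg:MindbotEnvCfg
print(spec.kwargs["rsl_rl_cfg_entry_point"])  # ...rsl_rl_ppo_cfg:PPORunnerCfg
```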
+ clip_actions: 1.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + # doesn't have this fine grained control but made it close + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [32, 32] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: cartpole_direct + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + num_actors: -1 # configured from the script (based on num_envs) + reward_shaper: + scale_value: 0.1 + normalize_advantage: True + gamma: 0.99 + tau : 0.95 + learning_rate: 5e-4 + lr_schedule: adaptive + kl_threshold: 0.008 + score_to_win: 20000 + max_epochs: 150 + save_best_after: 50 + save_frequency: 25 + grad_norm: 1.0 + entropy_coef: 0.0 + truncate_grads: True + e_clip: 0.2 + horizon_length: 32 + minibatch_size: 16384 + mini_epochs: 8 + critic_coef: 4 + clip_value: True + seq_length: 4 + bounds_loss_coef: 0.0001 \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/rsl_rl_ppo_cfg.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 0000000..800b162 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,72 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
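The rl_games file above is plain YAML, so its hyperparameters can be sanity-checked outside the trainer. A minimal sketch assuming PyYAML and an illustrative path; note that the unquoted `5e-4` (no decimal point) is read back by PyYAML as a string, which is why it is cast below:

```python
import yaml

with open("rl_games_ppo_cfg.yaml") as f:  # illustrative path
    params = yaml.safe_load(f)["params"]

train = params["config"]
print(params["algo"]["name"])                   # a2c_continuous
print(train["gamma"], train["horizon_length"])  # 0.99 32
print(float(train["learning_rate"]))            # 0.0005 (YAML reads 5e-4 as a string)
```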
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class PPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 48 # 稍微增加每轮步数 + max_iterations = 5000 # 增加迭代次数,给它足够的时间学习 + save_interval = 100 + # experiment_name = "mindbot_grasp" # 建议改个名,不要叫 cartpole 了 + experiment_name = "mindbot_pullUltrasoundLidUp" + + policy = RslRlPpoActorCriticCfg( + init_noise_std=0.7, + actor_obs_normalization=True, # 开启归一化 + critic_obs_normalization=True, # 开启归一化 + # 增加网络容量:三层 MLP,宽度足够 + actor_hidden_dims=[128, 64, 32], + critic_hidden_dims=[128, 64, 32], + activation="elu", + ) + + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, # 保持适度的探索 + num_learning_epochs=5, + num_mini_batches=8, + learning_rate=3e-4, # 如果网络变大后不稳定,可以尝试降低到 5.0e-4 + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) + + +# @configclass +# class PPORunnerCfg(RslRlOnPolicyRunnerCfg): +# num_steps_per_env = 16 +# max_iterations = 150 +# save_interval = 50 +# experiment_name = "cartpole_direct" +# policy = RslRlPpoActorCriticCfg( +# init_noise_std=1.0, +# actor_obs_normalization=False, +# critic_obs_normalization=False, +# actor_hidden_dims=[32, 32], +# critic_hidden_dims=[32, 32], +# activation="elu", +# ) +# algorithm = RslRlPpoAlgorithmCfg( +# value_loss_coef=1.0, +# use_clipped_value_loss=True, +# clip_param=0.2, +# entropy_coef=0.005, +# num_learning_epochs=5, +# num_mini_batches=4, +# learning_rate=1.0e-3, +# schedule="adaptive", +# gamma=0.99, +# lam=0.95, +# desired_kl=0.01, +# max_grad_norm=1.0, +# ) \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/sb3_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/sb3_ppo_cfg.yaml new file mode 100644 index 0000000..23ed0c0 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/sb3_ppo_cfg.yaml @@ -0,0 +1,20 @@ +# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32 +seed: 42 + +n_timesteps: !!float 1e6 +policy: 'MlpPolicy' +n_steps: 16 +batch_size: 4096 +gae_lambda: 0.95 +gamma: 0.99 +n_epochs: 20 +ent_coef: 0.01 +learning_rate: !!float 3e-4 +clip_range: !!float 0.2 +policy_kwargs: + activation_fn: nn.ELU + net_arch: [32, 32] + squash_output: False +vf_coef: 1.0 +max_grad_norm: 1.0 +device: "cuda:0" \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_amp_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_amp_cfg.yaml new file mode 100644 index 0000000..3a1fd21 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_amp_cfg.yaml @@ -0,0 +1,111 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: -2.9 + fixed_log_std: True + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: 
[1024, 512] + activations: relu + output: ONE + discriminator: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + +# AMP memory (reference motion dataset) +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +motion_dataset: + class: RandomMemory + memory_size: 200000 + +# AMP memory (preventing discriminator overfitting) +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +reply_buffer: + class: RandomMemory + memory_size: 1000000 + + +# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/amp.html +agent: + class: AMP + rollouts: 16 + learning_epochs: 6 + mini_batches: 2 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-05 + learning_rate_scheduler: null + learning_rate_scheduler_kwargs: null + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + amp_state_preprocessor: RunningStandardScaler + amp_state_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 0.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.5 + discriminator_loss_scale: 5.0 + amp_batch_size: 512 + task_reward_weight: 0.0 + style_reward_weight: 1.0 + discriminator_batch_size: 4096 + discriminator_reward_scale: 2.0 + discriminator_logit_regularization_scale: 0.05 + discriminator_gradient_penalty_scale: 5.0 + discriminator_weight_decay_scale: 1.0e-04 + # rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "humanoid_amp_run" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 80000 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_ippo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_ippo_cfg.yaml new file mode 100644 index 0000000..2f46b1c --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_ippo_cfg.yaml @@ -0,0 +1,80 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG) +# 
https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html +agent: + class: IPPO + rollouts: 16 + learning_epochs: 8 + mini_batches: 1 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 3.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cart_double_pendulum_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_mappo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_mappo_cfg.yaml new file mode 100644 index 0000000..720c927 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_mappo_cfg.yaml @@ -0,0 +1,82 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html +agent: + class: MAPPO + rollouts: 16 + learning_epochs: 8 + mini_batches: 1 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 3.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + shared_state_preprocessor: RunningStandardScaler + shared_state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cart_double_pendulum_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git 
a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_ppo_cfg.yaml new file mode 100644 index 0000000..ab6674d --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_ppo_cfg.yaml @@ -0,0 +1,80 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html +agent: + class: PPO + rollouts: 32 + learning_epochs: 8 + mini_batches: 8 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 0.1 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cartpole_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/centrifuge_env_cfg.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge/centrifuge_env_cfg.py new file mode 100644 index 0000000..387c459 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/centrifuge_env_cfg.py @@ -0,0 +1,636 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
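The rsl_rl runner above uses schedule="adaptive" with desired_kl=0.01, and the skrl configs use KLAdaptiveLR with kl_threshold: 0.008; both adjust the learning rate from the measured policy KL divergence. A sketch of the commonly used form of that rule; the 1.5 factor and the bounds are illustrative, not taken from either library:

```python
def kl_adaptive_lr(lr: float, kl: float, desired_kl: float = 0.01,
                   lr_min: float = 1e-5, lr_max: float = 1e-2) -> float:
    """Shrink the LR when an update overshoots the KL target, grow it when the
    update is overly conservative, and leave it unchanged in between."""
    if kl > 2.0 * desired_kl:
        return max(lr_min, lr / 1.5)
    if 0.0 < kl < 0.5 * desired_kl:
        return min(lr_max, lr * 1.5)
    return lr

# A too-large update shrinks the LR; a tiny one grows it.
print(kl_adaptive_lr(3e-4, kl=0.03))   # ≈ 2.0e-4
print(kl_adaptive_lr(3e-4, kl=0.002))  # ≈ 4.5e-4
```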
+# +# SPDX-License-Identifier: BSD-3-Clause + +import math +from re import A, T +from tkinter import N +import torch +from isaaclab.actuators import ImplicitActuatorCfg +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.managers import CurriculumTermCfg as CurrTerm +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.utils import configclass +from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg +from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg +from isaaclab.sensors import CameraCfg + +from . import mdp + +## +# Pre-defined configs +## + +from mindbot.robot.mindbot import MINDBOT_CFG + +# ====== 其他物体配置 ====== +from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg +from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg + + +TABLE_CFG=RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Table", + init_state=RigidObjectCfg.InitialStateCfg( + pos=[0.95, -0.3, 0.005], + rot=[0.7071, 0.0, 0.0, 0.7071], + lin_vel=[0.0, 0.0, 0.0], + ang_vel=[0.0, 0.0, 0.0], + ), + spawn=UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd", + rigid_props=RigidBodyPropertiesCfg( + rigid_body_enabled= True, + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + disable_gravity=False, + ), + collision_props=CollisionPropertiesCfg( + collision_enabled=True, + contact_offset=0.0005,#original 0.02 + rest_offset=0 + ), + ), +) + +LID_CFG = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Lid", + init_state=RigidObjectCfg.InitialStateCfg( + pos=[0.95, 0.73, 0.1055], + rot=[1.0, 0.0, 0.0, 0.0], + lin_vel=[0.0, 0.0, 0.0], + ang_vel=[0.0, 0.0, 0.0], + ), + spawn=UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/Lid_B.usd", + # scale=(0.2, 0.2, 0.2), + rigid_props=RigidBodyPropertiesCfg( + rigid_body_enabled= True, + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + max_angular_velocity=1000.0, + max_linear_velocity=1000.0, + max_depenetration_velocity=0.5,#original 5.0 + linear_damping=5.0, #original 0.5 + angular_damping=5.0, #original 0.5 + disable_gravity=False, + ), + collision_props=CollisionPropertiesCfg( + collision_enabled=True, + contact_offset=0.0005,#original 0.02 + rest_offset=0 + ), + ), +) + +CENTRIFUGE_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/centrifuge.usd", + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=1.0, + linear_damping=0.5, + angular_damping=0.5, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + stabilization_threshold=1e-6, + # 【重要】必须要固定底座,否则机器人按盖子时,离心机会翻倒 + # fix_root_link=True, + ), + ), + init_state=ArticulationCfg.InitialStateCfg( + # 1. 
参照机器人配置,在这里定义初始关节角度 + joint_pos={ + "r1": math.radians(0.0), # 转子位置(无关紧要) + + # 【关键修改】:初始让盖子处于打开状态 + # 您的 USD 限位是 (-100, 0),-100度为最大开启 + "r2": math.radians(-100.0), + }, + pos=(0.95, -0.3, 0.855), + rot=[-0.7071, 0.0, 0.0, 0.7071], + ), + actuators={ + "lid_passive_mechanism": ImplicitActuatorCfg( + joint_names_expr=["r2"], + + # 【关键差异说明】 + # 机器人的手臂 stiffness 是 10000.0,因为机器人需要精准定位且不晃动。 + # + # 但是!离心机盖子如果设为 10000,它就会像焊死在空中一样,机器人根本按不动。 + # 这里设为 200.0 左右: + # 1. 力度足以克服重力,让盖子初始保持打开。 + # 2. 力度又足够软,当机器人用力下压时,盖子会顺从地闭合。 + stiffness=200.0, + + # 阻尼:给一点阻力,模拟液压杆手感,防止像弹簧一样乱晃 + damping=20.0, + + # 确保力矩上限不要太小,否则托不住盖子 + effort_limit_sim=1000.0, + velocity_limit_sim=100.0, + ), + # 转子可以硬一点,不需要被机器人按动 + "rotor_control": ImplicitActuatorCfg( + joint_names_expr=["r1"], + stiffness=1000.0, + damping=10.0, + ), + } +) +# CENTRIFUGE_CFG = ArticulationCfg( +# spawn=sim_utils.UsdFileCfg( +# usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/centrifuge.usd", +# rigid_props=sim_utils.RigidBodyPropertiesCfg( +# disable_gravity=False, +# max_depenetration_velocity=1.0, +# linear_damping=0.5, +# angular_damping=0.5, +# ), +# articulation_props=sim_utils.ArticulationRootPropertiesCfg( +# enabled_self_collisions=False,# +# solver_position_iteration_count=32, +# solver_velocity_iteration_count=16, +# stabilization_threshold=1e-6, +# fix_root_link=True, +# ), +# ), +# init_state=ArticulationCfg.InitialStateCfg( +# joint_pos={ +# "r1": math.radians(-100.0), +# "r2": math.radians(-100.0), +# }, +# pos=(0.95, -0.3, 0.855), +# rot=[-0.7071, 0.0, 0.0, 0.7071], +# ), +# # actuators={} +# actuators={ +# "passive_damper": ImplicitActuatorCfg( +# # ".*" 表示匹配该USD文件内的所有关节(无论是轮子、屏幕转轴还是其他) +# joint_names_expr=["r2"], + +# # 【关键逻辑】 +# # 使用较高的 Stiffness (刚度) 模拟电机或弹簧锁死在特定位置 +# # 这样盖子才不会掉下来 +# stiffness=200.0, + +# # 适当的阻尼,防止盖子像弹簧一样疯狂抖动 +# damping=20.0, + +# # 确保力矩足够克服盖子的重力 +# effort_limit_sim=1000.0, +# velocity_limit_sim=100.0, +# ), +# # 如果有其他关节(如转子r1),可以单独配置 +# "rotor_control": ImplicitActuatorCfg( +# joint_names_expr=["r1"], +# stiffness=1000.0, +# damping=10.0, +# # joint_names_expr=[".*"], + +# # stiffness=0.0, +# # damping=10.0, +# # effort_limit_sim=100.0, +# # velocity_limit_sim=100.0, +# ), +# } +# ) + + +ROOM_CFG = AssetBaseCfg( + prim_path="{ENV_REGEX_NS}/Room", + spawn=UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd", + ), +) + +## +# Scene definition +## + + +@configclass +class MindbotSceneCfg(InteractiveSceneCfg): + """Configuration for a cart-pole scene.""" + + # ground plane + ground = AssetBaseCfg( + prim_path="/World/ground", + spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)), + ) + + # robot + # Room: AssetBaseCfg = ROOM_CFG.replace(prim_path="{ENV_REGEX_NS}/Room") + Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot") + table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table") + centrifuge: ArticulationCfg = CENTRIFUGE_CFG.replace(prim_path="{ENV_REGEX_NS}/centrifuge") + lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid") + # lights + dome_light = AssetBaseCfg( + prim_path="/World/DomeLight", + spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0), + ) + """ + 添加相机定义 + # head_camera=CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head", + # update_period=1/120, + # height=480, + # width=640, + # data_type=["rgb"], + # spawn=None, + # ) + """ + # left_hand_camera=CameraCfg( + # 
prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand", + # update_period=1/120, + # height=480, + # width=640, + # data_types=["rgb"], + # spawn=None, + # ) + # right_hand_camera=CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand", + # update_period=1/120, + # height=480, + # width=640, + # data_types=["rgb"], + # spawn=None, + # ) + # head_camera=CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head", + # update_period=1/120, + # height=480, + # width=640, + # data_types=["rgb"], + # spawn=None, + # ) + # chest_camera=CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest", + # update_period=1/120, + # height=480, + # width=640, + # data_types=["rgb"], + # spawn=None, + # ) +## +# MDP settings +## + +def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor: + asset = env.scene[asset_cfg.name] + body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True) + pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins + return pos_w + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + # 使用任务空间控制器:策略输出末端执行器位置+姿态增量(6维) + left_arm_ee = DifferentialInverseKinematicsActionCfg( + asset_name="Mindbot", + joint_names=["l_joint[1-6]"], # 左臂的6个关节 + body_name="left_hand_body", # 末端执行器body名称 + controller=DifferentialIKControllerCfg( + command_type="pose", # 控制位置+姿态 + use_relative_mode=True, # 相对模式:动作是增量 + ik_method="dls", # Damped Least Squares方法 + ), + scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025), + ) + + # right_arm_fixed = mdp.JointPositionActionCfg( + # asset_name="Mindbot", + # joint_names=["r_joint[1-6]"], # 对应 l_joint1 到 l_joint6 + + # # 1. 缩放设为 0:这让 RL 策略无法控制它,它不会动 + # scale=0.0, + + # # 2. 
偏移设为你的目标角度(弧度):这告诉电机“永远停在这里” + # # 对应 (135, -70, -90, 90, 90, -70) + # offset={ + # "r_joint1": 2.3562, + # "r_joint2": -1.2217, + # "r_joint3": -1.5708, + # "r_joint4": 1.5708, + # "r_joint5": 1.5708, + # "r_joint6": -1.2217, + # }, + # ) + + grippers_position = mdp.BinaryJointPositionActionCfg( + asset_name="Mindbot", + joint_names=["left_hand_joint_left", "left_hand_joint_right"], + + # 修正:使用字典格式 + # open_command_expr={"关节名正则": 值} + open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0}, + + # close_command_expr={"关节名正则": 值} + close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03}, + ) + + + trunk_position = mdp.JointPositionActionCfg( + asset_name="Mindbot", + joint_names=["PrismaticJoint"], + scale=0.00, + offset=0.3, + clip=None + ) + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")}) + mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}) + mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}) + + left_gripper_pos = ObsTerm(func=left_gripper_pos_w, + params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])}) + lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")}) + centrifuge_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("centrifuge")}) + + def __post_init__(self) -> None: + self.enable_corruption = False + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + # 重置所有关节到 init_state(无偏移)must be + reset_mindbot_all_joints = EventTerm( + func=mdp.reset_joints_by_offset, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("Mindbot"), + "position_range": (0.0, 0.0), + "velocity_range": (0.0, 0.0), + }, + ) + # 2. 底座安装误差 (建议缩小到 2cm) + reset_mindbot_base = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("Mindbot"), + "pose_range": { + "x": (-0.05, 0.05), + "y": (-0.05, 0.05), + "z": (0.74, 0.75), + "yaw": (-0.1, 0.1), + }, + "velocity_range": {"x": (0.0, 0.0), "y": (0.0, 0.0)}, + }, + ) + # 1. 
机械臂关节微小偏移 (0.01 弧度约 0.6 度,很合适) + reset_mindbot_left_arm = EventTerm( + func=mdp.reset_joints_by_offset, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]), + "position_range": (-0.01, 0.01), + "velocity_range": (0.0, 0.0), + }, + ) + + reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset", + params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}}) + reset_centrifuge = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("centrifuge"), + "pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)}, + "velocity_range": {"x": (0.0, 0.0)} + } + ) + reset_lid = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("lid"), + "pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)}, + "velocity_range": {"x": (0.0, 0.0)} + } + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + gripper_lid_orientation_alignment = RewTerm( + func=mdp.gripper_lid_orientation_alignment, + params={ + "lid_cfg": SceneEntityCfg("lid"), + "robot_cfg": SceneEntityCfg("Mindbot"), + "gripper_body_name": "left_hand_body", + "gripper_forward_axis": (0.0, 0.0, 1.0), + "gripper_width_axis": (0.0, 1.0, 0.0), + "lid_handle_axis": (0.0, 1.0, 0.0), + "max_angle_deg": 85.0, # 允许60度内的偏差 + }, + weight=5.0 #original 5.0 + ) + + # stage 2 + # 位置对齐奖励:鼓励夹爪位于 lid 两侧,且 Z 方向在 lid 上方 0.1m + gripper_lid_position_alignment = RewTerm( + func=mdp.gripper_lid_position_alignment, + params={ + "lid_cfg": SceneEntityCfg("lid"), + "robot_cfg": SceneEntityCfg("Mindbot"), + "left_gripper_name": "left_hand__l", + "right_gripper_name": "left_hand_r", + "height_offset": 0.07, # Z方向:lid 上方 0.1m + "std": 0.3, # 位置对齐的容忍度 + }, + weight=3.0 #original 3.0 + ) + approach_lid_penalty = RewTerm( + func=mdp.gripper_lid_distance_penalty, + params={ + "lid_cfg": SceneEntityCfg("lid"), + "robot_cfg": SceneEntityCfg("Mindbot"), + "left_gripper_name": "left_hand__l", + "right_gripper_name": "left_hand_r", + "height_offset": 0.07, + }, + # weight 为负数表示惩罚。 + # 假设距离 0.5m,惩罚就是 -0.5 * 2.0 = -1.0 分。 + weight=-1.0 + ) + # 【新增】精细对齐 (引导进入 2cm 圈) + gripper_lid_fine_alignment = RewTerm( + func=mdp.gripper_lid_position_alignment, + params={ + "lid_cfg": SceneEntityCfg("lid"), + "robot_cfg": SceneEntityCfg("Mindbot"), + "left_gripper_name": "left_hand__l", + "right_gripper_name": "left_hand_r", + "height_offset": 0.07, # 【注意】这个高度必须非常准,和 interaction 判定高度一致 + "std": 0.05, # 非常陡峭的梯度,只在 5-10cm 内有效 + }, + weight=10.0 # 高权重 + ) + + gripper_close_interaction = RewTerm( + func=mdp.gripper_close_when_near, + params={ + "lid_cfg": SceneEntityCfg("lid"), + "robot_cfg": SceneEntityCfg("Mindbot"), + "left_gripper_name": "left_hand__l", + "right_gripper_name": "left_hand_r", + "joint_names": ["left_hand_joint_left", "left_hand_joint_right"], + "target_close_pos": 0.03, + "height_offset": 0.04, + "grasp_radius": 0.03, + }, + weight=10.0 + ) + + lid_lifted_reward = RewTerm( + func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数 + params={ + "lid_cfg": SceneEntityCfg("lid"), + "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右 + }, + weight=10.0 # 给一个足够大的权重 + ) + + lid_lifting_reward = RewTerm( + func=mdp.lid_lift_progress_reward, + params={ + "lid_cfg": SceneEntityCfg("lid"), + "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标 + "target_height_lift": 0.2, + "height_offset": 0.07, # 与其他奖励保持一致 + "std": 0.1 + }, + # 
权重设大一点,告诉它这是最终目标 + weight=10.0 + ) + + + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + mindbot_fly_away = DoneTerm( + func=mdp.base_height_failure, + params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0}, + ) + + # lid_fly_away = DoneTerm( + # func=mdp.base_height_failure, + # params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0}, + # ) + + # 新增:盖子掉落判定 + # 如果盖子掉回桌面(高度接近初始高度),且已经运行了一段时间(避开刚开始重置的瞬间) + # lid_dropped = DoneTerm( + # func=mdp.base_height_failure, # 复用高度判定函数 + # params={ + # "asset_cfg": SceneEntityCfg("lid"), + # "minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了 + # }, + # ) + + + +## +# Environment configuration +## + +@configclass +class CurriculumCfg: + pass + # action_rate = CurrTerm(func=mdp.modify_reward_weight, + # params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000}) + + # joint_vel = CurrTerm(func=mdp.modify_reward_weight, + # params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000}) + + # position_std_scheduler = CurrTerm( + # func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类 + # params={ + # # 路径:rewards管理器 -> 你的奖励项名 -> params字典 -> std键 + # "address": "rewards.gripper_lid_position_alignment.params.std", + + # # 修改逻辑:使用我们刚才写的函数 + # "modify_fn": mdp.annealing_std, + + # # 传给函数的参数 + # "modify_params": { + # "start_step": 00, # 约 600 轮 + # "end_step": 5_000, # 约 1200 轮 + # "start_std": 0.3, + # "end_std": 0.05, + # } + # } + # ) + +@configclass +class MindbotEnvCfg(ManagerBasedRLEnvCfg): + # Scene settings + scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=5.0) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + events: EventCfg = EventCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + # curriculum: CurriculumCfg = CurriculumCfg() + + # Post initialization + def __post_init__(self) -> None: + """Post initialization.""" + # general settings + self.decimation = 4 #original 2 # 从 2 增加到 4,控制频率从 25Hz 降到 12.5Hz + self.episode_length_s = 10 + # viewer settings + self.viewer.eye = (3.5, 0.0, 13.2)#(3.5, 0.0, 3.2) + # simulation settings + self.sim.dt = 1 / 120 #original 1 / 60 + self.sim.render_interval = self.decimation + # # 1. 基础堆内存 + self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256MB + self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024 + self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024 + self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024 # 200万个接触点 + self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024 # 32万个 patch + + # # 5. 聚合对容量 (针对复杂的 Articulation) + self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024 + self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024 + self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024 diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/__init__.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/__init__.py new file mode 100644 index 0000000..4ab42b0 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
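With `sim.dt = 1 / 120`, `decimation = 4`, and `episode_length_s = 10` in the MindbotEnvCfg above, the physics and control rates follow directly; a quick check of the numbers these settings imply:

```python
physics_dt = 1.0 / 120.0
decimation = 4
control_dt = physics_dt * decimation         # the policy acts every 4 physics steps

print(round(1.0 / physics_dt))               # 120 -> physics Hz
print(round(1.0 / control_dt))               # 30  -> control (and render) Hz,
                                             #        since render_interval == decimation
episode_length_s = 10
print(round(episode_length_s / control_dt))  # 300 -> control steps per episode
```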
+# +# SPDX-License-Identifier: BSD-3-Clause + +"""This sub-module contains the functions that are specific to the environment.""" + +from isaaclab.envs.mdp import * # noqa: F401, F403 + +from .rewards import * # noqa: F401, F403 +from .terminations import * # noqa: F401, F403 +from .gripper import * # noqa: F401, F403 +from .curriculums import * # noqa: F401, F403 \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/curriculums.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/curriculums.py new file mode 100644 index 0000000..4e5d833 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/curriculums.py @@ -0,0 +1,50 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +from isaaclab.envs.mdp import modify_term_cfg + +def annealing_std( + env: ManagerBasedRLEnv, + env_ids: torch.Tensor, + current_value: float, + start_step: int, + end_step: int, + start_std: float, + end_std: float +): + """ + 根据步数线性退火 std 值。 + + Args: + current_value: 当前的参数值 (系统自动传入) + start_step: 开始退火的步数 + end_step: 结束退火的步数 + start_std: 初始 std + end_std: 最终 std + """ + current_step = env.common_step_counter + + # 1. 还没到开始时间 -> 保持初始值 (或者不改) + if current_step < start_step: + # 如果当前值还没被设为 start_std,就强制设一下,否则不动 + return start_std + + # 2. 已经超过结束时间 -> 保持最终值 + elif current_step >= end_step: + return end_std + + # 3. 在中间 -> 线性插值 + else: + ratio = (current_step - start_step) / (end_step - start_step) + new_std = start_std + (end_std - start_std) * ratio + return new_std \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/gripper.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/gripper.py new file mode 100644 index 0000000..981ac01 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/gripper.py @@ -0,0 +1,54 @@ +# 假设这是在你的 mdp.py 文件中 + +from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg +from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction +from isaaclab.utils import configclass +import torch + + +class CoupledJointPositionAction(JointPositionAction): + def __init__(self, cfg: 'CoupledJointPositionActionCfg', env): + super().__init__(cfg, env) + + @property + def action_dim(self) -> int: + """强制 ActionManager 认为只需要 1D 输入。""" + return 1 + + """ + 这是运行时被实例化的类。它继承自 JointPositionAction。 + """ + def process_actions(self, actions: torch.Tensor): + scale = self.cfg.scale + offset = self.cfg.offset + # store the raw actions + self._raw_actions[:] = torch.clamp(actions, -1, 1) + # apply the affine transformations + target_position_interval = self._raw_actions * scale + offset + right_cmd = target_position_interval + left_cmd = -target_position_interval + # import pdb; pdb.set_trace() + self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1) + # print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}") + # clip actions + if self.cfg.clip is not None: + self._processed_actions = torch.clamp( + self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1] + ) + + +@configclass +class CoupledJointPositionActionCfg(JointPositionActionCfg): + """ + 配置类。关键在于设置 class_type 指向上面的实现类。 + """ + # !!! 关键点 !!! 
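CoupledJointPositionAction above collapses the two finger joints into one action dimension by mirroring a single scalar command. A standalone sketch of that mapping with plain tensors; the scale and offset values are illustrative (in the class they come from the action config):

```python
import torch

def couple_gripper_command(actions: torch.Tensor, scale: float = 0.03,
                           offset: float = 0.0) -> torch.Tensor:
    """Map a (num_envs, 1) scalar command in [-1, 1] to mirrored targets for the
    two finger joints: right = +target, left = -target."""
    raw = torch.clamp(actions, -1.0, 1.0)
    target = raw * scale + offset
    left_cmd, right_cmd = -target, target
    return torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)  # (num_envs, 2)

print(couple_gripper_command(torch.tensor([[1.0], [-0.5]])))
# tensor([[-0.0300,  0.0300],
#         [ 0.0150, -0.0150]])
```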
+ # 告诉 ActionManager: "当你看到这个配置时,请实例化 CoupledJointPositionAction 这个类" + class_type = CoupledJointPositionAction + + def __post_init__(self) -> None: + # 这里的检查逻辑是没问题的,因为 ActionManager 会在初始化时调用它 + if len(self.joint_names) != 2: + raise ValueError("Requires exactly two joint names.") + super().__post_init__() + diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/rewards.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/rewards.py new file mode 100644 index 0000000..561c2f6 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/rewards.py @@ -0,0 +1,881 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor: + """Penalize joint position deviation from a target value.""" + # extract the used quantities (to enable type-hinting) + asset: Articulation = env.scene[asset_cfg.name] + # wrap the joint positions to (-pi, pi) + joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids]) + # compute the reward + return torch.sum(torch.square(joint_pos - target), dim=1) + + +def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor: + """Reward the agent for lifting the lid above the minimal height.""" + lid: RigidObject = env.scene[lid_cfg.name] + # root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,) + height = lid.data.root_pos_w[:, 2] + return torch.where(height > minimal_height, 1.0, 0.0) + +def gripper_lid_distance(env: ManagerBasedRLEnv, std: float, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + """Reward the agent for reaching the lid using tanh-kernel.""" + # extract the used quantities (to enable type-hinting) + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + # float 化,防止成为 (3,) 向量 + if not isinstance(std, float): + if torch.is_tensor(std): + std = std.item() + else: + std = float(std) + + # Target lid position: (num_envs, 3) + lid_pos_w = lid.data.root_pos_w[:, :3] + + # Gripper position: (num_envs, 3) + # body_idx = robot.find_bodies(gripper_body_name)[0] + # gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1) + body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True) + gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :] + # Distance of the gripper to the lid: (num_envs,) + distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6 + + return 1 - torch.tanh(distance / std) + + +def lid_grasped(env: ManagerBasedRLEnv, + distance_threshold: float = 0.05, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + """Reward the agent for grasping the lid (close and lifted).""" + # extract the used quantities (to enable 
type-hinting) + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + # Target lid position: (num_envs, 3) + lid_pos_w = lid.data.root_pos_w[:, :3] + + # Gripper position: (num_envs, 3) + body_idx = robot.find_bodies(gripper_body_name)[0] + gripper_pos_w = robot.data.body_pos_w[:, body_idx, :] + + # Distance of the gripper to the lid: (num_envs,) + distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + + # Check if close and lifted + is_close = distance < distance_threshold + is_lifted = lid.data.root_pos_w[:, 2] > 0.1 + + return torch.where(is_close & is_lifted, 1.0, 0.0) + +def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + """Combined reward for reaching the lid AND lifting it.""" + # Get reaching reward + reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name) + # Get lifting reward + lift_reward = lid_is_lifted(env, minimal_height, lid_cfg) + # Combine rewards multiplicatively + return reach_reward * lift_reward + + + +# def gripper_lid_position_alignment(env: ManagerBasedRLEnv, +# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# left_gripper_name: str = "left_hand__l", +# right_gripper_name: str = "left_hand_r", +# height_offset: float = 0.1, +# std: float = 0.1) -> torch.Tensor: +# """奖励函数:夹爪相对于lid的位置对齐。 + +# Args: +# env: 环境实例 +# lid_cfg: lid的配置 +# robot_cfg: 机器人的配置 +# left_gripper_name: 左侧夹爪body名称 +# right_gripper_name: 右侧夹爪body名称 +# side_distance: Y方向的期望距离 +# height_offset: Z方向的期望高度偏移 +# std: tanh核函数的标准差 + +# Returns: +# 位置对齐奖励 (num_envs,) +# """ +# lid: RigidObject = env.scene[lid_cfg.name] +# robot: Articulation = env.scene[robot_cfg.name] + +# lid_pos_w = lid.data.root_pos_w[:, :3] + +# # 获取两个夹爪的位置 +# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) +# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + +# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3) +# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3) + +# # 计算夹爪中心位置 +# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3) + +# # 计算相对位置(夹爪中心相对于 lid 中心) +# rel_pos = gripper_center - lid_pos_w # (num_envs, 3) + +# # X 方向位置:应该对齐(接近 0) +# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6 +# x_reward = 1.0 - torch.tanh(x_dist / std) + +# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance) +# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6 +# # y_error = torch.abs(y_dist - side_distance) +# # y_reward = 1.0 - torch.tanh(y_error / std) +# y_reward = 1.0 - torch.tanh(y_dist / std) + +# # Z 方向位置:应该在 lid 上方 height_offset 处 +# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6 +# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6 +# z_reward = 1.0 - torch.tanh(z_error / std) + +# # 组合位置奖励 +# position_reward = x_reward * y_reward * z_reward + +# return position_reward + + +def gripper_lid_position_alignment(env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + height_offset: float = 0.02, + std: float = 0.1) -> torch.Tensor: + """奖励函数:夹爪相对于lid的位置对齐(简化版)。 + + 
计算夹爪中心到目标点(lid上方height_offset处)的距离奖励。 + + Args: + env: 环境实例 + lid_cfg: lid的配置 + robot_cfg: 机器人的配置 + left_gripper_name: 左侧夹爪body名称 + right_gripper_name: 右侧夹爪body名称 + height_offset: Z方向的期望高度偏移(目标点在lid上方) + std: tanh核函数的标准差 + + Returns: + 位置对齐奖励 (num_envs,) + """ + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + lid_pos_w = lid.data.root_pos_w[:, :3] + + # 目标点:lid位置 + height_offset(Z方向) + target_pos = lid_pos_w.clone() + # print(target_pos[0]) + target_pos[:, 2] += height_offset # 在lid上方height_offset处 + # print(target_pos[0]) + # 获取两个夹爪的位置 + left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) + right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + + left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3) + right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3) + + # 计算夹爪中心位置 + gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3) + # print(gripper_center[0]) + # 计算夹爪中心到目标点的3D距离 + distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6 + + # 距离奖励:距离越小,奖励越大 + position_reward = 1.0 - torch.tanh(distance / std) + + return position_reward + + +# ... (保留原有的 import) + +# ============================================================================== +# 新增:直接距离惩罚 (线性引导) +# ============================================================================== +def gripper_lid_distance_penalty( + env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + height_offset: float = 0.07, +) -> torch.Tensor: + """ + 计算夹爪中心到目标点的欧式距离。 + 返回的是正的距离值,我们会在 config 里给它设置负权重 (比如 weight=-1.0)。 + 这样:距离越远 -> 惩罚越大 (Total Reward 越低)。 + """ + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + # 1. 目标点 (Lid中心 + 高度偏移) + lid_pos_w = lid.data.root_pos_w[:, :3].clone() + lid_pos_w[:, 2] += height_offset + + # 2. 夹爪中心 + left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) + right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + + left_pos = robot.data.body_pos_w[:, left_body_ids[0], :] + right_pos = robot.data.body_pos_w[:, right_body_ids[0], :] + gripper_center = (left_pos + right_pos) / 2.0 + + # 3. 
计算距离 + distance = torch.norm(gripper_center - lid_pos_w, dim=1) + + return distance + +# ============================================================================== +# 优化:位置对齐 (保持原函数,但确保逻辑清晰) +# ============================================================================== +def gripper_lid_position_alignment(env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + height_offset: float = 0.02, + std: float = 0.1) -> torch.Tensor: + """奖励函数:基于 tanh 核函数的距离奖励 (靠近得越多,分数涨得越快)""" + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + lid_pos_w = lid.data.root_pos_w[:, :3] + + target_pos = lid_pos_w.clone() + target_pos[:, 2] += height_offset + + left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) + right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + + left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] + right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] + + gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 + + distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6 + + # 这里保持不变,关键在于我们在 config 里怎么设置 std 和 weight + position_reward = 1.0 - torch.tanh(distance / std) + + return position_reward + + +# def gripper_lid_orientation_alignment( +# env, +# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# gripper_body_name: str = "left_hand_body", +# gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z +# gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行) +# lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y +# ) -> torch.Tensor: + +# # 1. 获取对象 +# lid = env.scene[lid_cfg.name] +# robot = env.scene[robot_cfg.name] + +# # 2. 获取姿态 +# lid_quat_w = lid.data.root_quat_w +# body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True) +# gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :] + +# # 3. 垂直对齐 (Vertical Alignment) +# # 目标:夹爪 +Y 指向 World -Z +# grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1) +# grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local) +# # print(grip_fwd_world[0]) +# world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1) + +# dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1) +# dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) #add +# # 映射 [-1, 1] -> [0, 1],且全程有梯度 +# # 即使是朝天 (+1),只要稍微往下转一点,分数就会增加 +# # rew_vertical = (dot_vertical + 1.0) / 2.0 +# val_vertical = torch.clamp((dot_vertical + 1.0) / 2.0, 0.0, 1.0) +# rew_vertical = torch.pow(val_vertical, 2) + 1e-4 +# # nan -> 0 +# rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical) + +# # 4. 偏航对齐 (Yaw Alignment) +# # 目标:夹爪 +Z 平行于 Lid +Y +# grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1) +# grip_width_world = quat_apply(gripper_quat_w, grip_width_local) + +# lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1) +# lid_handle_world = quat_apply(lid_quat_w, lid_handle_local) + +# dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1) +# rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4 +# # nan -> 0 +# rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw) + +# # 5. 
组合 +# total_reward = rew_vertical * rew_yaw + +# total_reward=torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0) + +# # debug +# if not torch.isfinite(total_reward).all(): +# print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}") +# print(f"rew_vertical: {rew_vertical}") +# print(f"rew_yaw: {rew_yaw}") + +# return total_reward + + +def gripper_lid_orientation_alignment( + env, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "left_hand_body", + gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z + gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行) + lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y + max_angle_deg: float = 60.0, # 允许的最大角度偏差(度) +) -> torch.Tensor: + + # 1. 获取对象 + lid = env.scene[lid_cfg.name] + robot = env.scene[robot_cfg.name] + + # 2. 获取姿态 + lid_quat_w = lid.data.root_quat_w + body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True) + gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :] + + # 3. 垂直对齐 (Vertical Alignment) + # 目标:夹爪前向轴指向 World -Z(向下) + grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1) + grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local) + world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1) + + # 计算点积(归一化后,点积 = cos(角度)) + dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1) + dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) + + # 计算角度(弧度) + angle_rad = torch.acos(torch.clamp(dot_vertical, -1.0, 1.0)) + max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device) + + # 如果角度 <= max_angle_deg,给予奖励 + # 奖励从角度=0时的1.0平滑衰减到角度=max_angle时的0.0 + angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0) + rew_vertical = 1.0 - angle_normalized # 角度越小,奖励越大 + + # 如果角度超过60度,奖励为0 + rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical)) + + # nan -> 0 + rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical) + + # 4. 偏航对齐 (Yaw Alignment) + # 目标:夹爪 +Z 平行于 Lid +Y + grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1) + grip_width_world = quat_apply(gripper_quat_w, grip_width_local) + + lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1) + lid_handle_world = quat_apply(lid_quat_w, lid_handle_local) + + dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1) + rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4 + # nan -> 0 + rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw) + + # 5. 
组合 + total_reward = rew_vertical * rew_yaw + + total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0) + + # debug + if not torch.isfinite(total_reward).all(): + print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}") + print(f"rew_vertical: {rew_vertical}") + print(f"rew_yaw: {rew_yaw}") + + return total_reward + + +# def gripper_open_close_reward( +# env: ManagerBasedRLEnv, +# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# left_gripper_name: str = "left_hand_body", # 用于计算距离的 body +# # 注意:确保 joint_names 只包含那两个夹爪关节 +# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], +# close_threshold: float = 0.05, # 5cm 以内视为“近”,需要闭合 +# target_open_pos: float = 0.0, # 【修正】张开是 0.0 +# target_close_pos: float = 0.03 # 【修正】闭合是 0.03 +# ) -> torch.Tensor: +# """ +# 逻辑很简单: +# 1. 如果 距离 < close_threshold:目标关节位置 = 0.03 (闭合) +# 2. 如果 距离 >= close_threshold:目标关节位置 = 0.0 (张开) +# 3. 返回 -abs(当前关节 - 目标关节),即惩罚误差。 +# """ + +# # 1. 获取对象 +# lid = env.scene[lid_cfg.name] +# robot = env.scene[robot_cfg.name] + +# # 2. 计算距离 (末端 vs Lid中心) +# lid_pos_w = lid.data.root_pos_w[:, :3] + +# body_ids, _ = robot.find_bodies([left_gripper_name]) +# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3] + +# # distance: (num_envs,) +# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1) + +# # 3. 动态确定目标关节值 +# # 如果距离小于阈值,目标设为 close_pos,否则设为 open_pos +# # target_val: (num_envs, 1) +# # print(distance) +# target_val = torch.where( +# distance < close_threshold, +# torch.tensor(target_close_pos, device=env.device), +# torch.tensor(target_open_pos, device=env.device) +# ).unsqueeze(1) + +# # 4. 获取当前关节位置 +# joint_ids, _ = robot.find_joints(joint_names) +# # joint_pos: (num_envs, 2) +# current_joint_pos = robot.data.joint_pos[:, joint_ids] + +# # 取绝对值(防止左指是负数的情况,虽然你的配置里范围看起来都是正的,加上abs更保险) +# current_joint_pos_abs = torch.abs(current_joint_pos) + +# # 5. 计算误差 +# # error: (num_envs,) -> 两个手指误差的平均值 +# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1) + +# # # 6. 返回负奖励 (惩罚) +# # # 误差越大,惩罚越大。完全符合要求时奖励为 0。 +# # return -error +# # ===================================================== +# # 🚀 核心修复:安全输出 +# # ===================================================== +# # 1. 使用 tanh 限制数值范围在 [-1, 0] +# reward = -torch.tanh(error / 0.01) + +# # 2. 过滤 NaN!这一步能救命! +# # 如果物理引擎算出 NaN,这里会把它变成 0.0,防止训练崩溃 +# reward = torch.nan_to_num(reward, nan=0.0, posinf=0.0, neginf=-1.0) + +# return reward + + +# def gripper_open_close_reward( +# env: ManagerBasedRLEnv, +# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# left_gripper_name: str = "left_hand_body", +# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], +# close_threshold: float = 0.05, +# target_open_pos: float = 0.0, +# target_close_pos: float = 0.03, +# height_offset: float = 0.06, # 新增:与位置奖励保持一致 +# ) -> torch.Tensor: +# """ +# 逻辑: +# 1. 计算夹爪到【目标抓取点】(Lid上方 height_offset 处) 的距离。 +# 2. 如果距离 < close_threshold:目标关节位置 = 闭合。 +# 3. 否则:目标关节位置 = 张开。 +# """ + +# # 1. 获取对象 +# lid = env.scene[lid_cfg.name] +# robot = env.scene[robot_cfg.name] + +# # 2. 计算目标抓取点位置 (Lid位置 + Z轴偏移) +# lid_pos_w = lid.data.root_pos_w[:, :3].clone() +# lid_pos_w[:, 2] += height_offset # 修正为抓取点位置 + +# # 获取夹爪位置 +# body_ids, _ = robot.find_bodies([left_gripper_name]) +# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3] + +# # 3. 
计算距离 (夹爪 vs 目标抓取点) +# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1) + +# # 4. 动态确定目标关节值 +# # 靠近抓取点 -> 闭合;远离 -> 张开 +# target_val = torch.where( +# distance < close_threshold, +# torch.tensor(target_close_pos, device=env.device), +# torch.tensor(target_open_pos, device=env.device) +# ).unsqueeze(1) + +# # 5. 获取当前关节位置 +# joint_ids, _ = robot.find_joints(joint_names) +# current_joint_pos = robot.data.joint_pos[:, joint_ids] +# current_joint_pos_abs = torch.abs(current_joint_pos) + +# # 6. 计算误差并返回奖励 +# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1) + +# # 使用 tanh 限制数值范围 +# reward = 1.0 - torch.tanh(error / 0.01) # 误差越小奖励越大 + +# return torch.nan_to_num(reward, nan=0.0) + + +def _is_grasped( + env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg, + robot_cfg: SceneEntityCfg, + left_gripper_name: str, + right_gripper_name: str, + joint_names: list, + height_offset: float, + grasp_radius: float, + target_close_pos: float +) -> torch.Tensor: + """ + 内部辅助函数:判定是否成功抓取。 + 条件: + 1. 夹爪中心在把手目标点附近 (Sphere Check) + 2. 夹爪处于闭合发力状态 (Joint Effort Check) + 3. (关键) 夹爪Z轴高度合适,不能压在盖子上面 (Z-Axis Check) + """ + # 1. 获取对象 + lid = env.scene[lid_cfg.name] + robot = env.scene[robot_cfg.name] + + # 2. 目标点位置 (把手中心) + lid_pos_w = lid.data.root_pos_w[:, :3].clone() + lid_pos_w[:, 2] += height_offset + + # 3. 夹爪位置 + l_ids, _ = robot.find_bodies([left_gripper_name]) + r_ids, _ = robot.find_bodies([right_gripper_name]) + pos_L = robot.data.body_pos_w[:, l_ids[0], :3] + pos_R = robot.data.body_pos_w[:, r_ids[0], :3] + gripper_center = (pos_L + pos_R) / 2.0 + + # 4. 条件一:距离判定 (在把手球范围内) + dist_to_handle = torch.norm(gripper_center - lid_pos_w, dim=-1) + is_near = dist_to_handle < grasp_radius + + # 5. 条件二:Z轴高度判定 (防止压在盖子上) + # 夹爪中心必须在把手目标点附近,允许上下微小浮动,但不能太高 + # 假设 height_offset 是把手几何中心,那么夹爪不应该比它高太多 + z_diff = gripper_center[:, 2] - lid_pos_w[:, 2] + is_z_valid = torch.abs(z_diff) < 0.03 # 3cm 容差 + + # 6. 条件三:闭合判定 (正在尝试闭合) + joint_ids, _ = robot.find_joints(joint_names) + # 获取关节位置 (绝对值) + joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids]) + + # === 修改闭合判定 === + + # 1. 关节位置判定 (Effort Check) + # 只要用力了就行,去掉上限判定,防止夹太紧被误判 + # 假设 0.05 是你的新 target_close_pos + is_effort_closing = torch.all(joint_pos > 0.005, dim=1) + + # 2. 几何排斥判定 (Geometry Check) + # 如果真的夹住了东西,左右指尖的距离应该被把手撑开,不会太小 + # 获取指尖位置 + dist_fingertips = torch.norm(pos_L - pos_R, dim=-1) + + # 假设把手厚度是 1cm (0.01m) + # 如果指尖距离 < 0.005,说明两个手指已经碰到一起了(空夹),没夹住东西 + # 如果指尖距离 >= 0.005,说明中间有东西挡着 + is_not_empty = dist_fingertips > 0.005 + + # 综合判定: + # 1. 在把手附近 (is_near) + # 2. 高度合适 (is_z_valid) + # 3. 在用力闭合 (is_effort_closing) + # 4. 指尖没贴死 (is_not_empty) -> 这就证明夹住东西了! + return is_near & is_z_valid & is_effort_closing & is_not_empty + + +def gripper_close_when_near( + env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], + target_close_pos: float = 0.03, + height_offset: float = 0.02, + grasp_radius: float = 0.05 +) -> torch.Tensor: + + # 1. 判定基础抓取条件是否满足 + is_grasped = _is_grasped( + env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name, + joint_names, height_offset, grasp_radius, target_close_pos + ) + + # 2. 
计算夹紧程度 (Clamping Intensity) + robot = env.scene[robot_cfg.name] + joint_ids, _ = robot.find_joints(joint_names) + # 获取当前关节位置绝对值 (num_envs, num_joints) + current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids]) + # 计算两个指关节的平均闭合深度 + mean_joint_pos = torch.mean(current_joint_pos, dim=1) + + # 计算奖励系数:当前位置 / 目标闭合位置 + # 这样当关节越接近 0.03 时,奖励越接近 1.0 + # 强制让 Agent 产生“压满”动作的冲动 + clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0) + + # 3. 只有在满足抓取判定时,才发放带有权重的夹紧奖励 + # 如果没对准或者没夹到,奖励依然是 0 + return torch.where(is_grasped, clamping_factor, 0.0) + +# def gripper_close_when_near( +# env: ManagerBasedRLEnv, +# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# # 注意:这里我们需要具体的指尖 body 名字来做几何判定 +# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名 +# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名 +# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], +# target_close_pos: float = 0.03, +# height_offset: float = 0.03, +# grasp_radius: float = 0.02 # 球面半径 2cm +# ) -> torch.Tensor: + +# # 1. 获取位置 +# lid = env.scene[lid_cfg.name] +# robot = env.scene[robot_cfg.name] +# lid_pos_w = lid.data.root_pos_w[:, :3].clone() +# lid_pos_w[:, 2] += height_offset # 把手中心 + +# # 获取左右指尖位置 +# l_ids, _ = robot.find_bodies([left_gripper_name]) +# r_ids, _ = robot.find_bodies([right_gripper_name]) +# pos_L = robot.data.body_pos_w[:, l_ids[0], :3] +# pos_R = robot.data.body_pos_w[:, r_ids[0], :3] + +# # 计算夹爪中心 +# pos_center = (pos_L + pos_R) / 2.0 + +# # 2. 距离判定 (Is Inside Sphere?) +# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1) +# is_in_sphere = (dist_center < grasp_radius).float() + +# # 3. "在中间"判定 (Is Between?) +# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间 +# vec_LR = pos_R - pos_L # 左指指向右指 +# vec_LO = lid_pos_w - pos_L # 左指指向物体 + +# # 计算投影比例 t +# # P_proj = P_L + t * (P_R - P_L) +# # t = (vec_LO . vec_LR) / |vec_LR|^2 +# # 如果 0 < t < 1,说明投影在两个指尖之间 + +# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6 +# dot = torch.sum(vec_LO * vec_LR, dim=-1) +# t = dot / len_sq + +# is_between = (t > 0.0) & (t < 1.0) +# is_between = is_between.float() + +# # 4. 闭合判定 +# joint_ids, _ = robot.find_joints(joint_names) +# # 注意:如果是 Binary Action,可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题 +# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids]) +# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1) +# # 只有当非常接近闭合目标时才给分 +# is_closing = (close_error < 0.005).float() # 允许 5mm 误差 + +# # 5. 
最终奖励 +# # 只有三者同时满足才给 1.0 分 +# reward = is_in_sphere * is_between * is_closing + +# return torch.nan_to_num(reward, nan=0.0) + + +# def lid_is_lifted( +# env:ManagerBasedRLEnv, +# minimal_height:float, +# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid") +# ) -> torch.Tensor: + +# lid = env.scene[lid_cfg.name] +# lid_height = lid.data.root_pos_w[:, 2] +# reward=torch.where(lid_height > minimal_height, 1.0, 0.0) +# reward=torch.nan_to_num(reward, nan=0.0) + +# return reward + + +def lid_is_lifted( + env: ManagerBasedRLEnv, + minimal_height: float = 0.05, # 相对提升阈值 + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], + height_offset: float = 0.03, + grasp_radius: float = 0.1, + target_close_pos: float = 0.03, +) -> torch.Tensor: + + is_grasped = _is_grasped( + env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name, + joint_names, height_offset, grasp_radius, target_close_pos + ) + + lid = env.scene[lid_cfg.name] + + # 1. 获取高度 + current_height = lid.data.root_pos_w[:, 2] + # 自动获取初始高度 + initial_height = lid.data.default_root_state[:, 2] + + # 2. 计算提升量 + lift_height = torch.clamp(current_height - initial_height, min=0.0) + + # 3. 速度检查 (防撞飞) + # 如果速度 > 1.0 m/s,视为被撞飞,不给分 + lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1) + is_stable = lid_vel < 1.0 + + # 4. 计算奖励 + # 基础分:高度越高分越高 + shaping_reward = lift_height * 2.0 + # 成功分:超过阈值 + success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0) + + # 组合 + # 必须 is_grasped AND is_stable + reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height)) + + return torch.nan_to_num(reward, nan=0.0) + + + +def lid_lift_success_reward( + env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand_body", + joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], + # 关键参数 + settled_height: float = 0.75, # 盖子落在超声仪上的稳定高度 (需根据仿真实际情况微调) + target_lift_height: float = 0.05, # 目标提升高度 (5cm) + grasp_dist_threshold: float = 0.05, # 抓取判定距离 + closed_pos: float = 0.03 # 夹爪闭合阈值 +) -> torch.Tensor: + """ + 提升奖励:只有在【夹稳】的前提下,将盖子【相对】提升达到目标高度才给高分。 + """ + # 1. 获取数据 + lid = env.scene[lid_cfg.name] + robot = env.scene[robot_cfg.name] + + # 盖子实时高度 + lid_z = lid.data.root_pos_w[:, 2] + lid_pos = lid.data.root_pos_w[:, :3] + + # 夹爪位置 + body_ids, _ = robot.find_bodies([left_gripper_name]) + gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3] + + # 2. 【条件一:是否夹稳 (Is Grasped?)】 + # 距离检查 + distance = torch.norm(gripper_pos - lid_pos, dim=-1) + is_close = distance < grasp_dist_threshold + + # 闭合检查 + joint_ids, _ = robot.find_joints(joint_names) + joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids]) + # 假设 > 80% 的闭合度算抓紧了 + is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1) + + is_grasped = is_close & is_closed + + # 3. 【条件二:计算相对提升 (Relative Lift)】 + # 当前高度 - 初始稳定高度 + current_lift = lid_z - settled_height + + # 4. 
计算奖励 + lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0) + + # 基础提升分 (Shaping Reward) + lift_reward = lift_progress + + # 成功大奖 (Success Bonus) + # 如果提升超过目标高度 (比如 > 95% 的 5cm),给额外大奖 + success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0) + + # 组合:只有在 is_grasped 为 True 时才发放提升奖励 + total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward)) + + # 5. 安全输出 + return torch.nan_to_num(total_reward, nan=0.0) + + +def lid_lift_progress_reward( + env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], + initial_height: float = 0.8, + target_height_lift: float = 0.15, + height_offset: float = 0.02, # 必须与抓取奖励保持一致 + grasp_radius: float = 0.1, # 提升时可以稍微放宽一点半径,防止抖动丢分 + target_close_pos: float = 0.03, + std: float = 0.05 # 标准差 +) -> torch.Tensor: + + # 1. 判定是否夹住 + is_grasped = _is_grasped( + env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name, + joint_names, height_offset, grasp_radius, target_close_pos + ) + + # 2. 计算高度 + lid = env.scene[lid_cfg.name] + current_height = lid.data.root_pos_w[:, 2] + lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift) + # print(current_height) + # print(lift_height) + # 3. 计算奖励 + # 只有在 is_grasped 为 True 时,才发放高度奖励 + # 这样彻底杜绝了 "砸飞/撞飞" 拿分的情况 + progress = torch.tanh(lift_height / std) + + reward = torch.where(is_grasped, progress, 0.0) + + return reward + diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/terminations.py b/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/terminations.py new file mode 100644 index 0000000..8b31be2 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/terminations.py @@ -0,0 +1,97 @@ +from __future__ import annotations +import torch +from typing import TYPE_CHECKING +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import SceneEntityCfg + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + +def lid_dropped(env: ManagerBasedRLEnv, + minimum_height: float = -0.05, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor: + lid: RigidObject = env.scene[lid_cfg.name] + return lid.data.root_pos_w[:, 2] < minimum_height + +def lid_successfully_grasped(env: ManagerBasedRLEnv, + distance_threshold: float = 0.03, + height_threshold: float = 0.2, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + body_idx = robot.find_bodies(gripper_body_name)[0] + gripper_pos_w = robot.data.body_pos_w[:, body_idx, :] + distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1) + is_close = distance < distance_threshold + is_lifted = lid.data.root_pos_w[:, 2] > height_threshold + return is_close & is_lifted + +def gripper_at_lid_side(env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", # 改为两个下划线 + right_gripper_name: str = "left_hand_r", + side_distance: float = 0.05, + side_tolerance: float = 0.01, + alignment_tolerance: float = 0.02, + 
height_offset: float = 0.1, + height_tolerance: float = 0.02) -> torch.Tensor: + """Terminate when gripper center is positioned on the side of the lid at specified height. + + 坐标系说明: + - X 方向:两个夹爪朝中心合并的方向 + - Y 方向:夹爪间空隙的方向,和 lid 的把手方向一致 + - Z 方向:高度方向 + """ + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + lid_pos_w = lid.data.root_pos_w[:, :3] + + # 获取两个夹爪的位置 + left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) + right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + + left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] + right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] + + # 计算夹爪中心位置 + gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 + rel_pos = gripper_center - lid_pos_w + + # Y 方向:应该在 lid 的 Y 方向两侧(距离 side_distance) + y_dist = torch.abs(rel_pos[:, 1]) + y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance) + + # X 方向:应该对齐(接近 0) + x_dist = torch.abs(rel_pos[:, 0]) + x_ok = x_dist < alignment_tolerance + + # Z 方向:应该在 lid 上方 height_offset 处 + z_error = torch.abs(rel_pos[:, 2] - height_offset) + z_ok = z_error < height_tolerance + + # 所有条件都要满足 + return x_ok & y_ok & z_ok + + +def base_height_failure(env: ManagerBasedRLEnv, + minimum_height: float | None = None, + maximum_height: float | None = None, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor: + """Terminate when the robot's base height is outside the specified range.""" + # extract the used quantities (to enable type-hinting) + asset: Articulation = env.scene[asset_cfg.name] + root_pos_z = asset.data.root_pos_w[:, 2] + + # check if height is outside the range + out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool) + if minimum_height is not None: + out_of_bounds |= root_pos_z < minimum_height + if maximum_height is not None: + out_of_bounds |= root_pos_z > maximum_height + + return out_of_bounds + diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py b/source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py index 90e987a..5ff2228 100644 --- a/source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py +++ b/source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py @@ -177,200 +177,11 @@ def gripper_handle_orientation_alignment( # debug if not torch.isfinite(total_reward).all(): print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}") - print(f"rew_vertical: {rew_forward}") + print(f"rew_vertical: {dot_fwd}") print(f"rew_yaw: {rew_yaw}") return total_reward - # # 计算点积:我们希望它们垂直,即点积为 0 - # dot_width_handle = torch.sum(grip_width_world * handle_axis_world, dim=1) - - # # 奖励:1.0 - abs(dot)。点积越小(越垂直),分数越高 - # rew_perpendicular = 1.0 - torch.abs(dot_width_handle) - - # # 稍微增强一下敏感度,让它更倾向于完美垂直 - # rew_perpendicular = rew_perpendicular * rew_perpendicular - - # # 3. 
组合奖励 - # total_reward = rew_forward * rew_perpendicular - - # return torch.nan_to_num(total_reward, nan=0.0) - - -# def gripper_handle_position_alignment(env: ManagerBasedRLEnv, -# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), -# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), -# handle_name: str = "Equipment_BB_13_C", -# left_gripper_name: str = "left_hand__l", -# right_gripper_name: str = "left_hand_r", -# height_offset: float = 0.03, -# std: float = 0.1) -> torch.Tensor: - -# dryingbox: RigidObject = env.scene[dryingbox_cfg.name] -# robot: Articulation = env.scene[robot_cfg.name] - -# # lid_pos_w = lid.data.root_pos_w[:, :3] -# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True) -# # [num_envs, 3] -# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :] -# # 目标点:lid位置 + height_offset(Z方向) -# target_pos = handle_pos_w.clone() -# # print(target_pos[0]) -# target_pos[:, 0] -= height_offset # 在lid上方height_offset处 -# # print(target_pos[0]) -# # 获取两个夹爪的位置 -# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) -# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) - -# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3) -# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3) - -# # 计算夹爪中心位置 -# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3) -# # print(gripper_center[0]) -# # 计算夹爪中心到目标点的3D距离 -# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6 - -# # 距离奖励:距离越小,奖励越大 -# position_reward = 1.0 - torch.tanh(distance / std) - -# return position_reward - - -# def gripper_handle_position_alignment( -# env, -# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), -# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), -# handle_name: str = "Equipment_BB_13_C", -# left_finger_name: str = "right_hand_l", -# right_finger_name: str = "right_hand__r", -# # 【修改点1】:根据你的描述,长轴是 Local Z,所以改为 (0, 0, 1) -# handle_local_axis: tuple = (0.0, 0.0, 1.0), -# # 【新增】:明确指定手柄的“侧面”是 Local X 轴 -# handle_local_side_axis: tuple = (0.0, -1.0, 0.0), -# grasp_width: float = 0.03, -# grasp_offset: float = 0.06, -# std: float = 0.05, -# ) -> torch.Tensor: -# """ -# 修正版: -# 1. 修正了手柄长轴为 Local Z。 -# 2. 使用 Local X 作为手柄两侧的基准方向。 -# """ -# # 1. 获取数据 -# dryingbox = env.scene[dryingbox_cfg.name] -# robot = env.scene[robot_cfg.name] - -# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True) - -# # [num_envs, 3] -# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :] -# # [num_envs, 4] -# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :] - -# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True) -# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True) -# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :] -# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :] - -# # 2. 
计算关键向量 -# # 【修正】:计算世界坐标系下的手柄长轴 (应为 World Z) -# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1) -# # 虽然本奖励函数主要位置点,不需要显式用到长轴向量做点积,但保留以备后续增加角度奖励 -# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec)) - -# # 【核心修改】:直接计算世界坐标系下的手柄“侧向”向量 (即 Local X -> World Y) -# # 这种方法比 cross(approach, axis) 更准确,因为它尊重了物体自身的旋转 -# handle_side_local_vec = torch.tensor(handle_local_side_axis, device=env.device).repeat(env.num_envs, 1) -# side_vec_world = normalize(quat_apply(handle_quat_w, handle_side_local_vec)) - -# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱) -# # 注意:如果你的烘箱放置位置旋转了,这里也建议改为读取机器人的前方或者物体的Local方向 -# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1) - -# # 3. 计算理想的抓取点 -# # 抓取中心:从手柄中心沿着抓取方向(World X)反向退后 grasp_offset -# grasp_center = handle_pos_w - approach_dir * grasp_offset - -# # 计算理想的左/右手指位置 -# # 既然 side_vec_world 是 Local X (对应 World Y) -# # 我们希望手指在 Y 轴两侧张开 -# ideal_l_pos = grasp_center + side_vec_world * (grasp_width / 2.0) -# ideal_r_pos = grasp_center - side_vec_world * (grasp_width / 2.0) - -# # 注意:这里 l_pos 用 + 还是 - 取决于你的 robot 建模里 right_hand_l 实际是在正Y侧还是负Y侧 -# # 如果发现左右手指交叉,交换上面的 + 和 - 即可。 - -# # 4. 计算距离奖励 -# dist_l = torch.norm(l_finger_pos - ideal_r_pos, dim=1) -# dist_r = torch.norm(r_finger_pos - ideal_l_pos, dim=1) - -# rew_l = 1.0 - torch.tanh(dist_l / std) -# rew_r = 1.0 - torch.tanh(dist_r / std) - -# total_reward = (rew_l + rew_r) / 2.0 - -# return torch.nan_to_num(total_reward, nan=0.0) - -# def gripper_handle_position_alignment( -# env, -# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), -# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), -# handle_name: str = "Equipment_BB_13_C", -# left_finger_name: str = "right_hand_l", # 左手指 body 名称 -# right_finger_name: str = "right_hand__r", # 右手指 body 名称 -# handle_local_axis: tuple = (0.0, 0.0, 1.0), # 手柄长轴 -# grasp_width: float = 0.03, # 预期的抓取宽度 (米) -# grasp_offset: float = 0.03, # 手指尖距离手柄中心的进深偏移 -# std: float = 0.05, # 奖励函数的敏感度 -# ) -> torch.Tensor: -# """ -# 双指位置奖励:引导左/右手指分别到达手柄的两侧。 -# """ -# # 1. 获取数据 -# dryingbox = env.scene[dryingbox_cfg.name] -# robot = env.scene[robot_cfg.name] - -# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True) -# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :] -# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :] - -# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True) -# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True) -# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :] -# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :] - -# # 2. 计算理想的抓取点 -# # 获取手柄当前轴向 -# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1) -# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec)) - -# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱) -# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1) - -# # 计算垂直于手柄轴向和接近方向的向量,这就是“侧面”方向 -# side_vec = normalize(torch.cross(approach_dir, handle_axis_world)) - -# # 计算抓取中心点 (手柄中心沿接近方向反向偏移一点,留出空间) -# grasp_center = handle_pos_w - approach_dir * grasp_offset - -# # 计算理想的左/右手指位置 (在中心点两侧张开) -# # 注意:这里需要根据你的坐标系确认哪个是左,哪个是右 -# ideal_l_pos = grasp_center - side_vec * (grasp_width / 2.0) -# ideal_r_pos = grasp_center + side_vec * (grasp_width / 2.0) - -# # 3. 
计算距离奖励 -# dist_l = torch.norm(l_finger_pos - ideal_l_pos, dim=1) -# dist_r = torch.norm(r_finger_pos - ideal_r_pos, dim=1) - -# rew_l = 1.0 - torch.tanh(dist_l / std) -# rew_r = 1.0 - torch.tanh(dist_r / std) - -# # 取平均值 -# total_reward = (rew_l + rew_r) / 2.0 - -# return torch.nan_to_num(total_reward, nan=0.0) - # 在 rewards.py 中,修改 _is_grasped 函数,使其支持 dryingbox 和 handle_name def _is_grasped( @@ -400,8 +211,8 @@ def _is_grasped( handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True) handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone() handle_pos_w[:, 1] += height_offset # X方向偏移 - handle_pos_w[:, 0] += 0.02 - handle_pos_w[:, 2] -= 0.04 + handle_pos_w[:, 0] += 0.005 + # handle_pos_w[:, 2] -= 0.04 # 3. 夹爪位置 l_ids, _ = robot.find_bodies([left_gripper_name]) r_ids, _ = robot.find_bodies([right_gripper_name]) @@ -411,7 +222,7 @@ def _is_grasped( # 4. 条件一:距离判定 (在把手球范围内) dist_to_handle = torch.norm(gripper_center - handle_pos_w, dim=-1) - if env.common_step_counter % 100 == 0: + if env.common_step_counter % 500 == 0: print(f"Env 0 Dist to Handle: {dist_to_handle[0].item():.4f}m") is_near = dist_to_handle < grasp_radius @@ -479,80 +290,51 @@ def gripper_close_when_near( # def gripper_close_when_near( # env: ManagerBasedRLEnv, -# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), +# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), # robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), -# # 注意:这里我们需要具体的指尖 body 名字来做几何判定 -# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名 -# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名 -# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], -# target_close_pos: float = 0.03, -# height_offset: float = 0.03, -# grasp_radius: float = 0.02 # 球面半径 2cm +# handle_name: str = "Equipment_BB_13_C", +# left_gripper_name: str = "right_hand_l", +# right_gripper_name: str = "right_hand__r", +# joint_names: list = ["right_hand_joint_left", "right_hand_joint_right"], # 保留接口但不强依赖 +# # 关键参数 +# handle_thickness: float = 0.0388, # 把手厚度(米),≈3.88cm +# thickness_tol: float = 0.008, # 容差(米),≈0.8cm,控制惩罚/奖励斜率 +# grasp_radius: float = 0.04, # 抓取判定半径,略大于把手厚度 +# height_offset: float = 0.00, # 如需上下偏移可调整 +# target_close_pos: float = 0.03, # 保留给 joint/clamping 兼容 # ) -> torch.Tensor: - -# # 1. 获取位置 -# lid = env.scene[lid_cfg.name] +# dryingbox = env.scene[dryingbox_cfg.name] # robot = env.scene[robot_cfg.name] -# lid_pos_w = lid.data.root_pos_w[:, :3].clone() -# lid_pos_w[:, 2] += height_offset # 把手中心 - -# # 获取左右指尖位置 + +# # 目标位置(把手中心,可做微偏移) +# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True) +# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone() +# handle_pos[:, 1] += height_offset + +# # 手指位置与中心 # l_ids, _ = robot.find_bodies([left_gripper_name]) # r_ids, _ = robot.find_bodies([right_gripper_name]) # pos_L = robot.data.body_pos_w[:, l_ids[0], :3] # pos_R = robot.data.body_pos_w[:, r_ids[0], :3] - -# # 计算夹爪中心 -# pos_center = (pos_L + pos_R) / 2.0 - -# # 2. 距离判定 (Is Inside Sphere?) -# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1) -# is_in_sphere = (dist_center < grasp_radius).float() - -# # 3. "在中间"判定 (Is Between?) -# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间 -# vec_LR = pos_R - pos_L # 左指指向右指 -# vec_LO = lid_pos_w - pos_L # 左指指向物体 - -# # 计算投影比例 t -# # P_proj = P_L + t * (P_R - P_L) -# # t = (vec_LO . 
vec_LR) / |vec_LR|^2 -# # 如果 0 < t < 1,说明投影在两个指尖之间 - -# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6 -# dot = torch.sum(vec_LO * vec_LR, dim=-1) -# t = dot / len_sq - -# is_between = (t > 0.0) & (t < 1.0) -# is_between = is_between.float() - -# # 4. 闭合判定 -# joint_ids, _ = robot.find_joints(joint_names) -# # 注意:如果是 Binary Action,可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题 -# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids]) -# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1) -# # 只有当非常接近闭合目标时才给分 -# is_closing = (close_error < 0.005).float() # 允许 5mm 误差 - -# # 5. 最终奖励 -# # 只有三者同时满足才给 1.0 分 -# reward = is_in_sphere * is_between * is_closing - -# return torch.nan_to_num(reward, nan=0.0) +# center = (pos_L + pos_R) / 2.0 + +# # 到把手的距离(只在近距离才给分) +# dist = torch.norm(center - handle_pos, dim=-1) +# is_near = dist < grasp_radius + +# # 实际两指间距 +# finger_dist = torch.norm(pos_L - pos_R, dim=-1) + +# # 奖励:两指间距越接近把手厚度越好,平滑衰减 +# error = torch.abs(finger_dist - handle_thickness) +# close_reward = 1.0 - torch.tanh(error / thickness_tol) + +# # 避免 NaN +# close_reward = torch.nan_to_num(close_reward, nan=0.0) + +# return torch.where(is_near, close_reward, torch.zeros_like(close_reward)) -# def lid_is_lifted( -# env:ManagerBasedRLEnv, -# minimal_height:float, -# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid") -# ) -> torch.Tensor: - -# lid = env.scene[lid_cfg.name] -# lid_height = lid.data.root_pos_w[:, 2] -# reward=torch.where(lid_height > minimal_height, 1.0, 0.0) -# reward=torch.nan_to_num(reward, nan=0.0) - -# return reward def lid_is_lifted( @@ -698,39 +480,7 @@ def lid_lift_progress_reward( return reward -# def lid_grasped_bonus( -# env: ManagerBasedRLEnv, -# lid_cfg: SceneEntityCfg, -# robot_cfg: SceneEntityCfg, -# left_gripper_name: str, -# joint_names: list, -# distance_threshold: float = 0.05, -# closed_pos: float = 0.03 -# ) -> torch.Tensor: -# # 1. 计算距离 -# lid = env.scene[lid_cfg.name] -# robot = env.scene[robot_cfg.name] -# body_ids, _ = robot.find_bodies([left_gripper_name]) -# gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3] -# lid_pos = lid.data.root_pos_w[:, :3] -# distance = torch.norm(gripper_pos - lid_pos, dim=-1) - -# # 2. 检查距离是否达标 -# is_close = distance < distance_threshold - -# # 3. 检查夹爪是否闭合 -# joint_ids, _ = robot.find_joints(joint_names) -# joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids]) -# # 检查是否接近闭合 (比如 > 0.02) -# is_closed = torch.all(joint_pos > (closed_pos * 0.8), dim=-1) - -# # 4. 给分 -# # 只有同时满足才给 1.0 -# reward = torch.where(is_close & is_closed, 1.0, 0.0) - -# return torch.nan_to_num(reward) -# 在 rewards.py 文件开头添加调试函数 def debug_robot_physics( env: ManagerBasedRLEnv, robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), @@ -771,64 +521,15 @@ def debug_robot_physics( # 4. 
检查是否有异常高的速度(可能导致碰撞问题) high_vel_mask = (gripper_lin_vel_mag > 0.5) | (root_lin_vel_magnitude > 0.3) - if torch.any(high_vel_mask): - env_ids = torch.where(high_vel_mask)[0].cpu().tolist() - for env_id in env_ids[:3]: # 只打印前3个环境 - print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:") - print(f" 机器人根节点线性速度: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)") - print(f" 机器人根节点角速度: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)") - print(f" 夹爪线性速度: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)") - print(f" 夹爪角速度: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)") - print(f" 最大关节速度: {max_joint_vel[env_id].item():.4f} rad/s") - -# def gripper_handle_position_alignment(env: ManagerBasedRLEnv, -# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), -# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), -# handle_name: str = "Equipment_BB_13_C", -# left_gripper_name: str = "right_hand_l", -# right_gripper_name: str = "right_hand__r", -# height_offset: float = 0.03, -# std: float = 0.1) -> torch.Tensor: - -# dryingbox: RigidObject = env.scene[dryingbox_cfg.name] -# robot: Articulation = env.scene[robot_cfg.name] - -# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True) -# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :] -# target_pos = handle_pos_w.clone() -# target_pos[:, 1] += height_offset -# target_pos[:, 0] += 0.02 -# target_pos[:, 2] -= 0.04 - -# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) -# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) - -# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] -# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] - -# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 -# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6 - -# # ===== 添加调试打印 ===== -# # 当夹爪接近手柄时(距离 < 0.15m)打印物理信息 -# close_mask = distance < 0.03 -# if torch.any(close_mask): -# debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5) -# # ======================== - -# position_reward = 1.0 - torch.tanh(distance / std) - -# # 计算距离后插入 -# if env.common_step_counter % 200 == 0: -# print("pos_align dbg env0: " -# f"handle={handle_pos_w[0].cpu().numpy()}, " -# f"target={target_pos[0].cpu().numpy()}, " -# f"gripL={left_gripper_pos[0].cpu().numpy()}, " -# f"gripR={right_gripper_pos[0].cpu().numpy()}, " -# f"center={gripper_center[0].cpu().numpy()}, " -# f"dist={distance[0].item():.4f}") - -# return position_reward + # if torch.any(high_vel_mask): + # env_ids = torch.where(high_vel_mask)[0].cpu().tolist() + # for env_id in env_ids[:3]: # 只打印前3个环境 + # print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:") + # print(f" 机器人根节点线性速度: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)") + # print(f" 机器人根节点角速度: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)") + # print(f" 夹爪线性速度: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)") + # print(f" 夹爪角速度: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)") + # print(f" 最大关节速度: {max_joint_vel[env_id].item():.4f} rad/s") def 
gripper_handle_position_alignment(env: ManagerBasedRLEnv, dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), @@ -846,8 +547,8 @@ def gripper_handle_position_alignment(env: ManagerBasedRLEnv, handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :] target_pos = handle_pos_w.clone() target_pos[:, 1] += height_offset - # target_pos[:, 0] += 0.02 - target_pos[:, 2] -= 0.02 + target_pos[:, 0] += 0.005 + # target_pos[:, 2] -= 0.04 left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) @@ -877,4 +578,258 @@ def gripper_handle_position_alignment(env: ManagerBasedRLEnv, f"center={gripper_center[0].cpu().numpy()}, " f"dist={distance[0].item():.4f}") - return position_reward \ No newline at end of file + return position_reward + + + +def gripper_open_when_far( + env: ManagerBasedRLEnv, + dryingbox_cfg: SceneEntityCfg, # 保留接口一致性,但不再使用 + robot_cfg: SceneEntityCfg, + handle_name: str, # 保留接口一致性,但不再使用 + left_gripper_name: str, + right_gripper_name: str, + joint_names: list, # 不再使用 + dist_threshold: float = 0.5, # 不再使用 + target_open_dist: float = 0.06, # 两手指完全打开时的距离(米) +) -> torch.Tensor: + """ + 奖励夹爪最大张开状态: + 只根据左右两根手指之间的实际距离计算奖励, + 当距离接近 target_open_dist (0.06m) 时奖励最大。 + """ + + # 1. 获取机器人 + robot = env.scene[robot_cfg.name] + + # 2. 获取左右手指的世界坐标 + l_ids, _ = robot.find_bodies([left_gripper_name]) + r_ids, _ = robot.find_bodies([right_gripper_name]) + + pos_L = robot.data.body_pos_w[:, l_ids[0], :3] + pos_R = robot.data.body_pos_w[:, r_ids[0], :3] + + # 3. 计算两手指之间的实际距离 + finger_dist = torch.norm(pos_L - pos_R, dim=-1) + + # 4. 奖励:距离越接近最大打开距离,奖励越高 + error = torch.abs(finger_dist - target_open_dist) + reward = 1.0 - torch.tanh(error / 0.01) # 0.01m = 1cm 作为平滑尺度 + + return reward + + +def gripper_open_penalty_when_far( + env: ManagerBasedRLEnv, + dryingbox_cfg: SceneEntityCfg, + robot_cfg: SceneEntityCfg, + handle_name: str, + left_gripper_name: str, + right_gripper_name: str, + joint_names: list, + dist_threshold: float = 0.03, # 抓取豁免区(在此距离内不惩罚) + # max_penalty_dist: float = 0.2, # 这个参数现在没用了,或者可以用来控制斜率,这里简化掉 +) -> torch.Tensor: + """ + 修正后的惩罚项: + 在 dist_threshold (3cm) 之外,如果夹爪没有完全张开,则给予惩罚。 + """ + robot = env.scene[robot_cfg.name] + dryingbox = env.scene[dryingbox_cfg.name] + + # 1. 获取位置 + handle_ids, _ = dryingbox.find_bodies([handle_name]) + handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3] + + l_ids, _ = robot.find_bodies([left_gripper_name]) + r_ids, _ = robot.find_bodies([right_gripper_name]) + # pos_L = robot.data.body_pos_w[:, l_ids[0], :3] + # pos_R = robot.data.body_pos_w[:, r_ids[0], :3] + + # gripper_center = (pos_L + pos_R) / 2.0 + # distance = torch.norm(gripper_center - handle_pos, dim=1) + + # 优化计算:直接用预计算的指尖中心(如果你的环境支持),或者保持原样 + pos_L = robot.data.body_pos_w[:, l_ids[0], :3] + pos_R = robot.data.body_pos_w[:, r_ids[0], :3] + gripper_center = (pos_L + pos_R) / 2.0 + distance = torch.norm(gripper_center - handle_pos, dim=1) + + # 2. 计算闭合程度 (0=全开, 1=全闭) + # 获取关节位置更直接,比算指尖距离更准 + joint_ids, _ = robot.find_joints(joint_names) + joint_pos = robot.data.joint_pos[:, joint_ids] + # 假设 0.0 是开,0.03 是闭。取平均绝对值归一化。 + # closedness 越大表示闭合越多 + closedness = torch.clamp(torch.mean(torch.abs(joint_pos), dim=1) / 0.03, 0.0, 1.0) + + # 3. 
惩罚门控逻辑 (修正的核心) + # 如果 distance > dist_threshold,开始惩罚 + # 使用 tanh 或 sigmoid 平滑过渡,避免硬开关导致的震荡 + # 当 dist = 0.03 时,gate = 0 + # 当 dist = 0.05 时,gate 接近 1 + is_far_gate = torch.tanh((distance - dist_threshold) * 100.0) + # clip 到 [0, 1],防止 distance < threshold 时出现负数干扰 + is_far_gate = torch.clamp(is_far_gate, 0.0, 1.0) + + # 4. 计算最终惩罚 + # 只有在 "远处" 且 "闭合" 时才扣分 + penalty = is_far_gate * closedness + + return -penalty + +# def gripper_open_penalty_when_far( +# env: ManagerBasedRLEnv, +# dryingbox_cfg: SceneEntityCfg, +# robot_cfg: SceneEntityCfg, +# handle_name: str, +# left_gripper_name: str, +# right_gripper_name: str, +# joint_names: list, +# dist_threshold: float = 0.03, # 3cm 抓取保护区 +# max_penalty_dist: float = 0.2, # 0.55m 惩罚激活边界 +# ) -> torch.Tensor: +# """ +# 使用 Sigmoid 激活函数的惩罚项: +# 1. 距离把手 < 0.55m 时,惩罚迅速激活并保持在高位 (接近 1.0)。 +# 2. 距离把手 < 0.03m 时,惩罚强制清零,允许闭合抓取。 +# 3. 两手指距离越小,惩罚值越大。 +# """ +# robot = env.scene[robot_cfg.name] +# dryingbox = env.scene[dryingbox_cfg.name] + +# # 1. 获取位置信息 +# handle_ids, _ = dryingbox.find_bodies([handle_name]) +# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3] + +# l_ids, _ = robot.find_bodies([left_gripper_name]) +# r_ids, _ = robot.find_bodies([right_gripper_name]) +# pos_L = robot.data.body_pos_w[:, l_ids[0], :3] +# pos_R = robot.data.body_pos_w[:, r_ids[0], :3] + +# # 2. 计算距离 +# gripper_center = (pos_L + pos_R) / 2.0 +# distance = torch.norm(gripper_center - handle_pos, dim=1) +# finger_dist = torch.norm(pos_L - pos_R, dim=-1) + +# # 3. 距离激活门控 (Sigmoid 方法) +# # 当 distance < 0.55 时,(0.55 - dist) 为正,sigmoid 趋近于 1.0 +# # 50.0 是陡峭系数,系数越大,0.55m 处的切换越像开关 +# dist_gate = torch.sigmoid((max_penalty_dist - distance) * 50.0) + +# # 4. 抓取区保护 (Deadzone) +# # 距离小于 3cm 时,is_not_near 变为 0,完全撤销惩罚 +# is_not_near = (distance > dist_threshold).float() + +# # 5. 闭合程度计算 (0 = 全开, 1 = 全闭) +# # 假设手指最大张开距离为 0.06m +# max_open = 0.06 +# closedness = torch.clamp(1.0 - (finger_dist / max_open), 0.0, 1.0) + +# # 6. 综合计算并 Clamp +# # 只有在 (0.03 < distance < 0.55) 范围内且手指闭合时,值才会很大 +# penalty_magnitude = dist_gate * is_not_near * closedness + +# # 返回负值作为惩罚 (值域 [-1, 0]) +# return -torch.clamp(penalty_magnitude, 0.0, 1.0) + + +# def gripper_open_until_near( +# env: ManagerBasedRLEnv, +# dryingbox_cfg: SceneEntityCfg, +# robot_cfg: SceneEntityCfg, +# handle_name: str, +# left_gripper_name: str, +# right_gripper_name: str, +# joint_names: list, +# dist_threshold: float = 0.03, # 3cm 阈值 +# ) -> torch.Tensor: +# """奖励夹爪在远离目标时保持打开状态,接近后不再约束。""" +# robot = env.scene[robot_cfg.name] +# dryingbox = env.scene[dryingbox_cfg.name] + +# # 1. 计算夹爪中心到把手的距离 +# handle_ids, _ = dryingbox.find_bodies([handle_name]) +# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3] + +# l_ids, _ = robot.find_bodies([left_gripper_name]) +# r_ids, _ = robot.find_bodies([right_gripper_name]) +# gripper_center = (robot.data.body_pos_w[:, l_ids[0], :3] + robot.data.body_pos_w[:, r_ids[0], :3]) / 2.0 + +# distance = torch.norm(gripper_center - handle_pos, dim=1) + +# # 2. 获取夹爪当前关节位置 (假设 0 是完全打开) +# joint_ids, _ = robot.find_joints(joint_names) +# # 计算当前关节位置与 0(打开状态)的偏差 +# joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids]) +# mean_error = torch.mean(joint_pos_abs, dim=1) + +# # 3. 计算奖励逻辑 +# # 距离 > 3cm 时:关节越接近 0(打开),奖励越高 (最大为 1.0) +# # 距离 <= 3cm 时:奖励为 0,不干涉模型闭合 +# is_far = distance > dist_threshold +# open_reward = 1.0 - torch.tanh(mean_error / 0.01) # 1cm 内平滑衰减 + +# return torch.where(is_far, open_reward, torch.zeros_like(open_reward)) + +# ... 在文件末尾添加以下函数 ... 
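The tanh gate in gripper_open_penalty_when_far above turns the hard far/near switch into a smooth ramp, so the policy is not punished for beginning to close just outside the grasp zone. Below is a minimal standalone sketch of that gating, assuming only PyTorch; the helper name smooth_open_penalty and the sample numbers are hypothetical and are not part of the patch.

# Minimal sketch of the distance-gated "stay open when far" penalty.
# Hypothetical helper; it mirrors the tanh gate used in gripper_open_penalty_when_far.
import torch

def smooth_open_penalty(distance: torch.Tensor,
                        joint_pos: torch.Tensor,
                        dist_threshold: float = 0.03,
                        close_pos: float = 0.03) -> torch.Tensor:
    # closedness: 0 = fully open, 1 = fully closed (assumes joint range [0, close_pos])
    closedness = torch.clamp(joint_pos.abs().mean(dim=1) / close_pos, 0.0, 1.0)
    # gate is ~0 inside the grasp zone (distance < dist_threshold) and ~1 a few cm outside it
    gate = torch.clamp(torch.tanh((distance - dist_threshold) * 100.0), 0.0, 1.0)
    # the penalty is only paid when the gripper is both far away and closed
    return -gate * closedness

# Quick check with made-up numbers: far and closed -> about -1.0, inside the grasp zone -> 0.0
d = torch.tensor([0.20, 0.02])
q = torch.tensor([[0.03, 0.03], [0.03, 0.03]])
print(smooth_open_penalty(d, q))  # ≈ tensor([-1.0, 0.0])

The steep slope factor (100.0) makes the transition roughly 2 cm wide; lowering it widens the band and softens the penalty near the threshold.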
+ +def gripper_grasp_stability( + env: ManagerBasedRLEnv, + robot_cfg: SceneEntityCfg, + joint_names: list, + vel_threshold: float = 0.005 # 极低的速度阈值,判定为“静止” +) -> torch.Tensor: + """ + 奖励夹爪在闭合状态下的稳定性。 + 只有当关节速度极小时(即没有在抖动或移动),才发放奖励。 + """ + robot = env.scene[robot_cfg.name] + joint_ids, _ = robot.find_joints(joint_names) + + # 获取夹爪关节的绝对速度 + joint_vel = torch.abs(robot.data.joint_vel[:, joint_ids]) + + # 判定是否所有夹爪关节都处于“静止”状态 + is_stable = torch.all(joint_vel < vel_threshold, dim=1) + + # 只有在闭合状态下谈稳定性才有意义,所以通常建议配合 is_grasped 使用 + # 这里我们只返回稳定性系数 + return is_stable.float() + +def gripper_close_interaction_v2( + env: ManagerBasedRLEnv, + dryingbox_cfg: SceneEntityCfg, + robot_cfg: SceneEntityCfg, + handle_name: str, + left_gripper_name: str, + right_gripper_name: str, + joint_names: list, + target_close_pos: float = 0.012, + height_offset: float = 0.00, + grasp_radius: float = 0.05 +) -> torch.Tensor: + """ + 增强版闭合奖励:结合了位置、闭合程度和物理稳定性。 + """ + # 1. 基础抓取判定 + is_grasped = _is_grasped( + env, dryingbox_cfg, robot_cfg, handle_name, + left_gripper_name, right_gripper_name, + joint_names, height_offset, grasp_radius, target_close_pos + ) + + # 2. 计算闭合程度 (0.0 到 1.0) + robot = env.scene[robot_cfg.name] + joint_ids, _ = robot.find_joints(joint_names) + current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids]) + mean_joint_pos = torch.mean(current_joint_pos, dim=1) + clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0) + + # 3. 计算稳定性因子 (速度越快,奖励衰减越厉害) + joint_vel = torch.norm(robot.data.joint_vel[:, joint_ids], dim=1) + # 使用 exp(-x) 让奖励对速度非常敏感,速度稍大奖励就迅速消失 + stability_factor = torch.exp(-20.0 * joint_vel) + + # 综合奖励:必须满足抓取条件,且闭合得越深、动作越稳,分数越高 + return torch.where(is_grasped, clamping_factor * stability_factor, torch.zeros_like(clamping_factor)) \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py.bak1 b/source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py.bak1 new file mode 100644 index 0000000..a1f1702 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py.bak1 @@ -0,0 +1,967 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor: + """Penalize joint position deviation from a target value.""" + # extract the used quantities (to enable type-hinting) + asset: Articulation = env.scene[asset_cfg.name] + # wrap the joint positions to (-pi, pi) + joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids]) + # compute the reward + return torch.sum(torch.square(joint_pos - target), dim=1) + + +def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor: + """Reward the agent for lifting the lid above the minimal height.""" + lid: RigidObject = env.scene[lid_cfg.name] + # root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,) + height = lid.data.root_pos_w[:, 2] + return torch.where(height > minimal_height, 1.0, 0.0) + +def gripper_lid_distance(env: ManagerBasedRLEnv, std: float, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + """Reward the agent for reaching the lid using tanh-kernel.""" + # extract the used quantities (to enable type-hinting) + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + # float 化,防止成为 (3,) 向量 + if not isinstance(std, float): + if torch.is_tensor(std): + std = std.item() + else: + std = float(std) + + # Target lid position: (num_envs, 3) + lid_pos_w = lid.data.root_pos_w[:, :3] + + # Gripper position: (num_envs, 3) + # body_idx = robot.find_bodies(gripper_body_name)[0] + # gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1) + body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True) + gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :] + # Distance of the gripper to the lid: (num_envs,) + distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6 + + return 1 - torch.tanh(distance / std) + + +def lid_grasped(env: ManagerBasedRLEnv, + distance_threshold: float = 0.05, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + """Reward the agent for grasping the lid (close and lifted).""" + # extract the used quantities (to enable type-hinting) + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + # Target lid position: (num_envs, 3) + lid_pos_w = lid.data.root_pos_w[:, :3] + + # Gripper position: (num_envs, 3) + body_idx = robot.find_bodies(gripper_body_name)[0] + gripper_pos_w = robot.data.body_pos_w[:, body_idx, :] + + # Distance of the gripper to the lid: (num_envs,) + distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + + # Check if close and lifted + is_close = distance < distance_threshold + is_lifted = lid.data.root_pos_w[:, 2] > 0.1 + + return torch.where(is_close & is_lifted, 1.0, 0.0) + +def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float, + lid_cfg: SceneEntityCfg = 
SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + """Combined reward for reaching the lid AND lifting it.""" + # Get reaching reward + reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name) + # Get lifting reward + lift_reward = lid_is_lifted(env, minimal_height, lid_cfg) + # Combine rewards multiplicatively + return reach_reward * lift_reward + + +def gripper_handle_orientation_alignment( + env, + dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + handle_name: str = "Equipment_BB_13_C", + gripper_body_name: str = "right_hand_body", + gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 夹爪指尖指向 (Local Z) + gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 夹爪手指开合轴 (Local Y) + lid_handle_axis: tuple = (0.0, 0.0, 1.0), + max_angle_deg: float =60.0, + # handle_local_axis 参数已删除,因为直接使用世界坐标系 Z 轴更稳健 +) -> torch.Tensor: + """ + 修正版: + 1. 解决了梯度消失:使用 (dot+1)/2 替代 clamp,保证全角度有梯度。 + 2. 解决了轴向错误:直接认定手柄是垂直的 (World Z),修复抓取姿态。 + """ + # 1. 获取数据 + # dryingbox = env.scene[dryingbox_cfg.name] (计算轴向不再需要它,节省计算) + robot = env.scene[robot_cfg.name] + body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True) + gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :] + + box=env.scene[dryingbox_cfg.name] + hd_ids,_=box.find_bodies([handle_name],preserve_order=True) + handle_quat_w=box.data.body_quat_w[:,hd_ids[0],:] + + # --- 约束 A: 指向约束 (Pointing) --- + # 目标:夹爪 Z 轴 指向 World +X + grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1) + grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local) + + target_fwd = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1) + + dot_fwd = torch.sum(grip_fwd_world * target_fwd, dim=1) + dot_fwd=torch.clamp(dot_fwd,-1.0,1.0) + angle_rad = torch.acos(torch.clamp(dot_fwd, -1.0, 1.0)) + max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device) + angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0) + rew_forward = 1.0 - angle_normalized # 角度越小,奖励越大 + # 如果角度超过60度,奖励为0 + rew_forward = torch.where(angle_rad <= max_angle_rad, rew_forward, torch.zeros_like(rew_forward)) + # nan -> 0 + rew_forward = torch.where(torch.isnan(rew_forward), torch.zeros_like(rew_forward), rew_forward) + + # [关键修改 1] 使用软化奖励,提供全局梯度 + # 范围 [0, 1]: 1.0 是完美对齐,0.5 是垂直,0.0 是完全背对 + # 这样即使现在是指向地面的 (dot=0),它也有 0.5 分,且知道往上抬分会变高 + # rew_forward = (dot_fwd + 1.0) / 2.0 + # 如果你希望能更陡峭一点,可以加个平方: rew_forward = rew_forward * rew_forward + + # --- 约束 B: 抓取姿态约束 (Grasp Alignment) --- + # 目标:夹爪的开合轴 (Y) 应该垂直于 手柄轴 (Z) + # 这样手指就是水平开合的 + + grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1) + grip_width_world = normalize(quat_apply(gripper_quat_w, grip_width_local)) + + # [关键修改 2] 强制假设手柄是垂直的 (World Z) + # 除非你的烘箱会倾斜倒在地上,否则这是最稳健的写法,不用管局部坐标系 + handle_axis_local = torch.tensor([0.0, 0.0, 1.0], device=env.device).repeat(env.num_envs, 1) + handle_axis_world=quat_apply(handle_quat_w,handle_axis_local) + dot_yaw=torch.sum(grip_width_world*handle_axis_world,dim=1) + rew_yaw=torch.square(torch.abs(dot_yaw))+1e-4 + # nan -> 0 + rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw) + + # 5. 
组合 + total_reward = rew_forward * rew_yaw + + total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0) + # debug + if not torch.isfinite(total_reward).all(): + print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}") + print(f"rew_vertical: {rew_vertical}") + print(f"rew_yaw: {rew_yaw}") + + return total_reward + + # # 计算点积:我们希望它们垂直,即点积为 0 + # dot_width_handle = torch.sum(grip_width_world * handle_axis_world, dim=1) + + # # 奖励:1.0 - abs(dot)。点积越小(越垂直),分数越高 + # rew_perpendicular = 1.0 - torch.abs(dot_width_handle) + + # # 稍微增强一下敏感度,让它更倾向于完美垂直 + # rew_perpendicular = rew_perpendicular * rew_perpendicular + + # # 3. 组合奖励 + # total_reward = rew_forward * rew_perpendicular + + # return torch.nan_to_num(total_reward, nan=0.0) + + +# def gripper_handle_position_alignment(env: ManagerBasedRLEnv, +# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# handle_name: str = "Equipment_BB_13_C", +# left_gripper_name: str = "left_hand__l", +# right_gripper_name: str = "left_hand_r", +# height_offset: float = 0.03, +# std: float = 0.1) -> torch.Tensor: + +# dryingbox: RigidObject = env.scene[dryingbox_cfg.name] +# robot: Articulation = env.scene[robot_cfg.name] + +# # lid_pos_w = lid.data.root_pos_w[:, :3] +# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True) +# # [num_envs, 3] +# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :] +# # 目标点:lid位置 + height_offset(Z方向) +# target_pos = handle_pos_w.clone() +# # print(target_pos[0]) +# target_pos[:, 0] -= height_offset # 在lid上方height_offset处 +# # print(target_pos[0]) +# # 获取两个夹爪的位置 +# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) +# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + +# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3) +# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3) + +# # 计算夹爪中心位置 +# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3) +# # print(gripper_center[0]) +# # 计算夹爪中心到目标点的3D距离 +# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6 + +# # 距离奖励:距离越小,奖励越大 +# position_reward = 1.0 - torch.tanh(distance / std) + +# return position_reward + + +# def gripper_handle_position_alignment( +# env, +# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# handle_name: str = "Equipment_BB_13_C", +# left_finger_name: str = "right_hand_l", +# right_finger_name: str = "right_hand__r", +# # 【修改点1】:根据你的描述,长轴是 Local Z,所以改为 (0, 0, 1) +# handle_local_axis: tuple = (0.0, 0.0, 1.0), +# # 【新增】:明确指定手柄的“侧面”是 Local X 轴 +# handle_local_side_axis: tuple = (0.0, -1.0, 0.0), +# grasp_width: float = 0.03, +# grasp_offset: float = 0.06, +# std: float = 0.05, +# ) -> torch.Tensor: +# """ +# 修正版: +# 1. 修正了手柄长轴为 Local Z。 +# 2. 使用 Local X 作为手柄两侧的基准方向。 +# """ +# # 1. 
获取数据 +# dryingbox = env.scene[dryingbox_cfg.name] +# robot = env.scene[robot_cfg.name] + +# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True) + +# # [num_envs, 3] +# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :] +# # [num_envs, 4] +# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :] + +# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True) +# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True) +# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :] +# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :] + +# # 2. 计算关键向量 +# # 【修正】:计算世界坐标系下的手柄长轴 (应为 World Z) +# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1) +# # 虽然本奖励函数主要位置点,不需要显式用到长轴向量做点积,但保留以备后续增加角度奖励 +# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec)) + +# # 【核心修改】:直接计算世界坐标系下的手柄“侧向”向量 (即 Local X -> World Y) +# # 这种方法比 cross(approach, axis) 更准确,因为它尊重了物体自身的旋转 +# handle_side_local_vec = torch.tensor(handle_local_side_axis, device=env.device).repeat(env.num_envs, 1) +# side_vec_world = normalize(quat_apply(handle_quat_w, handle_side_local_vec)) + +# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱) +# # 注意:如果你的烘箱放置位置旋转了,这里也建议改为读取机器人的前方或者物体的Local方向 +# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1) + +# # 3. 计算理想的抓取点 +# # 抓取中心:从手柄中心沿着抓取方向(World X)反向退后 grasp_offset +# grasp_center = handle_pos_w - approach_dir * grasp_offset + +# # 计算理想的左/右手指位置 +# # 既然 side_vec_world 是 Local X (对应 World Y) +# # 我们希望手指在 Y 轴两侧张开 +# ideal_l_pos = grasp_center + side_vec_world * (grasp_width / 2.0) +# ideal_r_pos = grasp_center - side_vec_world * (grasp_width / 2.0) + +# # 注意:这里 l_pos 用 + 还是 - 取决于你的 robot 建模里 right_hand_l 实际是在正Y侧还是负Y侧 +# # 如果发现左右手指交叉,交换上面的 + 和 - 即可。 + +# # 4. 计算距离奖励 +# dist_l = torch.norm(l_finger_pos - ideal_r_pos, dim=1) +# dist_r = torch.norm(r_finger_pos - ideal_l_pos, dim=1) + +# rew_l = 1.0 - torch.tanh(dist_l / std) +# rew_r = 1.0 - torch.tanh(dist_r / std) + +# total_reward = (rew_l + rew_r) / 2.0 + +# return torch.nan_to_num(total_reward, nan=0.0) + +# def gripper_handle_position_alignment( +# env, +# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# handle_name: str = "Equipment_BB_13_C", +# left_finger_name: str = "right_hand_l", # 左手指 body 名称 +# right_finger_name: str = "right_hand__r", # 右手指 body 名称 +# handle_local_axis: tuple = (0.0, 0.0, 1.0), # 手柄长轴 +# grasp_width: float = 0.03, # 预期的抓取宽度 (米) +# grasp_offset: float = 0.03, # 手指尖距离手柄中心的进深偏移 +# std: float = 0.05, # 奖励函数的敏感度 +# ) -> torch.Tensor: +# """ +# 双指位置奖励:引导左/右手指分别到达手柄的两侧。 +# """ +# # 1. 获取数据 +# dryingbox = env.scene[dryingbox_cfg.name] +# robot = env.scene[robot_cfg.name] + +# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True) +# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :] +# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :] + +# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True) +# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True) +# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :] +# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :] + +# # 2. 
计算理想的抓取点 +# # 获取手柄当前轴向 +# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1) +# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec)) + +# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱) +# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1) + +# # 计算垂直于手柄轴向和接近方向的向量,这就是“侧面”方向 +# side_vec = normalize(torch.cross(approach_dir, handle_axis_world)) + +# # 计算抓取中心点 (手柄中心沿接近方向反向偏移一点,留出空间) +# grasp_center = handle_pos_w - approach_dir * grasp_offset + +# # 计算理想的左/右手指位置 (在中心点两侧张开) +# # 注意:这里需要根据你的坐标系确认哪个是左,哪个是右 +# ideal_l_pos = grasp_center - side_vec * (grasp_width / 2.0) +# ideal_r_pos = grasp_center + side_vec * (grasp_width / 2.0) + +# # 3. 计算距离奖励 +# dist_l = torch.norm(l_finger_pos - ideal_l_pos, dim=1) +# dist_r = torch.norm(r_finger_pos - ideal_r_pos, dim=1) + +# rew_l = 1.0 - torch.tanh(dist_l / std) +# rew_r = 1.0 - torch.tanh(dist_r / std) + +# # 取平均值 +# total_reward = (rew_l + rew_r) / 2.0 + +# return torch.nan_to_num(total_reward, nan=0.0) + + +# 在 rewards.py 中,修改 _is_grasped 函数,使其支持 dryingbox 和 handle_name +def _is_grasped( + env: ManagerBasedRLEnv, + dryingbox_cfg: SceneEntityCfg, # 改为 dryingbox_cfg + robot_cfg: SceneEntityCfg, + handle_name: str, # 新增 handle_name 参数 + left_gripper_name: str, + right_gripper_name: str, + joint_names: list, + height_offset: float, + grasp_radius: float, + target_close_pos: float +) -> torch.Tensor: + """ + 内部辅助函数:判定是否成功抓取。 + 条件: + 1. 夹爪中心在把手目标点附近 (Sphere Check) + 2. 夹爪处于闭合发力状态 (Joint Effort Check) + 3. (关键) 夹爪X轴距离合适,不能压在把手上 (X-Axis Check) + """ + # 1. 获取对象 + dryingbox = env.scene[dryingbox_cfg.name] # 改为 dryingbox + robot = env.scene[robot_cfg.name] + + # 2. 目标点位置 (把手中心) + handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True) + handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone() + handle_pos_w[:, 1] += height_offset # X方向偏移 + handle_pos_w[:, 0] += 0.02 + # handle_pos_w[:, 2] -= 0.04 + # 3. 夹爪位置 + l_ids, _ = robot.find_bodies([left_gripper_name]) + r_ids, _ = robot.find_bodies([right_gripper_name]) + pos_L = robot.data.body_pos_w[:, l_ids[0], :3] + pos_R = robot.data.body_pos_w[:, r_ids[0], :3] + gripper_center = (pos_L + pos_R) / 2.0 + + # 4. 条件一:距离判定 (在把手球范围内) + dist_to_handle = torch.norm(gripper_center - handle_pos_w, dim=-1) + if env.common_step_counter % 100 == 0: + print(f"Env 0 Dist to Handle: {dist_to_handle[0].item():.4f}m") + is_near = dist_to_handle < grasp_radius + + # 5. 条件二:X轴距离判定 (防止压在把手上) + x_diff = gripper_center[:, 0] - handle_pos_w[:, 0] + is_x_valid = torch.abs(x_diff) < 0.03 # 3cm 容差 + + # 6. 条件三:闭合判定 (正在尝试闭合) + joint_ids, _ = robot.find_joints(joint_names) + joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids]) + + is_effort_closing = torch.all(joint_pos > 0.005, dim=1) + + # 2. 
几何排斥判定 (Geometry Check) + dist_fingertips = torch.norm(pos_L - pos_R, dim=-1) + is_not_empty = dist_fingertips > 0.005 + + # 在计算 dist_to_handle 后 + if env.common_step_counter % 500 == 0 and dist_to_handle[0] < 0.15: + delta = gripper_center[0] - handle_pos_w[0] + print(f"close_dbg env0: handle={handle_pos_w[0].cpu().numpy()}, " + f"center={gripper_center[0].cpu().numpy()}, " + f"delta={delta.cpu().numpy()}, " + f"dist={dist_to_handle[0].item():.4f}, " + f"fingertip_dist={dist_fingertips[0].item():.4f}, " + f"joint_pos={joint_pos[0].cpu().numpy()}") + + # 综合判定 + return is_near & is_x_valid & is_effort_closing & is_not_empty + + +# 修改 gripper_close_when_near 函数 +def gripper_close_when_near( + env: ManagerBasedRLEnv, + dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), # 改为 dryingbox_cfg + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + handle_name: str = "Equipment_BB_13_C", # 新增 handle_name 参数 + left_gripper_name: str = "right_hand_l", # 更新默认值 + right_gripper_name: str = "right_hand__r", # 更新默认值 + joint_names: list = ["right_hand_joint_left", "right_hand_joint_right"], # 更新默认值 + target_close_pos: float = 0.03, + height_offset: float = 0.005, # 更新默认值 + grasp_radius: float = 0.03 # 更新默认值 +) -> torch.Tensor: + + # 1. 判定基础抓取条件是否满足 + is_grasped = _is_grasped( + env, dryingbox_cfg, robot_cfg, handle_name, # 传入 handle_name + left_gripper_name, right_gripper_name, + joint_names, height_offset, grasp_radius, target_close_pos + ) + + # 2. 计算夹紧程度 (Clamping Intensity) + robot = env.scene[robot_cfg.name] + joint_ids, _ = robot.find_joints(joint_names) + current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids]) + mean_joint_pos = torch.mean(current_joint_pos, dim=1) + + clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0) + # 3. 只有在满足抓取判定时,才发放带有权重的夹紧奖励 + + + + return torch.where(is_grasped, clamping_factor, 0.0) + + + +# def gripper_close_when_near( +# env: ManagerBasedRLEnv, +# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# # 注意:这里我们需要具体的指尖 body 名字来做几何判定 +# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名 +# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名 +# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], +# target_close_pos: float = 0.03, +# height_offset: float = 0.03, +# grasp_radius: float = 0.02 # 球面半径 2cm +# ) -> torch.Tensor: + +# # 1. 获取位置 +# lid = env.scene[lid_cfg.name] +# robot = env.scene[robot_cfg.name] +# lid_pos_w = lid.data.root_pos_w[:, :3].clone() +# lid_pos_w[:, 2] += height_offset # 把手中心 + +# # 获取左右指尖位置 +# l_ids, _ = robot.find_bodies([left_gripper_name]) +# r_ids, _ = robot.find_bodies([right_gripper_name]) +# pos_L = robot.data.body_pos_w[:, l_ids[0], :3] +# pos_R = robot.data.body_pos_w[:, r_ids[0], :3] + +# # 计算夹爪中心 +# pos_center = (pos_L + pos_R) / 2.0 + +# # 2. 距离判定 (Is Inside Sphere?) +# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1) +# is_in_sphere = (dist_center < grasp_radius).float() + +# # 3. "在中间"判定 (Is Between?) +# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间 +# vec_LR = pos_R - pos_L # 左指指向右指 +# vec_LO = lid_pos_w - pos_L # 左指指向物体 + +# # 计算投影比例 t +# # P_proj = P_L + t * (P_R - P_L) +# # t = (vec_LO . vec_LR) / |vec_LR|^2 +# # 如果 0 < t < 1,说明投影在两个指尖之间 + +# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6 +# dot = torch.sum(vec_LO * vec_LR, dim=-1) +# t = dot / len_sq + +# is_between = (t > 0.0) & (t < 1.0) +# is_between = is_between.float() + +# # 4. 
闭合判定 +# joint_ids, _ = robot.find_joints(joint_names) +# # 注意:如果是 Binary Action,可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题 +# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids]) +# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1) +# # 只有当非常接近闭合目标时才给分 +# is_closing = (close_error < 0.005).float() # 允许 5mm 误差 + +# # 5. 最终奖励 +# # 只有三者同时满足才给 1.0 分 +# reward = is_in_sphere * is_between * is_closing + +# return torch.nan_to_num(reward, nan=0.0) + + +# def lid_is_lifted( +# env:ManagerBasedRLEnv, +# minimal_height:float, +# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid") +# ) -> torch.Tensor: + +# lid = env.scene[lid_cfg.name] +# lid_height = lid.data.root_pos_w[:, 2] +# reward=torch.where(lid_height > minimal_height, 1.0, 0.0) +# reward=torch.nan_to_num(reward, nan=0.0) + +# return reward + + +def lid_is_lifted( + env: ManagerBasedRLEnv, + minimal_height: float = 0.05, # 相对提升阈值 + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], + height_offset: float = 0.03, + grasp_radius: float = 0.1, + target_close_pos: float = 0.03, +) -> torch.Tensor: + + is_grasped = _is_grasped( + env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name, + joint_names, height_offset, grasp_radius, target_close_pos + ) + + lid = env.scene[lid_cfg.name] + + # 1. 获取高度 + current_height = lid.data.root_pos_w[:, 2] + # 自动获取初始高度 + initial_height = lid.data.default_root_state[:, 2] + + # 2. 计算提升量 + lift_height = torch.clamp(current_height - initial_height, min=0.0) + + # 3. 速度检查 (防撞飞) + # 如果速度 > 1.0 m/s,视为被撞飞,不给分 + lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1) + is_stable = lid_vel < 1.0 + + # 4. 计算奖励 + # 基础分:高度越高分越高 + shaping_reward = lift_height * 2.0 + # 成功分:超过阈值 + success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0) + + # 组合 + # 必须 is_grasped AND is_stable + reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height)) + + return torch.nan_to_num(reward, nan=0.0) + + + +def lid_lift_success_reward( + env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand_body", + joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], + # 关键参数 + settled_height: float = 0.75, # 盖子落在超声仪上的稳定高度 (需根据仿真实际情况微调) + target_lift_height: float = 0.05, # 目标提升高度 (5cm) + grasp_dist_threshold: float = 0.05, # 抓取判定距离 + closed_pos: float = 0.03 # 夹爪闭合阈值 +) -> torch.Tensor: + """ + 提升奖励:只有在【夹稳】的前提下,将盖子【相对】提升达到目标高度才给高分。 + """ + # 1. 获取数据 + lid = env.scene[lid_cfg.name] + robot = env.scene[robot_cfg.name] + + # 盖子实时高度 + lid_z = lid.data.root_pos_w[:, 2] + lid_pos = lid.data.root_pos_w[:, :3] + + # 夹爪位置 + body_ids, _ = robot.find_bodies([left_gripper_name]) + gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3] + + # 2. 【条件一:是否夹稳 (Is Grasped?)】 + # 距离检查 + distance = torch.norm(gripper_pos - lid_pos, dim=-1) + is_close = distance < grasp_dist_threshold + + # 闭合检查 + joint_ids, _ = robot.find_joints(joint_names) + joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids]) + # 假设 > 80% 的闭合度算抓紧了 + is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1) + + is_grasped = is_close & is_closed + + # 3. 【条件二:计算相对提升 (Relative Lift)】 + # 当前高度 - 初始稳定高度 + current_lift = lid_z - settled_height + + # 4. 
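# Illustrative numbers for the shaping below (assuming the defaults settled_height=0.75,
# target_lift_height=0.05): lid_z=0.77 -> current_lift=0.02 -> progress=0.4, bonus=0;
# lid_z=0.81 -> current_lift=0.06 -> progress clamped to 1.0, bonus=+2.0;
# and the whole term is zeroed whenever is_grasped is False.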
计算奖励 + lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0) + + # 基础提升分 (Shaping Reward) + lift_reward = lift_progress + + # 成功大奖 (Success Bonus) + # 如果提升超过目标高度 (比如 > 95% 的 5cm),给额外大奖 + success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0) + + # 组合:只有在 is_grasped 为 True 时才发放提升奖励 + total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward)) + + # 5. 安全输出 + return torch.nan_to_num(total_reward, nan=0.0) + +def lid_lift_progress_reward( + env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], + initial_height: float = 0.8, + target_height_lift: float = 0.15, + height_offset: float = 0.02, # 必须与抓取奖励保持一致 + grasp_radius: float = 0.1, # 提升时可以稍微放宽一点半径,防止抖动丢分 + target_close_pos: float = 0.03, + std: float = 0.05 # 标准差 +) -> torch.Tensor: + + # 1. 判定是否夹住 + is_grasped = _is_grasped( + env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name, + joint_names, height_offset, grasp_radius, target_close_pos + ) + + # 2. 计算高度 + lid = env.scene[lid_cfg.name] + current_height = lid.data.root_pos_w[:, 2] + lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift) + # print(current_height) + # print(lift_height) + # 3. 计算奖励 + # 只有在 is_grasped 为 True 时,才发放高度奖励 + # 这样彻底杜绝了 "砸飞/撞飞" 拿分的情况 + progress = torch.tanh(lift_height / std) + + reward = torch.where(is_grasped, progress, 0.0) + + return reward + +# def lid_grasped_bonus( +# env: ManagerBasedRLEnv, +# lid_cfg: SceneEntityCfg, +# robot_cfg: SceneEntityCfg, +# left_gripper_name: str, +# joint_names: list, +# distance_threshold: float = 0.05, +# closed_pos: float = 0.03 +# ) -> torch.Tensor: +# # 1. 计算距离 +# lid = env.scene[lid_cfg.name] +# robot = env.scene[robot_cfg.name] +# body_ids, _ = robot.find_bodies([left_gripper_name]) +# gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3] +# lid_pos = lid.data.root_pos_w[:, :3] +# distance = torch.norm(gripper_pos - lid_pos, dim=-1) + +# # 2. 检查距离是否达标 +# is_close = distance < distance_threshold + +# # 3. 检查夹爪是否闭合 +# joint_ids, _ = robot.find_joints(joint_names) +# joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids]) +# # 检查是否接近闭合 (比如 > 0.02) +# is_closed = torch.all(joint_pos > (closed_pos * 0.8), dim=-1) + +# # 4. 给分 +# # 只有同时满足才给 1.0 +# reward = torch.where(is_close & is_closed, 1.0, 0.0) + +# return torch.nan_to_num(reward) + +# 在 rewards.py 文件开头添加调试函数 +def debug_robot_physics( + env: ManagerBasedRLEnv, + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "right_hand_body", + distance_threshold: float = 0.1, # 当距离小于此值时打印 + print_interval: int = 10 # 每N步打印一次 +) -> None: + """ + 调试函数:打印机器人的速度、加速度等信息 + """ + # 使用静态变量控制打印频率 + if not hasattr(debug_robot_physics, "step_count"): + debug_robot_physics.step_count = 0 + debug_robot_physics.step_count += 1 + + if debug_robot_physics.step_count % print_interval != 0: + return + + robot = env.scene[robot_cfg.name] + + # 1. 获取机器人根节点的速度和加速度 + root_lin_vel = robot.data.root_lin_vel_w # (num_envs, 3) + root_ang_vel = robot.data.root_ang_vel_w # (num_envs, 3) + root_lin_vel_magnitude = torch.norm(root_lin_vel, dim=1) # (num_envs,) + root_ang_vel_magnitude = torch.norm(root_ang_vel, dim=1) # (num_envs,) + + # 2. 
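# Note on the lid_* helpers above: _is_grasped now expects (env, dryingbox_cfg, robot_cfg,
# handle_name, left_gripper_name, ...), but lid_is_lifted and lid_lift_progress_reward still
# call the old 8-argument form, so left_gripper_name would be bound to handle_name at runtime.
# A sketch of a matching call, assuming the lid exposes a graspable body under a
# hypothetical name "lid_handle":
#
#     is_grasped = _is_grasped(
#         env, lid_cfg, robot_cfg, "lid_handle",
#         left_gripper_name, right_gripper_name,
#         joint_names, height_offset, grasp_radius, target_close_pos,
#     )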
获取夹爪body的速度 + body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True) + gripper_lin_vel = robot.data.body_lin_vel_w[:, body_ids[0], :] # (num_envs, 3) + gripper_ang_vel = robot.data.body_ang_vel_w[:, body_ids[0], :] # (num_envs, 3) + gripper_lin_vel_mag = torch.norm(gripper_lin_vel, dim=1) + gripper_ang_vel_mag = torch.norm(gripper_ang_vel, dim=1) + + # 3. 获取关节速度(可能影响碰撞) + joint_vel = robot.data.joint_vel # (num_envs, num_joints) + max_joint_vel = torch.max(torch.abs(joint_vel), dim=1)[0] # (num_envs,) + + # 4. 检查是否有异常高的速度(可能导致碰撞问题) + high_vel_mask = (gripper_lin_vel_mag > 0.5) | (root_lin_vel_magnitude > 0.3) + + if torch.any(high_vel_mask): + env_ids = torch.where(high_vel_mask)[0].cpu().tolist() + for env_id in env_ids[:3]: # 只打印前3个环境 + print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:") + print(f" 机器人根节点线性速度: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)") + print(f" 机器人根节点角速度: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)") + print(f" 夹爪线性速度: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)") + print(f" 夹爪角速度: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)") + print(f" 最大关节速度: {max_joint_vel[env_id].item():.4f} rad/s") + +def gripper_handle_position_alignment(env: ManagerBasedRLEnv, + dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + handle_name: str = "Equipment_BB_13_C", + left_gripper_name: str = "right_hand_l", + right_gripper_name: str = "right_hand__r", + height_offset: float = 0.03, + std: float = 0.1) -> torch.Tensor: + + dryingbox: RigidObject = env.scene[dryingbox_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True) + handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :] + target_pos = handle_pos_w.clone() + target_pos[:, 1] += height_offset + target_pos[:, 0] += 0.02 + # target_pos[:, 2] -= 0.04 + + left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) + right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + + left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] + right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] + + gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 + distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6 + + # ===== 添加调试打印 ===== + # 当夹爪接近手柄时(距离 < 0.15m)打印物理信息 + close_mask = distance < 0.03 + if torch.any(close_mask): + debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5) + # ======================== + + position_reward = 1.0 - torch.tanh(distance / std) + + # 计算距离后插入 + if env.common_step_counter % 500 == 0: + print("pos_align dbg env0: " + f"handle={handle_pos_w[0].cpu().numpy()}, " + f"target={target_pos[0].cpu().numpy()}, " + f"gripL={left_gripper_pos[0].cpu().numpy()}, " + f"gripR={right_gripper_pos[0].cpu().numpy()}, " + f"center={gripper_center[0].cpu().numpy()}, " + f"dist={distance[0].item():.4f}") + + return position_reward + + +# def gripper_handle_position_alignment(env: ManagerBasedRLEnv, +# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# handle_name: str = "Equipment_BB_13_C", +# left_gripper_name: str = "right_hand_l", +# 
right_gripper_name: str = "right_hand__r", +# height_offset: float = 0.03, +# std: float = 0.1) -> torch.Tensor: + +# dryingbox: RigidObject = env.scene[dryingbox_cfg.name] +# robot: Articulation = env.scene[robot_cfg.name] + +# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True) +# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :] +# target_pos = handle_pos_w.clone() +# target_pos[:, 1] += height_offset +# target_pos[:, 0] += 0.02 +# # target_pos[:, 2] -= 0.02 + +# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) +# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + +# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] +# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] + +# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 +# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6 + +# # ===== 添加调试打印 ===== +# # 当夹爪接近手柄时(距离 < 0.15m)打印物理信息 +# close_mask = distance < 0.03 +# if torch.any(close_mask): +# debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5) +# # ======================== + +# position_reward = 1.0 - torch.tanh(distance / std) + +# # 计算距离后插入 +# if env.common_step_counter % 00 == 0: +# print("pos_align dbg env0: " +# f"handle={handle_pos_w[0].cpu().numpy()}, " +# f"target={target_pos[0].cpu().numpy()}, " +# f"gripL={left_gripper_pos[0].cpu().numpy()}, " +# f"gripR={right_gripper_pos[0].cpu().numpy()}, " +# f"center={gripper_center[0].cpu().numpy()}, " +# f"dist={distance[0].item():.4f}") + +# return position_reward + + +# def gripper_open_when_far( +# env: ManagerBasedRLEnv, +# dryingbox_cfg: SceneEntityCfg, +# robot_cfg: SceneEntityCfg, +# handle_name: str, +# left_gripper_name: str, +# right_gripper_name: str, +# joint_names: list, +# dist_threshold: float = 0.5, +# target_open_pos: float = 0.3, +# ) -> torch.Tensor: +# """ +# 当夹爪距离目标点较远(> dist_threshold)时,奖励夹爪保持打开状态。 +# """ +# # 1. 获取位置 +# dryingbox = env.scene[dryingbox_cfg.name] +# robot = env.scene[robot_cfg.name] + +# # 获取手柄位置 +# handle_ids, _ = dryingbox.find_bodies([handle_name]) +# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3] + +# # 获取夹爪中心位置 +# l_ids, _ = robot.find_bodies([left_gripper_name]) +# r_ids, _ = robot.find_bodies([right_gripper_name]) +# pos_L = robot.data.body_pos_w[:, l_ids[0], :3] +# pos_R = robot.data.body_pos_w[:, r_ids[0], :3] + +# gripper_center = (pos_L + pos_R) / 2.0 + +# # 2. 计算距离 +# dist = torch.norm(gripper_center - handle_pos, dim=-1) + +# # 3. 获取手指关节位置的绝对值 +# joint_ids, _ = robot.find_joints(joint_names) +# current_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids]) +# mean_pos_abs = torch.mean(current_pos_abs, dim=1) + +# # 4. 计算奖励:如果距离 > 0.5,奖励 mean_pos_abs 接近 target_open_pos (0.3) +# # 使用 tanh 误差函数,误差越小奖励越高 +# error = torch.abs(mean_pos_abs - target_open_pos) +# reward = 1.0 - torch.tanh(error / 0.1) # 0.1 为缩放因子,可根据需要调整 + +# # 只有距离大于阈值时才发放奖励 +# return torch.where(dist > dist_threshold, reward, torch.zeros_like(reward)) + + +def gripper_open_when_far( + env: ManagerBasedRLEnv, + dryingbox_cfg: SceneEntityCfg, # 保留接口一致性,但不再使用 + robot_cfg: SceneEntityCfg, + handle_name: str, # 保留接口一致性,但不再使用 + left_gripper_name: str, + right_gripper_name: str, + joint_names: list, # 不再使用 + dist_threshold: float = 0.5, # 不再使用 + target_open_dist: float = 0.06, # 两手指完全打开时的距离(米) +) -> torch.Tensor: + """ + 奖励夹爪最大张开状态: + 只根据左右两根手指之间的实际距离计算奖励, + 当距离接近 target_open_dist (0.06m) 时奖励最大。 + """ + + # 1. 
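# Illustrative values for the shaping used at the end of this function,
# reward = 1 - tanh(|finger_dist - target_open_dist| / 0.01):
#   finger_dist = 0.06 -> 1.00,   0.05 -> 0.24,   0.03 -> 0.005
# i.e. the reward is sharply peaked at the fully opened 6 cm finger spacing.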
获取机器人 + robot = env.scene[robot_cfg.name] + + # 2. 获取左右手指的世界坐标 + l_ids, _ = robot.find_bodies([left_gripper_name]) + r_ids, _ = robot.find_bodies([right_gripper_name]) + + pos_L = robot.data.body_pos_w[:, l_ids[0], :3] + pos_R = robot.data.body_pos_w[:, r_ids[0], :3] + + # 3. 计算两手指之间的实际距离 + finger_dist = torch.norm(pos_L - pos_R, dim=-1) + + # 4. 奖励:距离越接近最大打开距离,奖励越高 + error = torch.abs(finger_dist - target_open_dist) + reward = 1.0 - torch.tanh(error / 0.01) # 0.01m = 1cm 作为平滑尺度 + + return reward diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/pull_env_cfg.py b/source/mindbot/mindbot/tasks/manager_based/pull/pull_env_cfg.py index 25f90c9..41c6b8c 100644 --- a/source/mindbot/mindbot/tasks/manager_based/pull/pull_env_cfg.py +++ b/source/mindbot/mindbot/tasks/manager_based/pull/pull_env_cfg.py @@ -40,7 +40,7 @@ TABLE_CFG=RigidObjectCfg( prim_path="{ENV_REGEX_NS}/Table", init_state=RigidObjectCfg.InitialStateCfg( pos=[0.95, -0.3, 0.005], - rot=[0.7071, 0.0, 0.0, 0.7071], + rot=[0.7071, 0.0, 0.0, -0.7071], lin_vel=[0.0, 0.0, 0.0], ang_vel=[0.0, 0.0, 0.0], ), @@ -76,7 +76,7 @@ DRYINGBOX_CFG = ArticulationCfg( ), ), init_state=ArticulationCfg.InitialStateCfg( - pos=(0.95, -0.45, 0.9), + pos=[0.95, -0.45, 0.9], rot=[-0.7071, 0.0, 0.0, 0.7071] ), # actuators={} @@ -157,7 +157,8 @@ ULTRASOUND_CFG = ArticulationCfg( ## # Scene definition ## - +from isaaclab.sensors import CameraCfg +import isaaclab.sim as sim_utils @configclass class PullSceneCfg(InteractiveSceneCfg): @@ -181,6 +182,109 @@ class PullSceneCfg(InteractiveSceneCfg): prim_path="/World/DomeLight", spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0), ) + # # ------------- 新增 6 路相机 ------------- + # cam_head = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head", + # spawn=sim_utils.PinholeCameraCfg( + # focal_length=1.93, # 使用你给的 1.93 + # # 由 fx = W * f / aperture 推得 aperture = W * f / fx + # horizontal_aperture=1280 * 1.93 / 911.77, # ≈ 2.71,单位与 focal_length 一致 + # clipping_range=(0.1, 1e5), + # ), + # offset=CameraCfg.OffsetCfg( + # pos=(0.1226748226, 0.0, 0.1130563345), + # rot=(-0.5, 0.5, -0.5, 0.5), + # # convention="ros", + + # # rot=(0.70710678, 0, 0.70710678, 0), + # convention="ros", + # ), + # data_types=["rgb"], + # width=1280, + # height=720, + # update_latest_camera_pose=True, + # ) + + # cam_chest = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest", + # spawn=sim_utils.PinholeCameraCfg( + # focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5) + # ), + # offset=CameraCfg.OffsetCfg( + # pos=(-0.14909, 0.3549, -0.23295), + # rot=(0.99452, 0.0, 0.10453, 0.0),#(0.99452, 0.0, 0.10453, 0.0),# + # convention="ros", + # ), + # data_types=["rgb"], + # width=1280, + # height=720, + # update_latest_camera_pose=True, + # ) + + # cam_left_hand = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand", + # spawn=sim_utils.PinholeCameraCfg( + # focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5) + # ), + # offset=CameraCfg.OffsetCfg( + # pos=(0.0610568560, 1.13e-8, 0.0683504405),#() + # rot=(0.99452, 0, 0.10453, 0),#(0.99452, 0, 0.10453, 0) + # convention="ros", + # ), + # data_types=["rgb"], + # width=1280, + # height=720, + # update_latest_camera_pose=True, + # ) + + # cam_right_hand = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand", + # spawn=sim_utils.PinholeCameraCfg( + # 
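# Worked example for the aperture conversion used for cam_head above (pinhole model,
# fx = width * focal_length / horizontal_aperture, hence aperture = width * focal_length / fx):
#   horizontal_aperture = 1280 * 1.93 / 911.77 ≈ 2.71
# with fx in pixels and the aperture expressed in the same units as focal_length.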
focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5) + # ), + # offset=CameraCfg.OffsetCfg( + # pos=(0.0610571709, 2.58e-7, 0.0711255539),#(0.0610568560, 1.13e-8, 0.0683504405) + # rot=(0.445, -0.549, -0.549, -0.445),#(0.99452, 0, 0.10453, 0) + # convention="ros", + # ), + # data_types=["rgb"], + # width=1280, + # height=720, + # update_latest_camera_pose=True, + # ) + + # cam_top = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/cam_top", + # spawn=sim_utils.PinholeCameraCfg( + # focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5) + # ), + # offset=CameraCfg.OffsetCfg( + # pos=(0.4627602211, 0.0, 2.0977711170), + # rot=(0.83526, 0, 0.54986, 0), + # convention="ros", + # ), + # data_types=["rgb"], + # width=960, + # height=720, + # update_latest_camera_pose=False, + # ) + + # cam_side = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/cam_side", + # spawn=sim_utils.PinholeCameraCfg( + # focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5) + # ), + # offset=CameraCfg.OffsetCfg( + # pos=(0.76709, -0.16995, 1.13007), + # rot=(0.61427, 0, 0.02598, 0.78866), + # convention="ros", + # ), + # data_types=["rgb"], + # width=960, + # height=720, + # update_latest_camera_pose=False, + # ) + # # ------------- 相机结束 ------------- ## @@ -222,21 +326,6 @@ class ActionsCfg: scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025), ) - # left_arm_fixed = mdp.JointPositionActionCfg( - # asset_name="Mindbot", - # joint_names=["l_joint[1-6]"], # 对应 l_joint1 到 l_joint6 - - # # 1. 缩放设为 0:这让 RL 策略无法控制它,它不会动 - # scale=0.0, - # offset={ - # "l_joint1": 2.3562, - # "l_joint2": -1.2217, - # "l_joint3": -1.5708, - # "l_joint4": 1.5708, - # "l_joint5": 1.5708, - # "l_joint6": -1.2217, - # }, - # ) grippers_position = mdp.BinaryJointPositionActionCfg( asset_name="Mindbot", @@ -317,16 +406,12 @@ class EventCfg: params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}}) reset_dryingbox=EventTerm(func=mdp.reset_root_state_uniform,mode="reset", params={"asset_cfg": SceneEntityCfg("dryingbox"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}}) - # reset_ultrasound = EventTerm(func=mdp.reset_root_state_uniform, mode="reset", - # params={"asset_cfg": SceneEntityCfg("ultrasound"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}}) - # reset_lid = EventTerm(func=mdp.reset_root_state_uniform, mode="reset", - # params={"asset_cfg": SceneEntityCfg("lid"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}}) @configclass class RewardsCfg: """Reward terms for the MDP.""" - + # 1. 姿态对齐:保持不变,这是基础 gripper_handle_orientation_alignment = RewTerm( func=mdp.gripper_handle_orientation_alignment, params={ @@ -337,15 +422,12 @@ class RewardsCfg: "gripper_forward_axis": (0.0, 0.0, 1.0), "gripper_width_axis": (0.0, 1.0, 0.0), "lid_handle_axis": (0.0, 0.0, 1.0), - # 删除了 handle_axis,因为新函数中不再使用它 - "max_angle_deg": 85.0, # 建议改为 30.0 或更小,85.0 对指向性约束来说太宽松 + "max_angle_deg": 60.0, # 稍微放宽一点,或者保持 30.0,看前期训练难度 }, - weight=5.0 # 建议保持在 4.0 或 5.0,确保姿态约束有足够的引导力 + weight=4.0 ) - - # stage 2 - # 位置对齐奖励:鼓励夹爪位于 lid 两侧,且 Z 方向在 lid 上方 0.1m - + # 2. 
位置对齐 (合并了之前的粗略和精细对齐) + # std=0.05 提供了从 15cm 到 0cm 全程较好的梯度,解决了之前的梯度消失问题 gripper_handle_position_alignment = RewTerm( func=mdp.gripper_handle_position_alignment, params={ @@ -354,29 +436,31 @@ class RewardsCfg: "handle_name":"Equipment_BB_13_C", "left_gripper_name": "right_hand_l", "right_gripper_name": "right_hand__r", - "height_offset": 0.00, # Z方向:lid 上方 0.1m - "std": 0.2, # 位置对齐的容忍度0.3 + "height_offset": 0.00, + "std": 0.05, # 关键修改:0.3太松,0.03太紧导致梯度消失,0.05是平衡点 }, - weight=3.0 + weight=6.0 # 提高权重,作为主导奖励 ) - - # 【新增】精细对齐 (引导进入 2cm 圈) - gripper_handle_fine_alignment = RewTerm( - func=mdp.gripper_handle_position_alignment, + # 3. 远距离闭合惩罚 (逻辑已修正) + # 现在它只惩罚“在远处就闭眼”的行为,不会阻挡机器人靠近 + gripper_closed_penalty = RewTerm( + func=mdp.gripper_open_penalty_when_far, params={ "dryingbox_cfg": SceneEntityCfg("dryingbox"), "robot_cfg": SceneEntityCfg("Mindbot"), - "handle_name":"Equipment_BB_13_C", + "handle_name": "Equipment_BB_13_C", "left_gripper_name": "right_hand_l", "right_gripper_name": "right_hand__r", - "height_offset": 0.00, # Z方向:lid 上方 0.1m - "std": 0.05, # 位置对齐的容忍度0.05 + "joint_names": ["right_hand_joint_left", "right_hand_joint_right"], + "dist_threshold": 0.06, # 2.5cm 保护区,进入后允许闭合 }, - weight=10.0 # 高权重 + weight=0.5 # 权重降低,作为辅助约束,不要喧宾夺主 ) + # ... 在 RewardsCfg 类中修改 ... + # 4. 近距离抓取交互 (改用 V2 版本或增加稳定性项) gripper_close_interaction = RewTerm( - func=mdp.gripper_close_when_near, + func=mdp.gripper_close_interaction_v2, # 切换到带稳定性的版本 params={ "dryingbox_cfg": SceneEntityCfg("dryingbox"), "robot_cfg": SceneEntityCfg("Mindbot"), @@ -384,34 +468,171 @@ class RewardsCfg: "left_gripper_name": "right_hand_l", "right_gripper_name": "right_hand__r", "joint_names": ["right_hand_joint_left", "right_hand_joint_right"], - "target_close_pos": 0.02, - "height_offset": 0.00,#0.07 - "grasp_radius": 0.05,#original 0.03 + "target_close_pos": 0.012, + "height_offset": 0.00, + "grasp_radius": 0.05, }, - weight=10.0 + weight=8.0 # 适当调低一点点,为稳定性奖励腾出空间 ) - # lid_lifted_reward = RewTerm( - # func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数 + # 5. 新增:抓取稳定性奖励 (核心:惩罚抖动) + gripper_grasp_stability = RewTerm( + func=mdp.gripper_grasp_stability, + params={ + "robot_cfg": SceneEntityCfg("Mindbot"), + "joint_names": ["right_hand_joint_left", "right_hand_joint_right"], + "vel_threshold": 0.005, + }, + weight=4.0 # 只要保持静止就一直给分 + ) + # 4. 
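# Quick numbers behind the std choice above, reward = 1 - tanh(dist / std) (illustrative):
#   std = 0.30:  d=0.15 -> 0.54,   d=0.05 -> 0.84,   d=0.01 -> 0.97   (near-max even far away, weak pull)
#   std = 0.05:  d=0.15 -> 0.005,  d=0.05 -> 0.24,   d=0.01 -> 0.80   (usable gradient over the whole approach)
#   std = 0.03:  d=0.15 -> ~0.0,   d=0.10 -> 0.003,  d=0.01 -> 0.68   (flat until ~5 cm, gradient vanishes far out)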
近距离抓取交互 (修正了目标值) + # gripper_close_interaction = RewTerm( + # func=mdp.gripper_close_when_near, # params={ - # "lid_cfg": SceneEntityCfg("lid"), - # "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右 + # "dryingbox_cfg": SceneEntityCfg("dryingbox"), + # "robot_cfg": SceneEntityCfg("Mindbot"), + # "handle_name":"Equipment_BB_13_C", + # "left_gripper_name": "right_hand_l", + # "right_gripper_name": "right_hand__r", + # "joint_names": ["right_hand_joint_left", "right_hand_joint_right"], + + # # 关键修改:物理接触点计算 + # # 夹爪空载全闭是0.03 (对应0cm) + # # 把手宽3.88cm -> 单侧行程 (6-3.88)/2 = 1.06cm = 0.0106m + # # 设为 0.012 确保接触时计算出的奖励是满分 (ratio >= 1.0) + # "target_close_pos": 0.012, + + # "height_offset": 0.00, + # "grasp_radius": 0.05, # }, - # weight=10.0 # 给一个足够大的权重 + # weight=10.0 # 只有成功抓到才给的大奖 # ) - # lid_lifting_reward = RewTerm( - # func=mdp.lid_lift_progress_reward, - # params={ - # "lid_cfg": SceneEntityCfg("lid"), - # "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标 - # "target_height_lift": 0.25, - # "height_offset": 0.07, # 与其他奖励保持一致 - # "std": 0.1 - # }, - # # 权重设大一点,告诉它这是最终目标 - # weight=10.0 - # ) +# @configclass +# class RewardsCfg: +# """Reward terms for the MDP.""" + +# gripper_handle_orientation_alignment = RewTerm( +# func=mdp.gripper_handle_orientation_alignment, +# params={ +# "dryingbox_cfg": SceneEntityCfg("dryingbox"), +# "robot_cfg": SceneEntityCfg("Mindbot"), +# "handle_name": "Equipment_BB_13_C", +# "gripper_body_name": "right_hand_body", +# "gripper_forward_axis": (0.0, 0.0, 1.0), +# "gripper_width_axis": (0.0, 1.0, 0.0), +# "lid_handle_axis": (0.0, 0.0, 1.0), +# # 删除了 handle_axis,因为新函数中不再使用它 +# "max_angle_deg": 30.0, # 建议改为 30.0 或更小,85.0 对指向性约束来说太宽松 +# }, +# weight=4.0 # 建议保持在 4.0 或 5.0,确保姿态约束有足够的引导力 +# ) + +# # stage 2 +# # 位置对齐奖励:鼓励夹爪位于 lid 两侧,且 Z 方向在 lid 上方 0.1m + +# gripper_handle_position_alignment = RewTerm( +# func=mdp.gripper_handle_position_alignment, +# params={ +# "dryingbox_cfg": SceneEntityCfg("dryingbox"), +# "robot_cfg": SceneEntityCfg("Mindbot"), +# "handle_name":"Equipment_BB_13_C", +# "left_gripper_name": "right_hand_l", +# "right_gripper_name": "right_hand__r", +# "height_offset": 0.00, # Z方向:lid 上方 0.1m +# "std": 0.3, # 位置对齐的容忍度0.3 ###0.1 +# }, +# weight=3.0 +# ) + +# # 强制远距离打开奖励 +# # 远距离闭合惩罚 +# gripper_closed_penalty = RewTerm( +# func=mdp.gripper_open_penalty_when_far, +# params={ +# "dryingbox_cfg": SceneEntityCfg("dryingbox"), +# "robot_cfg": SceneEntityCfg("Mindbot"), +# "handle_name": "Equipment_BB_13_C", +# "left_gripper_name": "right_hand_l", +# "right_gripper_name": "right_hand__r", +# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"], +# "dist_threshold": 0.02, # 3cm 以外不允许闭合 +# "max_penalty_dist":0.055 #0.055 +# }, +# weight=1.0 # 提高权重,让扣分很痛8.0 +# ) + + +# # 【新增】精细对齐 (引导进入 2cm 圈) +# gripper_handle_fine_alignment = RewTerm( +# func=mdp.gripper_handle_position_alignment, +# params={ +# "dryingbox_cfg": SceneEntityCfg("dryingbox"), +# "robot_cfg": SceneEntityCfg("Mindbot"), +# "handle_name":"Equipment_BB_13_C", +# "left_gripper_name": "right_hand_l", +# "right_gripper_name": "right_hand__r", +# "height_offset": 0.00, # Z方向:lid 上方 0.1m +# "std": 0.03, # 位置对齐的容忍度0.05 +# }, +# weight=5.0 # 高权重 +# ) + +# # gripper_close_interaction = RewTerm( +# # func=mdp.gripper_close_when_near, +# # params={ +# # "dryingbox_cfg": SceneEntityCfg("dryingbox"), +# # "robot_cfg": SceneEntityCfg("Mindbot"), +# # "handle_name": "Equipment_BB_13_C", +# # "left_gripper_name": "right_hand_l", +# # "right_gripper_name": "right_hand__r", +# # "joint_names": 
["right_hand_joint_left", "right_hand_joint_right"], +# # # 关键匹配把手尺寸 +# # "handle_thickness": 0.0388, # 把手厚度(X/宽度向抓取方向) +# # "thickness_tol": 0.008, # 误差容忍,约 0.8cm +# # "grasp_radius": 0.04, # 进入此半径才计算闭合奖励 +# # "height_offset": 0.00, +# # "target_close_pos": 0.03, # 保留给关节闭合幅度 +# # }, +# # weight=10.0 +# # ) +# gripper_close_interaction = RewTerm( +# func=mdp.gripper_close_when_near, +# params={ +# "dryingbox_cfg": SceneEntityCfg("dryingbox"), +# "robot_cfg": SceneEntityCfg("Mindbot"), +# "handle_name":"Equipment_BB_13_C", +# "left_gripper_name": "right_hand_l", +# "right_gripper_name": "right_hand__r", +# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"], +# "target_close_pos": 0.03, +# "height_offset": 0.00,#0.07 +# "grasp_radius": 0.04,#original 0.03 +# }, +# weight=10.0 +# ) + +# # lid_lifted_reward = RewTerm( +# # func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数 +# # params={ +# # "lid_cfg": SceneEntityCfg("lid"), +# # "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右 +# # }, +# # weight=10.0 # 给一个足够大的权重 +# # ) + +# # lid_lifting_reward = RewTerm( +# # func=mdp.lid_lift_progress_reward, +# # params={ +# # "lid_cfg": SceneEntityCfg("lid"), +# # "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标 +# # "target_height_lift": 0.25, +# # "height_offset": 0.07, # 与其他奖励保持一致 +# # "std": 0.1 +# # }, +# # # 权重设大一点,告诉它这是最终目标 +# # weight=10.0 +# # ) diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/pull_env_cfg.py.bak b/source/mindbot/mindbot/tasks/manager_based/pull/pull_env_cfg.py.bak new file mode 100644 index 0000000..fe15f7e --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pull/pull_env_cfg.py.bak @@ -0,0 +1,681 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import math +from re import T +from tkinter import N +import torch +from isaaclab.actuators import ImplicitActuatorCfg +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.managers import CurriculumTermCfg as CurrTerm +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.utils import configclass +from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg +from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg + +from . 
import mdp + +## +# Pre-defined configs +## + +from mindbot.robot.mindbot import MINDBOT_CFG + +# ====== 其他物体配置 ====== +from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg +from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg + + +TABLE_CFG=RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Table", + init_state=RigidObjectCfg.InitialStateCfg( + pos=[0.95, -0.3, 0.005], + rot=[0.7071, 0.0, 0.0, -0.7071], + lin_vel=[0.0, 0.0, 0.0], + ang_vel=[0.0, 0.0, 0.0], + ), + spawn=UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd", + rigid_props=RigidBodyPropertiesCfg( + rigid_body_enabled= True, + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + disable_gravity=False, + ), + collision_props=CollisionPropertiesCfg( + collision_enabled=True, + contact_offset=0.0005,#original 0.02 + rest_offset=0 + ), + ), +) + +DRYINGBOX_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/assets/Collected_Equipment_BB_0B/Equipment/Equipment_BB_13.usd", + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=1.0 + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False,# + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + stabilization_threshold=1e-6, + # fix_root_link=True, + ), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=[0.95, -0.45, 0.9], + rot=[-0.7071, 0.0, 0.0, 0.7071] + ), + # actuators={} + actuators={ + "passive_damper": ImplicitActuatorCfg( + # ".*" 表示匹配该USD文件内的所有关节(无论是轮子、屏幕转轴还是其他) + joint_names_expr=["RevoluteJoint"], + + stiffness=10000.0, + damping=1000.0, + effort_limit_sim=10000.0, + velocity_limit_sim=100.0, + ), + } +) + +LID_CFG = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Lid", + init_state=RigidObjectCfg.InitialStateCfg( + pos=[0.2, 0.65, 0.9], + rot=[1.0, 0.0, 0.0, 0.0], + lin_vel=[0.0, 0.0, 0.0], + ang_vel=[0.0, 0.0, 0.0], + ), + spawn=UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd", + # scale=(0.2, 0.2, 0.2), + rigid_props=RigidBodyPropertiesCfg( + rigid_body_enabled= True, + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + max_angular_velocity=1000.0, + max_linear_velocity=1000.0, + max_depenetration_velocity=0.5,#original 5.0 + linear_damping=5.0, #original 0.5 + angular_damping=5.0, #original 0.5 + disable_gravity=False, + ), + collision_props=CollisionPropertiesCfg( + collision_enabled=True, + contact_offset=0.0005,#original 0.02 + rest_offset=0 + ), + ), +) + + +ULTRASOUND_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound.usd", + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=1.0 + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False,# + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + stabilization_threshold=1e-6, + # fix_root_link=True, + ), + ), + init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)), + # actuators={} + actuators={ + "passive_damper": ImplicitActuatorCfg( + # ".*" 表示匹配该USD文件内的所有关节(无论是轮子、屏幕转轴还是其他) + joint_names_expr=[".*"], + + stiffness=0.0, + damping=1000.0, + effort_limit_sim=10000.0, + velocity_limit_sim=100.0, + ), + } +) + +## +# Scene definition +## +from isaaclab.sensors import 
CameraCfg +import isaaclab.sim as sim_utils + +@configclass +class PullSceneCfg(InteractiveSceneCfg): + """Configuration for a cart-pole scene.""" + + # ground plane + ground = AssetBaseCfg( + prim_path="/World/ground", + spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)), + ) + + # robot + Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot") + table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table") + dryingbox: ArticulationCfg = DRYINGBOX_CFG.replace(prim_path="{ENV_REGEX_NS}/Dryingbox") + # ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound") + # lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid") + + # lights + dome_light = AssetBaseCfg( + prim_path="/World/DomeLight", + spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0), + ) + + # # ------------- 修改后的相机配置 (直接使用 USD 内置相机) ------------- + + # # 1. 头部相机 + # # USD 路径: /root/robot_head/cam_head + # cam_head = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head", + # update_latest_camera_pose=True, + # data_types=["rgb"], + # width=1280, + # height=720, + # # 移除 spawn 和 offset,直接使用 USD 中的参数和位置 + # ) + + # # 2. 胸部相机 + # # USD 路径: /root/robot_trunk/cam_chest + # cam_chest = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest", + # update_latest_camera_pose=True, + # data_types=["rgb"], + # width=1280, + # height=720, + # ) + + # # 3. 左手相机 + # # USD 路径: /root/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand + # cam_left_hand = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand", + # update_latest_camera_pose=True, + # data_types=["rgb"], + # width=1280, + # height=720, + # ) + + # # 4. 右手相机 + # # USD 路径: /root/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand + # cam_right_hand = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand", + # update_latest_camera_pose=True, + # data_types=["rgb"], + # width=1280, + # height=720, + # ) + + # ------------- 相机结束 ------------- + + + # # ------------- 新增 6 路相机 ------------- + # cam_head = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head", + # spawn=sim_utils.PinholeCameraCfg( + # focal_length=1.93, # 使用你给的 1.93 + # # 由 fx = W * f / aperture 推得 aperture = W * f / fx + # horizontal_aperture=1280 * 1.93 / 911.77, # ≈ 2.71,单位与 focal_length 一致 + # clipping_range=(0.1, 1e5), + # ), + # offset=CameraCfg.OffsetCfg( + # pos=(0.1226748226, 0.0, 0.1130563345), + # rot=(-0.5, 0.5, -0.5, 0.5), + # # convention="ros", + + # # rot=(0.70710678, 0, 0.70710678, 0), + # convention="ros", + # ), + # data_types=["rgb"], + # width=1280, + # height=720, + # update_latest_camera_pose=True, + # ) + + # cam_chest = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest", + # spawn=sim_utils.PinholeCameraCfg( + # focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5) + # ), + # offset=CameraCfg.OffsetCfg( + # pos=(-0.14909, 0.3549, -0.23295), + # rot=(0.99452, 0.0, 0.10453, 0.0),#(0.99452, 0.0, 0.10453, 0.0),# + # convention="ros", + # ), + # data_types=["rgb"], + # width=1280, + # height=720, + # update_latest_camera_pose=True, + # ) + + # cam_left_hand = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand", + # spawn=sim_utils.PinholeCameraCfg( + # focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5) 
+ # ), + # offset=CameraCfg.OffsetCfg( + # pos=(0.0610568560, 1.13e-8, 0.0683504405),#() + # rot=(0.99452, 0, 0.10453, 0),#(0.99452, 0, 0.10453, 0) + # convention="ros", + # ), + # data_types=["rgb"], + # width=1280, + # height=720, + # update_latest_camera_pose=True, + # ) + + # cam_right_hand = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand", + # spawn=sim_utils.PinholeCameraCfg( + # focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5) + # ), + # offset=CameraCfg.OffsetCfg( + # pos=(0.0610571709, 2.58e-7, 0.0711255539),#(0.0610568560, 1.13e-8, 0.0683504405) + # rot=(0.445, -0.549, -0.549, -0.445),#(0.99452, 0, 0.10453, 0) + # convention="ros", + # ), + # data_types=["rgb"], + # width=1280, + # height=720, + # update_latest_camera_pose=True, + # ) + + # cam_top = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/cam_top", + # spawn=sim_utils.PinholeCameraCfg( + # focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5) + # ), + # offset=CameraCfg.OffsetCfg( + # pos=(0.4627602211, 0.0, 2.0977711170), + # rot=(0.83526, 0, 0.54986, 0), + # convention="ros", + # ), + # data_types=["rgb"], + # width=960, + # height=720, + # update_latest_camera_pose=False, + # ) + + # cam_side = CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/cam_side", + # spawn=sim_utils.PinholeCameraCfg( + # focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5) + # ), + # offset=CameraCfg.OffsetCfg( + # pos=(0.76709, -0.16995, 1.13007), + # rot=(0.61427, 0, 0.02598, 0.78866), + # convention="ros", + # ), + # data_types=["rgb"], + # width=960, + # height=720, + # update_latest_camera_pose=False, + # ) + # # ------------- 相机结束 ------------- + + +## +# MDP settings +## + +def right_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor: + asset = env.scene[asset_cfg.name] + body_ids, _ = asset.find_bodies(["right_hand_body"], preserve_order=True) + pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins + return pos_w + +def get_body_pos_rel(env, asset_cfg: SceneEntityCfg) -> torch.Tensor: + asset = env.scene[asset_cfg.name] + body_ids, _ = asset.find_bodies(asset_cfg.body_names, preserve_order=True) + return asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins + +# 新增:通用获取 body 世界姿态的函数 +def get_body_quat_w(env, asset_cfg: SceneEntityCfg) -> torch.Tensor: + asset = env.scene[asset_cfg.name] + body_ids, _ = asset.find_bodies(asset_cfg.body_names, preserve_order=True) + return asset.data.body_quat_w[:, body_ids[0]] + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + # 使用任务空间控制器:策略输出末端执行器位置+姿态增量(6维) + right_arm_ee = DifferentialInverseKinematicsActionCfg( + asset_name="Mindbot", + joint_names=["r_joint[1-6]"], # 左臂的6个关节 + body_name="right_hand_body", # 末端执行器body名称 + controller=DifferentialIKControllerCfg( + command_type="pose", # 控制位置+姿态 + use_relative_mode=True, # 相对模式:动作是增量 + ik_method="dls", # Damped Least Squares方法 + ), + scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025), + ) + + + grippers_position = mdp.BinaryJointPositionActionCfg( + asset_name="Mindbot", + joint_names=["right_hand_joint_left", "right_hand_joint_right"], + # 修正:使用字典格式 + # open_command_expr={"关节名正则": 值} + open_command_expr={"right_hand_joint_left": 0.0, "right_hand_joint_right": 0.0}, + # close_command_expr={"关节名正则": 值} + close_command_expr={"right_hand_joint_left": -0.03, "right_hand_joint_right": 
0.03}, + ) + + trunk_position = mdp.JointPositionActionCfg( + asset_name="Mindbot", + joint_names=["PrismaticJoint"], + scale=0.00, + offset=0.25, + clip=None + ) + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")}) + mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}) + mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}) + + right_gripper_pos = ObsTerm(func=right_gripper_pos_w, + params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["right_hand_body"])}) + dryingbox_handle_pos = ObsTerm( + func=get_body_pos_rel, + params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])} + ) + dryingbox_handle_quat = ObsTerm( + func=get_body_quat_w, + params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])} + ) + # lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")}) + # ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")}) + + def __post_init__(self) -> None: + self.enable_corruption = False + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + # 重置所有关节到 init_state(无偏移) + reset_mindbot_all_joints = EventTerm( + func=mdp.reset_joints_by_offset, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("Mindbot"), + "position_range": (0.0, 0.0), + "velocity_range": (0.0, 0.0), + }, + ) + # 只对左臂添加随机偏移 + reset_mindbot_right_arm = EventTerm( + func=mdp.reset_joints_by_offset, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("Mindbot", joint_names=["r_joint[1-6]"]), + "position_range": (0.0, 0.0), + "velocity_range": (0.0, 0.0), + }, + ) + reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset", + params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}}) + reset_dryingbox=EventTerm(func=mdp.reset_root_state_uniform,mode="reset", + params={"asset_cfg": SceneEntityCfg("dryingbox"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}}) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + gripper_handle_orientation_alignment = RewTerm( + func=mdp.gripper_handle_orientation_alignment, + params={ + "dryingbox_cfg": SceneEntityCfg("dryingbox"), + "robot_cfg": SceneEntityCfg("Mindbot"), + "handle_name": "Equipment_BB_13_C", + "gripper_body_name": "right_hand_body", + "gripper_forward_axis": (0.0, 0.0, 1.0), + "gripper_width_axis": (0.0, 1.0, 0.0), + "lid_handle_axis": (0.0, 0.0, 1.0), + # 删除了 handle_axis,因为新函数中不再使用它 + "max_angle_deg": 30.0, # 建议改为 30.0 或更小,85.0 对指向性约束来说太宽松 + }, + weight=4.0 # 建议保持在 4.0 或 5.0,确保姿态约束有足够的引导力 + ) + + # stage 2 + # 位置对齐奖励:鼓励夹爪位于 lid 两侧,且 Z 方向在 lid 上方 0.1m + + gripper_handle_position_alignment = RewTerm( + func=mdp.gripper_handle_position_alignment, + params={ + "dryingbox_cfg": SceneEntityCfg("dryingbox"), + "robot_cfg": SceneEntityCfg("Mindbot"), + "handle_name":"Equipment_BB_13_C", + "left_gripper_name": "right_hand_l", + "right_gripper_name": "right_hand__r", + "height_offset": 0.00, # Z方向:lid 上方 0.1m + "std": 0.3, # 位置对齐的容忍度0.3 ###0.1 + }, + weight=3.0 + ) + + # 强制远距离打开奖励 + # 远距离闭合惩罚 + gripper_closed_penalty 
= RewTerm( + func=mdp.gripper_open_penalty_when_far, + params={ + "dryingbox_cfg": SceneEntityCfg("dryingbox"), + "robot_cfg": SceneEntityCfg("Mindbot"), + "handle_name": "Equipment_BB_13_C", + "left_gripper_name": "right_hand_l", + "right_gripper_name": "right_hand__r", + "joint_names": ["right_hand_joint_left", "right_hand_joint_right"], + "dist_threshold": 0.02, # 3cm 以外不允许闭合 + "max_penalty_dist":0.055 #0.055 + }, + weight=1.0 # 提高权重,让扣分很痛8.0 + ) + + + # 【新增】精细对齐 (引导进入 2cm 圈) + gripper_handle_fine_alignment = RewTerm( + func=mdp.gripper_handle_position_alignment, + params={ + "dryingbox_cfg": SceneEntityCfg("dryingbox"), + "robot_cfg": SceneEntityCfg("Mindbot"), + "handle_name":"Equipment_BB_13_C", + "left_gripper_name": "right_hand_l", + "right_gripper_name": "right_hand__r", + "height_offset": 0.00, # Z方向:lid 上方 0.1m + "std": 0.03, # 位置对齐的容忍度0.05 + }, + weight=5.0 # 高权重 + ) + + # gripper_close_interaction = RewTerm( + # func=mdp.gripper_close_when_near, + # params={ + # "dryingbox_cfg": SceneEntityCfg("dryingbox"), + # "robot_cfg": SceneEntityCfg("Mindbot"), + # "handle_name": "Equipment_BB_13_C", + # "left_gripper_name": "right_hand_l", + # "right_gripper_name": "right_hand__r", + # "joint_names": ["right_hand_joint_left", "right_hand_joint_right"], + # # 关键匹配把手尺寸 + # "handle_thickness": 0.0388, # 把手厚度(X/宽度向抓取方向) + # "thickness_tol": 0.008, # 误差容忍,约 0.8cm + # "grasp_radius": 0.04, # 进入此半径才计算闭合奖励 + # "height_offset": 0.00, + # "target_close_pos": 0.03, # 保留给关节闭合幅度 + # }, + # weight=10.0 + # ) + gripper_close_interaction = RewTerm( + func=mdp.gripper_close_when_near, + params={ + "dryingbox_cfg": SceneEntityCfg("dryingbox"), + "robot_cfg": SceneEntityCfg("Mindbot"), + "handle_name":"Equipment_BB_13_C", + "left_gripper_name": "right_hand_l", + "right_gripper_name": "right_hand__r", + "joint_names": ["right_hand_joint_left", "right_hand_joint_right"], + "target_close_pos": 0.03, + "height_offset": 0.00,#0.07 + "grasp_radius": 0.04,#original 0.03 + }, + weight=10.0 + ) + + # lid_lifted_reward = RewTerm( + # func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数 + # params={ + # "lid_cfg": SceneEntityCfg("lid"), + # "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右 + # }, + # weight=10.0 # 给一个足够大的权重 + # ) + + # lid_lifting_reward = RewTerm( + # func=mdp.lid_lift_progress_reward, + # params={ + # "lid_cfg": SceneEntityCfg("lid"), + # "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标 + # "target_height_lift": 0.25, + # "height_offset": 0.07, # 与其他奖励保持一致 + # "std": 0.1 + # }, + # # 权重设大一点,告诉它这是最终目标 + # weight=10.0 + # ) + + + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + mindbot_fly_away = DoneTerm( + func=mdp.base_height_failure, + params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0}, + ) + + # lid_fly_away = DoneTerm( + # func=mdp.base_height_failure, + # params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0}, + # ) + + # # 新增:盖子掉落判定 + # # 如果盖子掉回桌面(高度接近初始高度),且已经运行了一段时间(避开刚开始重置的瞬间) + # lid_dropped = DoneTerm( + # func=mdp.base_height_failure, # 复用高度判定函数 + # params={ + # "asset_cfg": SceneEntityCfg("lid"), + # "minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了 + # }, + # ) + + + +## +# Environment configuration +## + +@configclass +class CurriculumCfg: + pass + # action_rate = CurrTerm(func=mdp.modify_reward_weight, + # params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000}) + + # joint_vel = CurrTerm(func=mdp.modify_reward_weight, + # 
params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000}) + + # position_std_scheduler = CurrTerm( + # func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类 + # params={ + # # 路径:rewards管理器 -> 你的奖励项名 -> params字典 -> std键 + # "address": "rewards.gripper_lid_position_alignment.params.std", + + # # 修改逻辑:使用我们刚才写的函数 + # "modify_fn": mdp.annealing_std, + + # # 传给函数的参数 + # "modify_params": { + # "start_step": 00, # 约 600 轮 + # "end_step": 5_000, # 约 1200 轮 + # "start_std": 0.3, + # "end_std": 0.05, + # } + # } + # ) + +@configclass +class PullEnvCfg(ManagerBasedRLEnvCfg): + # Scene settings + scene: PullSceneCfg = PullSceneCfg(num_envs=5, env_spacing=3.0) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + events: EventCfg = EventCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + # curriculum: CurriculumCfg = CurriculumCfg() + + # Post initialization + def __post_init__(self) -> None: + """Post initialization.""" + # general settings + self.decimation = 4 #original 2 # 从 2 增加到 4,控制频率从 25Hz 降到 12.5Hz + self.episode_length_s = 10 + # viewer settings + self.viewer.eye = (3.5, 0.0, 3.2) + # simulation settings + self.sim.dt = 1 / 120 #original 1 / 60 + self.sim.render_interval = self.decimation + # # 1. 基础堆内存 + self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256MB + self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024 + self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024 + self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024 # 200万个接触点 + self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024 # 32万个 patch + + # # 5. 聚合对容量 (针对复杂的 Articulation) + self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024 + self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024 + self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024 diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/__init__.py b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/__init__.py new file mode 100644 index 0000000..0750a8e --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. 
+## + + +gym.register( + id="Pull-Ultrasound-Lid-Up-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg", + "skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml", + "skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml", + "skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", + }, +) \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/__init__.py b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/__init__.py new file mode 100644 index 0000000..a597dfa --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/rl_games_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/rl_games_ppo_cfg.yaml new file mode 100644 index 0000000..71216e6 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/rl_games_ppo_cfg.yaml @@ -0,0 +1,78 @@ +params: + seed: 42 + + # environment wrapper clipping + env: + # added to the wrapper + clip_observations: 5.0 + # can make custom wrapper? 
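# Example (sketch, not part of this config): once the Isaac Sim app has been launched
# (AppLauncher, as in scripts/rsl_rl/play.py), the task registered above can also be
# built directly from its cfg entry point:
#   from isaaclab.envs import ManagerBasedRLEnv
#   from mindbot.tasks.manager_based.pullUltrasoundLidUp.mindbot_env_cfg import MindbotEnvCfg
#   env_cfg = MindbotEnvCfg()
#   env_cfg.scene.num_envs = 2
#   env = ManagerBasedRLEnv(cfg=env_cfg)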
+ clip_actions: 1.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + # doesn't have this fine grained control but made it close + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [32, 32] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: cartpole_direct + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + num_actors: -1 # configured from the script (based on num_envs) + reward_shaper: + scale_value: 0.1 + normalize_advantage: True + gamma: 0.99 + tau : 0.95 + learning_rate: 5e-4 + lr_schedule: adaptive + kl_threshold: 0.008 + score_to_win: 20000 + max_epochs: 150 + save_best_after: 50 + save_frequency: 25 + grad_norm: 1.0 + entropy_coef: 0.0 + truncate_grads: True + e_clip: 0.2 + horizon_length: 32 + minibatch_size: 16384 + mini_epochs: 8 + critic_coef: 4 + clip_value: True + seq_length: 4 + bounds_loss_coef: 0.0001 \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/rsl_rl_ppo_cfg.py b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 0000000..800b162 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,72 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class PPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 48 # 稍微增加每轮步数 + max_iterations = 5000 # 增加迭代次数,给它足够的时间学习 + save_interval = 100 + # experiment_name = "mindbot_grasp" # 建议改个名,不要叫 cartpole 了 + experiment_name = "mindbot_pullUltrasoundLidUp" + + policy = RslRlPpoActorCriticCfg( + init_noise_std=0.7, + actor_obs_normalization=True, # 开启归一化 + critic_obs_normalization=True, # 开启归一化 + # 增加网络容量:三层 MLP,宽度足够 + actor_hidden_dims=[128, 64, 32], + critic_hidden_dims=[128, 64, 32], + activation="elu", + ) + + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, # 保持适度的探索 + num_learning_epochs=5, + num_mini_batches=8, + learning_rate=3e-4, # 如果网络变大后不稳定,可以尝试降低到 5.0e-4 + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) + + +# @configclass +# class PPORunnerCfg(RslRlOnPolicyRunnerCfg): +# num_steps_per_env = 16 +# max_iterations = 150 +# save_interval = 50 +# experiment_name = "cartpole_direct" +# policy = RslRlPpoActorCriticCfg( +# init_noise_std=1.0, +# actor_obs_normalization=False, +# critic_obs_normalization=False, +# actor_hidden_dims=[32, 32], +# critic_hidden_dims=[32, 32], +# activation="elu", +# ) +# algorithm = RslRlPpoAlgorithmCfg( +# value_loss_coef=1.0, +# use_clipped_value_loss=True, +# clip_param=0.2, +# entropy_coef=0.005, +# num_learning_epochs=5, +# num_mini_batches=4, +# learning_rate=1.0e-3, +# schedule="adaptive", +# gamma=0.99, +# lam=0.95, +# desired_kl=0.01, +# max_grad_norm=1.0, +# ) \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/sb3_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/sb3_ppo_cfg.yaml new file mode 100644 index 0000000..23ed0c0 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/sb3_ppo_cfg.yaml @@ -0,0 +1,20 @@ +# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32 +seed: 42 + +n_timesteps: !!float 1e6 +policy: 'MlpPolicy' +n_steps: 16 +batch_size: 4096 +gae_lambda: 0.95 +gamma: 0.99 +n_epochs: 20 +ent_coef: 0.01 +learning_rate: !!float 3e-4 +clip_range: !!float 0.2 +policy_kwargs: + activation_fn: nn.ELU + net_arch: [32, 32] + squash_output: False +vf_coef: 1.0 +max_grad_norm: 1.0 +device: "cuda:0" \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_amp_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_amp_cfg.yaml new file mode 100644 index 0000000..3a1fd21 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_amp_cfg.yaml @@ -0,0 +1,111 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: -2.9 + fixed_log_std: True + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + 
network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ONE + discriminator: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + +# AMP memory (reference motion dataset) +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +motion_dataset: + class: RandomMemory + memory_size: 200000 + +# AMP memory (preventing discriminator overfitting) +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +reply_buffer: + class: RandomMemory + memory_size: 1000000 + + +# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/amp.html +agent: + class: AMP + rollouts: 16 + learning_epochs: 6 + mini_batches: 2 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-05 + learning_rate_scheduler: null + learning_rate_scheduler_kwargs: null + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + amp_state_preprocessor: RunningStandardScaler + amp_state_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 0.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.5 + discriminator_loss_scale: 5.0 + amp_batch_size: 512 + task_reward_weight: 0.0 + style_reward_weight: 1.0 + discriminator_batch_size: 4096 + discriminator_reward_scale: 2.0 + discriminator_logit_regularization_scale: 0.05 + discriminator_gradient_penalty_scale: 5.0 + discriminator_weight_decay_scale: 1.0e-04 + # rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "humanoid_amp_run" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 80000 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_ippo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_ippo_cfg.yaml new file mode 100644 index 0000000..2f46b1c --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_ippo_cfg.yaml @@ -0,0 +1,80 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# IPPO 
agent configuration (field names are from IPPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html +agent: + class: IPPO + rollouts: 16 + learning_epochs: 8 + mini_batches: 1 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 3.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cart_double_pendulum_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_mappo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_mappo_cfg.yaml new file mode 100644 index 0000000..720c927 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_mappo_cfg.yaml @@ -0,0 +1,82 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html +agent: + class: MAPPO + rollouts: 16 + learning_epochs: 8 + mini_batches: 1 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 3.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + shared_state_preprocessor: RunningStandardScaler + shared_state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cart_double_pendulum_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + 
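+  # Note: the 4800-timestep budget and the "cart_double_pendulum_direct" experiment directory above
+  # appear to be carried over from the skrl multi-agent template; they would likely need retuning
+  # before MAPPO is actually used for this single-arm lid-lifting task.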
environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_ppo_cfg.yaml new file mode 100644 index 0000000..ab6674d --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_ppo_cfg.yaml @@ -0,0 +1,80 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html +agent: + class: PPO + rollouts: 32 + learning_epochs: 8 + mini_batches: 8 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 0.1 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cartpole_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/__init__.py b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/__init__.py new file mode 100644 index 0000000..4ab42b0 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +"""This sub-module contains the functions that are specific to the environment.""" + +from isaaclab.envs.mdp import * # noqa: F401, F403 + +from .rewards import * # noqa: F401, F403 +from .terminations import * # noqa: F401, F403 +from .gripper import * # noqa: F401, F403 +from .curriculums import * # noqa: F401, F403 \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/curriculums.py b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/curriculums.py new file mode 100644 index 0000000..4e5d833 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/curriculums.py @@ -0,0 +1,50 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +from isaaclab.envs.mdp import modify_term_cfg + +def annealing_std( + env: ManagerBasedRLEnv, + env_ids: torch.Tensor, + current_value: float, + start_step: int, + end_step: int, + start_std: float, + end_std: float +): + """ + 根据步数线性退火 std 值。 + + Args: + current_value: 当前的参数值 (系统自动传入) + start_step: 开始退火的步数 + end_step: 结束退火的步数 + start_std: 初始 std + end_std: 最终 std + """ + current_step = env.common_step_counter + + # 1. 还没到开始时间 -> 保持初始值 (或者不改) + if current_step < start_step: + # 如果当前值还没被设为 start_std,就强制设一下,否则不动 + return start_std + + # 2. 已经超过结束时间 -> 保持最终值 + elif current_step >= end_step: + return end_std + + # 3. 在中间 -> 线性插值 + else: + ratio = (current_step - start_step) / (end_step - start_step) + new_std = start_std + (end_std - start_std) * ratio + return new_std \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/gripper.py b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/gripper.py new file mode 100644 index 0000000..981ac01 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/gripper.py @@ -0,0 +1,54 @@ +# 假设这是在你的 mdp.py 文件中 + +from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg +from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction +from isaaclab.utils import configclass +import torch + + +class CoupledJointPositionAction(JointPositionAction): + def __init__(self, cfg: 'CoupledJointPositionActionCfg', env): + super().__init__(cfg, env) + + @property + def action_dim(self) -> int: + """强制 ActionManager 认为只需要 1D 输入。""" + return 1 + + """ + 这是运行时被实例化的类。它继承自 JointPositionAction。 + """ + def process_actions(self, actions: torch.Tensor): + scale = self.cfg.scale + offset = self.cfg.offset + # store the raw actions + self._raw_actions[:] = torch.clamp(actions, -1, 1) + # apply the affine transformations + target_position_interval = self._raw_actions * scale + offset + right_cmd = target_position_interval + left_cmd = -target_position_interval + # import pdb; pdb.set_trace() + self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1) + # print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}") + # clip actions + if self.cfg.clip is not None: + self._processed_actions = torch.clamp( + self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1] + ) + + +@configclass +class CoupledJointPositionActionCfg(JointPositionActionCfg): + """ + 配置类。关键在于设置 
class_type 指向上面的实现类。 + """ + # !!! 关键点 !!! + # 告诉 ActionManager: "当你看到这个配置时,请实例化 CoupledJointPositionAction 这个类" + class_type = CoupledJointPositionAction + + def __post_init__(self) -> None: + # 这里的检查逻辑是没问题的,因为 ActionManager 会在初始化时调用它 + if len(self.joint_names) != 2: + raise ValueError("Requires exactly two joint names.") + super().__post_init__() + diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/rewards.py b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/rewards.py new file mode 100644 index 0000000..561c2f6 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/rewards.py @@ -0,0 +1,881 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor: + """Penalize joint position deviation from a target value.""" + # extract the used quantities (to enable type-hinting) + asset: Articulation = env.scene[asset_cfg.name] + # wrap the joint positions to (-pi, pi) + joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids]) + # compute the reward + return torch.sum(torch.square(joint_pos - target), dim=1) + + +def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor: + """Reward the agent for lifting the lid above the minimal height.""" + lid: RigidObject = env.scene[lid_cfg.name] + # root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,) + height = lid.data.root_pos_w[:, 2] + return torch.where(height > minimal_height, 1.0, 0.0) + +def gripper_lid_distance(env: ManagerBasedRLEnv, std: float, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + """Reward the agent for reaching the lid using tanh-kernel.""" + # extract the used quantities (to enable type-hinting) + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + # float 化,防止成为 (3,) 向量 + if not isinstance(std, float): + if torch.is_tensor(std): + std = std.item() + else: + std = float(std) + + # Target lid position: (num_envs, 3) + lid_pos_w = lid.data.root_pos_w[:, :3] + + # Gripper position: (num_envs, 3) + # body_idx = robot.find_bodies(gripper_body_name)[0] + # gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1) + body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True) + gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :] + # Distance of the gripper to the lid: (num_envs,) + distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6 + + return 1 - torch.tanh(distance / std) + + +def lid_grasped(env: ManagerBasedRLEnv, + distance_threshold: float = 0.05, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + """Reward the agent for grasping the 
lid (close and lifted).""" + # extract the used quantities (to enable type-hinting) + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + # Target lid position: (num_envs, 3) + lid_pos_w = lid.data.root_pos_w[:, :3] + + # Gripper position: (num_envs, 3) + body_idx = robot.find_bodies(gripper_body_name)[0] + gripper_pos_w = robot.data.body_pos_w[:, body_idx, :] + + # Distance of the gripper to the lid: (num_envs,) + distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + + # Check if close and lifted + is_close = distance < distance_threshold + is_lifted = lid.data.root_pos_w[:, 2] > 0.1 + + return torch.where(is_close & is_lifted, 1.0, 0.0) + +def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + """Combined reward for reaching the lid AND lifting it.""" + # Get reaching reward + reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name) + # Get lifting reward + lift_reward = lid_is_lifted(env, minimal_height, lid_cfg) + # Combine rewards multiplicatively + return reach_reward * lift_reward + + + +# def gripper_lid_position_alignment(env: ManagerBasedRLEnv, +# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# left_gripper_name: str = "left_hand__l", +# right_gripper_name: str = "left_hand_r", +# height_offset: float = 0.1, +# std: float = 0.1) -> torch.Tensor: +# """奖励函数:夹爪相对于lid的位置对齐。 + +# Args: +# env: 环境实例 +# lid_cfg: lid的配置 +# robot_cfg: 机器人的配置 +# left_gripper_name: 左侧夹爪body名称 +# right_gripper_name: 右侧夹爪body名称 +# side_distance: Y方向的期望距离 +# height_offset: Z方向的期望高度偏移 +# std: tanh核函数的标准差 + +# Returns: +# 位置对齐奖励 (num_envs,) +# """ +# lid: RigidObject = env.scene[lid_cfg.name] +# robot: Articulation = env.scene[robot_cfg.name] + +# lid_pos_w = lid.data.root_pos_w[:, :3] + +# # 获取两个夹爪的位置 +# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) +# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + +# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3) +# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3) + +# # 计算夹爪中心位置 +# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3) + +# # 计算相对位置(夹爪中心相对于 lid 中心) +# rel_pos = gripper_center - lid_pos_w # (num_envs, 3) + +# # X 方向位置:应该对齐(接近 0) +# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6 +# x_reward = 1.0 - torch.tanh(x_dist / std) + +# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance) +# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6 +# # y_error = torch.abs(y_dist - side_distance) +# # y_reward = 1.0 - torch.tanh(y_error / std) +# y_reward = 1.0 - torch.tanh(y_dist / std) + +# # Z 方向位置:应该在 lid 上方 height_offset 处 +# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6 +# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6 +# z_reward = 1.0 - torch.tanh(z_error / std) + +# # 组合位置奖励 +# position_reward = x_reward * y_reward * z_reward + +# return position_reward + + +def gripper_lid_position_alignment(env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + height_offset: float = 0.02, + std: 
float = 0.1) -> torch.Tensor: + """奖励函数:夹爪相对于lid的位置对齐(简化版)。 + + 计算夹爪中心到目标点(lid上方height_offset处)的距离奖励。 + + Args: + env: 环境实例 + lid_cfg: lid的配置 + robot_cfg: 机器人的配置 + left_gripper_name: 左侧夹爪body名称 + right_gripper_name: 右侧夹爪body名称 + height_offset: Z方向的期望高度偏移(目标点在lid上方) + std: tanh核函数的标准差 + + Returns: + 位置对齐奖励 (num_envs,) + """ + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + lid_pos_w = lid.data.root_pos_w[:, :3] + + # 目标点:lid位置 + height_offset(Z方向) + target_pos = lid_pos_w.clone() + # print(target_pos[0]) + target_pos[:, 2] += height_offset # 在lid上方height_offset处 + # print(target_pos[0]) + # 获取两个夹爪的位置 + left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) + right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + + left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3) + right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3) + + # 计算夹爪中心位置 + gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3) + # print(gripper_center[0]) + # 计算夹爪中心到目标点的3D距离 + distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6 + + # 距离奖励:距离越小,奖励越大 + position_reward = 1.0 - torch.tanh(distance / std) + + return position_reward + + +# ... (保留原有的 import) + +# ============================================================================== +# 新增:直接距离惩罚 (线性引导) +# ============================================================================== +def gripper_lid_distance_penalty( + env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + height_offset: float = 0.07, +) -> torch.Tensor: + """ + 计算夹爪中心到目标点的欧式距离。 + 返回的是正的距离值,我们会在 config 里给它设置负权重 (比如 weight=-1.0)。 + 这样:距离越远 -> 惩罚越大 (Total Reward 越低)。 + """ + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + # 1. 目标点 (Lid中心 + 高度偏移) + lid_pos_w = lid.data.root_pos_w[:, :3].clone() + lid_pos_w[:, 2] += height_offset + + # 2. 夹爪中心 + left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) + right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + + left_pos = robot.data.body_pos_w[:, left_body_ids[0], :] + right_pos = robot.data.body_pos_w[:, right_body_ids[0], :] + gripper_center = (left_pos + right_pos) / 2.0 + + # 3. 
计算距离 + distance = torch.norm(gripper_center - lid_pos_w, dim=1) + + return distance + +# ============================================================================== +# 优化:位置对齐 (保持原函数,但确保逻辑清晰) +# ============================================================================== +def gripper_lid_position_alignment(env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + height_offset: float = 0.02, + std: float = 0.1) -> torch.Tensor: + """奖励函数:基于 tanh 核函数的距离奖励 (靠近得越多,分数涨得越快)""" + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + + lid_pos_w = lid.data.root_pos_w[:, :3] + + target_pos = lid_pos_w.clone() + target_pos[:, 2] += height_offset + + left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True) + right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True) + + left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] + right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] + + gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 + + distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6 + + # 这里保持不变,关键在于我们在 config 里怎么设置 std 和 weight + position_reward = 1.0 - torch.tanh(distance / std) + + return position_reward + + +# def gripper_lid_orientation_alignment( +# env, +# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# gripper_body_name: str = "left_hand_body", +# gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z +# gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行) +# lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y +# ) -> torch.Tensor: + +# # 1. 获取对象 +# lid = env.scene[lid_cfg.name] +# robot = env.scene[robot_cfg.name] + +# # 2. 获取姿态 +# lid_quat_w = lid.data.root_quat_w +# body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True) +# gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :] + +# # 3. 垂直对齐 (Vertical Alignment) +# # 目标:夹爪 +Y 指向 World -Z +# grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1) +# grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local) +# # print(grip_fwd_world[0]) +# world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1) + +# dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1) +# dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) #add +# # 映射 [-1, 1] -> [0, 1],且全程有梯度 +# # 即使是朝天 (+1),只要稍微往下转一点,分数就会增加 +# # rew_vertical = (dot_vertical + 1.0) / 2.0 +# val_vertical = torch.clamp((dot_vertical + 1.0) / 2.0, 0.0, 1.0) +# rew_vertical = torch.pow(val_vertical, 2) + 1e-4 +# # nan -> 0 +# rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical) + +# # 4. 偏航对齐 (Yaw Alignment) +# # 目标:夹爪 +Z 平行于 Lid +Y +# grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1) +# grip_width_world = quat_apply(gripper_quat_w, grip_width_local) + +# lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1) +# lid_handle_world = quat_apply(lid_quat_w, lid_handle_local) + +# dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1) +# rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4 +# # nan -> 0 +# rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw) + +# # 5. 
组合 +# total_reward = rew_vertical * rew_yaw + +# total_reward=torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0) + +# # debug +# if not torch.isfinite(total_reward).all(): +# print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}") +# print(f"rew_vertical: {rew_vertical}") +# print(f"rew_yaw: {rew_yaw}") + +# return total_reward + + +def gripper_lid_orientation_alignment( + env, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "left_hand_body", + gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z + gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行) + lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y + max_angle_deg: float = 60.0, # 允许的最大角度偏差(度) +) -> torch.Tensor: + + # 1. 获取对象 + lid = env.scene[lid_cfg.name] + robot = env.scene[robot_cfg.name] + + # 2. 获取姿态 + lid_quat_w = lid.data.root_quat_w + body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True) + gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :] + + # 3. 垂直对齐 (Vertical Alignment) + # 目标:夹爪前向轴指向 World -Z(向下) + grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1) + grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local) + world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1) + + # 计算点积(归一化后,点积 = cos(角度)) + dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1) + dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) + + # 计算角度(弧度) + angle_rad = torch.acos(torch.clamp(dot_vertical, -1.0, 1.0)) + max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device) + + # 如果角度 <= max_angle_deg,给予奖励 + # 奖励从角度=0时的1.0平滑衰减到角度=max_angle时的0.0 + angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0) + rew_vertical = 1.0 - angle_normalized # 角度越小,奖励越大 + + # 如果角度超过60度,奖励为0 + rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical)) + + # nan -> 0 + rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical) + + # 4. 偏航对齐 (Yaw Alignment) + # 目标:夹爪 +Z 平行于 Lid +Y + grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1) + grip_width_world = quat_apply(gripper_quat_w, grip_width_local) + + lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1) + lid_handle_world = quat_apply(lid_quat_w, lid_handle_local) + + dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1) + rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4 + # nan -> 0 + rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw) + + # 5. 
组合 + total_reward = rew_vertical * rew_yaw + + total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0) + + # debug + if not torch.isfinite(total_reward).all(): + print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}") + print(f"rew_vertical: {rew_vertical}") + print(f"rew_yaw: {rew_yaw}") + + return total_reward + + +# def gripper_open_close_reward( +# env: ManagerBasedRLEnv, +# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# left_gripper_name: str = "left_hand_body", # 用于计算距离的 body +# # 注意:确保 joint_names 只包含那两个夹爪关节 +# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], +# close_threshold: float = 0.05, # 5cm 以内视为“近”,需要闭合 +# target_open_pos: float = 0.0, # 【修正】张开是 0.0 +# target_close_pos: float = 0.03 # 【修正】闭合是 0.03 +# ) -> torch.Tensor: +# """ +# 逻辑很简单: +# 1. 如果 距离 < close_threshold:目标关节位置 = 0.03 (闭合) +# 2. 如果 距离 >= close_threshold:目标关节位置 = 0.0 (张开) +# 3. 返回 -abs(当前关节 - 目标关节),即惩罚误差。 +# """ + +# # 1. 获取对象 +# lid = env.scene[lid_cfg.name] +# robot = env.scene[robot_cfg.name] + +# # 2. 计算距离 (末端 vs Lid中心) +# lid_pos_w = lid.data.root_pos_w[:, :3] + +# body_ids, _ = robot.find_bodies([left_gripper_name]) +# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3] + +# # distance: (num_envs,) +# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1) + +# # 3. 动态确定目标关节值 +# # 如果距离小于阈值,目标设为 close_pos,否则设为 open_pos +# # target_val: (num_envs, 1) +# # print(distance) +# target_val = torch.where( +# distance < close_threshold, +# torch.tensor(target_close_pos, device=env.device), +# torch.tensor(target_open_pos, device=env.device) +# ).unsqueeze(1) + +# # 4. 获取当前关节位置 +# joint_ids, _ = robot.find_joints(joint_names) +# # joint_pos: (num_envs, 2) +# current_joint_pos = robot.data.joint_pos[:, joint_ids] + +# # 取绝对值(防止左指是负数的情况,虽然你的配置里范围看起来都是正的,加上abs更保险) +# current_joint_pos_abs = torch.abs(current_joint_pos) + +# # 5. 计算误差 +# # error: (num_envs,) -> 两个手指误差的平均值 +# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1) + +# # # 6. 返回负奖励 (惩罚) +# # # 误差越大,惩罚越大。完全符合要求时奖励为 0。 +# # return -error +# # ===================================================== +# # 🚀 核心修复:安全输出 +# # ===================================================== +# # 1. 使用 tanh 限制数值范围在 [-1, 0] +# reward = -torch.tanh(error / 0.01) + +# # 2. 过滤 NaN!这一步能救命! +# # 如果物理引擎算出 NaN,这里会把它变成 0.0,防止训练崩溃 +# reward = torch.nan_to_num(reward, nan=0.0, posinf=0.0, neginf=-1.0) + +# return reward + + +# def gripper_open_close_reward( +# env: ManagerBasedRLEnv, +# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# left_gripper_name: str = "left_hand_body", +# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], +# close_threshold: float = 0.05, +# target_open_pos: float = 0.0, +# target_close_pos: float = 0.03, +# height_offset: float = 0.06, # 新增:与位置奖励保持一致 +# ) -> torch.Tensor: +# """ +# 逻辑: +# 1. 计算夹爪到【目标抓取点】(Lid上方 height_offset 处) 的距离。 +# 2. 如果距离 < close_threshold:目标关节位置 = 闭合。 +# 3. 否则:目标关节位置 = 张开。 +# """ + +# # 1. 获取对象 +# lid = env.scene[lid_cfg.name] +# robot = env.scene[robot_cfg.name] + +# # 2. 计算目标抓取点位置 (Lid位置 + Z轴偏移) +# lid_pos_w = lid.data.root_pos_w[:, :3].clone() +# lid_pos_w[:, 2] += height_offset # 修正为抓取点位置 + +# # 获取夹爪位置 +# body_ids, _ = robot.find_bodies([left_gripper_name]) +# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3] + +# # 3. 
计算距离 (夹爪 vs 目标抓取点) +# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1) + +# # 4. 动态确定目标关节值 +# # 靠近抓取点 -> 闭合;远离 -> 张开 +# target_val = torch.where( +# distance < close_threshold, +# torch.tensor(target_close_pos, device=env.device), +# torch.tensor(target_open_pos, device=env.device) +# ).unsqueeze(1) + +# # 5. 获取当前关节位置 +# joint_ids, _ = robot.find_joints(joint_names) +# current_joint_pos = robot.data.joint_pos[:, joint_ids] +# current_joint_pos_abs = torch.abs(current_joint_pos) + +# # 6. 计算误差并返回奖励 +# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1) + +# # 使用 tanh 限制数值范围 +# reward = 1.0 - torch.tanh(error / 0.01) # 误差越小奖励越大 + +# return torch.nan_to_num(reward, nan=0.0) + + +def _is_grasped( + env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg, + robot_cfg: SceneEntityCfg, + left_gripper_name: str, + right_gripper_name: str, + joint_names: list, + height_offset: float, + grasp_radius: float, + target_close_pos: float +) -> torch.Tensor: + """ + 内部辅助函数:判定是否成功抓取。 + 条件: + 1. 夹爪中心在把手目标点附近 (Sphere Check) + 2. 夹爪处于闭合发力状态 (Joint Effort Check) + 3. (关键) 夹爪Z轴高度合适,不能压在盖子上面 (Z-Axis Check) + """ + # 1. 获取对象 + lid = env.scene[lid_cfg.name] + robot = env.scene[robot_cfg.name] + + # 2. 目标点位置 (把手中心) + lid_pos_w = lid.data.root_pos_w[:, :3].clone() + lid_pos_w[:, 2] += height_offset + + # 3. 夹爪位置 + l_ids, _ = robot.find_bodies([left_gripper_name]) + r_ids, _ = robot.find_bodies([right_gripper_name]) + pos_L = robot.data.body_pos_w[:, l_ids[0], :3] + pos_R = robot.data.body_pos_w[:, r_ids[0], :3] + gripper_center = (pos_L + pos_R) / 2.0 + + # 4. 条件一:距离判定 (在把手球范围内) + dist_to_handle = torch.norm(gripper_center - lid_pos_w, dim=-1) + is_near = dist_to_handle < grasp_radius + + # 5. 条件二:Z轴高度判定 (防止压在盖子上) + # 夹爪中心必须在把手目标点附近,允许上下微小浮动,但不能太高 + # 假设 height_offset 是把手几何中心,那么夹爪不应该比它高太多 + z_diff = gripper_center[:, 2] - lid_pos_w[:, 2] + is_z_valid = torch.abs(z_diff) < 0.03 # 3cm 容差 + + # 6. 条件三:闭合判定 (正在尝试闭合) + joint_ids, _ = robot.find_joints(joint_names) + # 获取关节位置 (绝对值) + joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids]) + + # === 修改闭合判定 === + + # 1. 关节位置判定 (Effort Check) + # 只要用力了就行,去掉上限判定,防止夹太紧被误判 + # 假设 0.05 是你的新 target_close_pos + is_effort_closing = torch.all(joint_pos > 0.005, dim=1) + + # 2. 几何排斥判定 (Geometry Check) + # 如果真的夹住了东西,左右指尖的距离应该被把手撑开,不会太小 + # 获取指尖位置 + dist_fingertips = torch.norm(pos_L - pos_R, dim=-1) + + # 假设把手厚度是 1cm (0.01m) + # 如果指尖距离 < 0.005,说明两个手指已经碰到一起了(空夹),没夹住东西 + # 如果指尖距离 >= 0.005,说明中间有东西挡着 + is_not_empty = dist_fingertips > 0.005 + + # 综合判定: + # 1. 在把手附近 (is_near) + # 2. 高度合适 (is_z_valid) + # 3. 在用力闭合 (is_effort_closing) + # 4. 指尖没贴死 (is_not_empty) -> 这就证明夹住东西了! + return is_near & is_z_valid & is_effort_closing & is_not_empty + + +def gripper_close_when_near( + env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], + target_close_pos: float = 0.03, + height_offset: float = 0.02, + grasp_radius: float = 0.05 +) -> torch.Tensor: + + # 1. 判定基础抓取条件是否满足 + is_grasped = _is_grasped( + env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name, + joint_names, height_offset, grasp_radius, target_close_pos + ) + + # 2. 
计算夹紧程度 (Clamping Intensity) + robot = env.scene[robot_cfg.name] + joint_ids, _ = robot.find_joints(joint_names) + # 获取当前关节位置绝对值 (num_envs, num_joints) + current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids]) + # 计算两个指关节的平均闭合深度 + mean_joint_pos = torch.mean(current_joint_pos, dim=1) + + # 计算奖励系数:当前位置 / 目标闭合位置 + # 这样当关节越接近 0.03 时,奖励越接近 1.0 + # 强制让 Agent 产生“压满”动作的冲动 + clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0) + + # 3. 只有在满足抓取判定时,才发放带有权重的夹紧奖励 + # 如果没对准或者没夹到,奖励依然是 0 + return torch.where(is_grasped, clamping_factor, 0.0) + +# def gripper_close_when_near( +# env: ManagerBasedRLEnv, +# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), +# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), +# # 注意:这里我们需要具体的指尖 body 名字来做几何判定 +# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名 +# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名 +# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], +# target_close_pos: float = 0.03, +# height_offset: float = 0.03, +# grasp_radius: float = 0.02 # 球面半径 2cm +# ) -> torch.Tensor: + +# # 1. 获取位置 +# lid = env.scene[lid_cfg.name] +# robot = env.scene[robot_cfg.name] +# lid_pos_w = lid.data.root_pos_w[:, :3].clone() +# lid_pos_w[:, 2] += height_offset # 把手中心 + +# # 获取左右指尖位置 +# l_ids, _ = robot.find_bodies([left_gripper_name]) +# r_ids, _ = robot.find_bodies([right_gripper_name]) +# pos_L = robot.data.body_pos_w[:, l_ids[0], :3] +# pos_R = robot.data.body_pos_w[:, r_ids[0], :3] + +# # 计算夹爪中心 +# pos_center = (pos_L + pos_R) / 2.0 + +# # 2. 距离判定 (Is Inside Sphere?) +# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1) +# is_in_sphere = (dist_center < grasp_radius).float() + +# # 3. "在中间"判定 (Is Between?) +# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间 +# vec_LR = pos_R - pos_L # 左指指向右指 +# vec_LO = lid_pos_w - pos_L # 左指指向物体 + +# # 计算投影比例 t +# # P_proj = P_L + t * (P_R - P_L) +# # t = (vec_LO . vec_LR) / |vec_LR|^2 +# # 如果 0 < t < 1,说明投影在两个指尖之间 + +# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6 +# dot = torch.sum(vec_LO * vec_LR, dim=-1) +# t = dot / len_sq + +# is_between = (t > 0.0) & (t < 1.0) +# is_between = is_between.float() + +# # 4. 闭合判定 +# joint_ids, _ = robot.find_joints(joint_names) +# # 注意:如果是 Binary Action,可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题 +# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids]) +# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1) +# # 只有当非常接近闭合目标时才给分 +# is_closing = (close_error < 0.005).float() # 允许 5mm 误差 + +# # 5. 
最终奖励 +# # 只有三者同时满足才给 1.0 分 +# reward = is_in_sphere * is_between * is_closing + +# return torch.nan_to_num(reward, nan=0.0) + + +# def lid_is_lifted( +# env:ManagerBasedRLEnv, +# minimal_height:float, +# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid") +# ) -> torch.Tensor: + +# lid = env.scene[lid_cfg.name] +# lid_height = lid.data.root_pos_w[:, 2] +# reward=torch.where(lid_height > minimal_height, 1.0, 0.0) +# reward=torch.nan_to_num(reward, nan=0.0) + +# return reward + + +def lid_is_lifted( + env: ManagerBasedRLEnv, + minimal_height: float = 0.05, # 相对提升阈值 + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], + height_offset: float = 0.03, + grasp_radius: float = 0.1, + target_close_pos: float = 0.03, +) -> torch.Tensor: + + is_grasped = _is_grasped( + env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name, + joint_names, height_offset, grasp_radius, target_close_pos + ) + + lid = env.scene[lid_cfg.name] + + # 1. 获取高度 + current_height = lid.data.root_pos_w[:, 2] + # 自动获取初始高度 + initial_height = lid.data.default_root_state[:, 2] + + # 2. 计算提升量 + lift_height = torch.clamp(current_height - initial_height, min=0.0) + + # 3. 速度检查 (防撞飞) + # 如果速度 > 1.0 m/s,视为被撞飞,不给分 + lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1) + is_stable = lid_vel < 1.0 + + # 4. 计算奖励 + # 基础分:高度越高分越高 + shaping_reward = lift_height * 2.0 + # 成功分:超过阈值 + success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0) + + # 组合 + # 必须 is_grasped AND is_stable + reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height)) + + return torch.nan_to_num(reward, nan=0.0) + + + +def lid_lift_success_reward( + env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand_body", + joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], + # 关键参数 + settled_height: float = 0.75, # 盖子落在超声仪上的稳定高度 (需根据仿真实际情况微调) + target_lift_height: float = 0.05, # 目标提升高度 (5cm) + grasp_dist_threshold: float = 0.05, # 抓取判定距离 + closed_pos: float = 0.03 # 夹爪闭合阈值 +) -> torch.Tensor: + """ + 提升奖励:只有在【夹稳】的前提下,将盖子【相对】提升达到目标高度才给高分。 + """ + # 1. 获取数据 + lid = env.scene[lid_cfg.name] + robot = env.scene[robot_cfg.name] + + # 盖子实时高度 + lid_z = lid.data.root_pos_w[:, 2] + lid_pos = lid.data.root_pos_w[:, :3] + + # 夹爪位置 + body_ids, _ = robot.find_bodies([left_gripper_name]) + gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3] + + # 2. 【条件一:是否夹稳 (Is Grasped?)】 + # 距离检查 + distance = torch.norm(gripper_pos - lid_pos, dim=-1) + is_close = distance < grasp_dist_threshold + + # 闭合检查 + joint_ids, _ = robot.find_joints(joint_names) + joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids]) + # 假设 > 80% 的闭合度算抓紧了 + is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1) + + is_grasped = is_close & is_closed + + # 3. 【条件二:计算相对提升 (Relative Lift)】 + # 当前高度 - 初始稳定高度 + current_lift = lid_z - settled_height + + # 4. 
计算奖励 + lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0) + + # 基础提升分 (Shaping Reward) + lift_reward = lift_progress + + # 成功大奖 (Success Bonus) + # 如果提升超过目标高度 (比如 > 95% 的 5cm),给额外大奖 + success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0) + + # 组合:只有在 is_grasped 为 True 时才发放提升奖励 + total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward)) + + # 5. 安全输出 + return torch.nan_to_num(total_reward, nan=0.0) + + +def lid_lift_progress_reward( + env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", + right_gripper_name: str = "left_hand_r", + joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"], + initial_height: float = 0.8, + target_height_lift: float = 0.15, + height_offset: float = 0.02, # 必须与抓取奖励保持一致 + grasp_radius: float = 0.1, # 提升时可以稍微放宽一点半径,防止抖动丢分 + target_close_pos: float = 0.03, + std: float = 0.05 # 标准差 +) -> torch.Tensor: + + # 1. 判定是否夹住 + is_grasped = _is_grasped( + env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name, + joint_names, height_offset, grasp_radius, target_close_pos + ) + + # 2. 计算高度 + lid = env.scene[lid_cfg.name] + current_height = lid.data.root_pos_w[:, 2] + lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift) + # print(current_height) + # print(lift_height) + # 3. 计算奖励 + # 只有在 is_grasped 为 True 时,才发放高度奖励 + # 这样彻底杜绝了 "砸飞/撞飞" 拿分的情况 + progress = torch.tanh(lift_height / std) + + reward = torch.where(is_grasped, progress, 0.0) + + return reward + diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/terminations.py b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/terminations.py new file mode 100644 index 0000000..8b31be2 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/terminations.py @@ -0,0 +1,97 @@ +from __future__ import annotations +import torch +from typing import TYPE_CHECKING +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import SceneEntityCfg + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + +def lid_dropped(env: ManagerBasedRLEnv, + minimum_height: float = -0.05, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor: + lid: RigidObject = env.scene[lid_cfg.name] + return lid.data.root_pos_w[:, 2] < minimum_height + +def lid_successfully_grasped(env: ManagerBasedRLEnv, + distance_threshold: float = 0.03, + height_threshold: float = 0.2, + lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + gripper_body_name: str = "left_hand_body") -> torch.Tensor: + lid: RigidObject = env.scene[lid_cfg.name] + robot: Articulation = env.scene[robot_cfg.name] + body_idx = robot.find_bodies(gripper_body_name)[0] + gripper_pos_w = robot.data.body_pos_w[:, body_idx, :] + distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1) + is_close = distance < distance_threshold + is_lifted = lid.data.root_pos_w[:, 2] > height_threshold + return is_close & is_lifted + +def gripper_at_lid_side(env: ManagerBasedRLEnv, + lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"), + robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"), + left_gripper_name: str = "left_hand__l", # 改为两个下划线 + right_gripper_name: str = "left_hand_r", + side_distance: float = 0.05, + side_tolerance: float = 0.01, + alignment_tolerance: 
float = 0.02,
+                        height_offset: float = 0.1,
+                        height_tolerance: float = 0.02) -> torch.Tensor:
+    """Terminate when the gripper center is positioned on the side of the lid at the specified height.
+
+    Coordinate conventions:
+    - X direction: the direction along which the two gripper fingers close towards each other
+    - Y direction: the direction of the gap between the fingers, aligned with the lid handle
+    - Z direction: height
+    """
+    lid: RigidObject = env.scene[lid_cfg.name]
+    robot: Articulation = env.scene[robot_cfg.name]
+
+    lid_pos_w = lid.data.root_pos_w[:, :3]
+
+    # get the positions of both gripper fingers
+    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
+    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
+
+    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
+    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
+
+    # compute the gripper center position relative to the lid
+    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
+    rel_pos = gripper_center - lid_pos_w
+
+    # Y direction: should sit on either side of the lid (at distance side_distance)
+    y_dist = torch.abs(rel_pos[:, 1])
+    y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
+
+    # X direction: should be aligned (close to 0)
+    x_dist = torch.abs(rel_pos[:, 0])
+    x_ok = x_dist < alignment_tolerance
+
+    # Z direction: should be height_offset above the lid
+    z_error = torch.abs(rel_pos[:, 2] - height_offset)
+    z_ok = z_error < height_tolerance
+
+    # all conditions must hold
+    return x_ok & y_ok & z_ok
+
+
+def base_height_failure(env: ManagerBasedRLEnv,
+                        minimum_height: float | None = None,
+                        maximum_height: float | None = None,
+                        asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
+    """Terminate when the robot's base height is outside the specified range."""
+    # extract the used quantities (to enable type-hinting)
+    asset: Articulation = env.scene[asset_cfg.name]
+    root_pos_z = asset.data.root_pos_w[:, 2]
+
+    # check if height is outside the range
+    out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool)
+    if minimum_height is not None:
+        out_of_bounds |= root_pos_z < minimum_height
+    if maximum_height is not None:
+        out_of_bounds |= root_pos_z > maximum_height
+
+    return out_of_bounds
+
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mindbot_env_cfg.py b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mindbot_env_cfg.py
new file mode 100644
index 0000000..c8e5971
--- /dev/null
+++ b/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mindbot_env_cfg.py
@@ -0,0 +1,578 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import math
+import torch
+from isaaclab.actuators import ImplicitActuatorCfg
+import isaaclab.sim as sim_utils
+from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
+from isaaclab.envs import ManagerBasedRLEnvCfg
+from isaaclab.managers import EventTermCfg as EventTerm
+from isaaclab.managers import ObservationGroupCfg as ObsGroup
+from isaaclab.managers import ObservationTermCfg as ObsTerm
+from isaaclab.managers import RewardTermCfg as RewTerm
+from isaaclab.managers import SceneEntityCfg
+from isaaclab.managers import TerminationTermCfg as DoneTerm
+from isaaclab.managers import CurriculumTermCfg as CurrTerm
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.utils import configclass
+from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
+from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
+from isaaclab.sensors import CameraCfg
+
+from . import mdp
+
+##
+# Pre-defined configs
+##
+
+from mindbot.robot.mindbot import MINDBOT_CFG
+
+# ====== Other scene object configs ======
+from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
+from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
+
+
+TABLE_CFG = RigidObjectCfg(
+    prim_path="{ENV_REGEX_NS}/Table",
+    init_state=RigidObjectCfg.InitialStateCfg(
+        pos=[0.95, -0.3, 0.005],
+        rot=[0.7071, 0.0, 0.0, 0.7071],
+        lin_vel=[0.0, 0.0, 0.0],
+        ang_vel=[0.0, 0.0, 0.0],
+    ),
+    spawn=UsdFileCfg(
+        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
+        rigid_props=RigidBodyPropertiesCfg(
+            rigid_body_enabled=True,
+            solver_position_iteration_count=32,
+            solver_velocity_iteration_count=16,
+            disable_gravity=False,
+        ),
+        collision_props=CollisionPropertiesCfg(
+            collision_enabled=True,
+            contact_offset=0.0005,  # original 0.02
+            rest_offset=0,
+        ),
+    ),
+)
+
+LID_CFG = RigidObjectCfg(
+    prim_path="{ENV_REGEX_NS}/Lid",
+    init_state=RigidObjectCfg.InitialStateCfg(
+        pos=[0.2, 0.65, 0.9],
+        rot=[1.0, 0.0, 0.0, 0.0],
+        lin_vel=[0.0, 0.0, 0.0],
+        ang_vel=[0.0, 0.0, 0.0],
+    ),
+    spawn=UsdFileCfg(
+        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
+        # scale=(0.2, 0.2, 0.2),
+        rigid_props=RigidBodyPropertiesCfg(
+            rigid_body_enabled=True,
+            solver_position_iteration_count=32,
+            solver_velocity_iteration_count=16,
+            max_angular_velocity=1000.0,
+            max_linear_velocity=1000.0,
+            max_depenetration_velocity=0.5,  # original 5.0
+            linear_damping=5.0,  # original 0.5
+            angular_damping=5.0,  # original 0.5
+            disable_gravity=False,
+        ),
+        collision_props=CollisionPropertiesCfg(
+            collision_enabled=True,
+            contact_offset=0.0005,  # original 0.02
+            rest_offset=0,
+        ),
+    ),
+)
+
+ROOM_CFG = AssetBaseCfg(
+    prim_path="{ENV_REGEX_NS}/Room",
+    spawn=UsdFileCfg(
+        usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
+    ),
+)
+
+# ROOM_CFG = RigidObjectCfg(
+#     prim_path="{ENV_REGEX_NS}/Room",
+#     init_state=RigidObjectCfg.InitialStateCfg(
+#         pos=[0.0, 0.0, 0.0],
+#         rot=[1.0, 0.0, 0.0, 0.0],
+#         lin_vel=[0.0, 0.0, 0.0],
+#         ang_vel=[0.0, 0.0, 0.0],
+#     ),
+#     spawn=UsdFileCfg(
+#         usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
+#         # scale=(0.2, 0.2, 0.2),
+#         rigid_props=RigidBodyPropertiesCfg(
+#             rigid_body_enabled=True,
+#             solver_position_iteration_count=32,
+#             solver_velocity_iteration_count=16,
+#
max_angular_velocity=1000.0, +# max_linear_velocity=1000.0, +# max_depenetration_velocity=0.5,#original 5.0 +# linear_damping=5.0, #original 0.5 +# angular_damping=5.0, #original 0.5 +# disable_gravity=False, +# ), +# collision_props=CollisionPropertiesCfg( +# collision_enabled=True, +# contact_offset=0.0005,#original 0.02 +# rest_offset=0 +# ), +# ), +# ) + + + +ULTRASOUND_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound.usd", + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=1.0 + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False,# + solver_position_iteration_count=32, + solver_velocity_iteration_count=16, + stabilization_threshold=1e-6, + # fix_root_link=True, + ), + ), + init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)), + # actuators={} + actuators={ + "passive_damper": ImplicitActuatorCfg( + # ".*" 表示匹配该USD文件内的所有关节(无论是轮子、屏幕转轴还是其他) + joint_names_expr=[".*"], + + stiffness=0.0, + damping=1000.0, + effort_limit_sim=10000.0, + velocity_limit_sim=100.0, + ), + } +) + +## +# Scene definition +## + + +@configclass +class MindbotSceneCfg(InteractiveSceneCfg): + """Configuration for a cart-pole scene.""" + + # ground plane + ground = AssetBaseCfg( + prim_path="/World/ground", + spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)), + ) + + # robot + Room: AssetBaseCfg = ROOM_CFG.replace(prim_path="{ENV_REGEX_NS}/Room") + Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot") + table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table") + ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound") + lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid") + # lights + dome_light = AssetBaseCfg( + prim_path="/World/DomeLight", + spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0), + ) + # head_camera=CameraCfg( + # prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head", + # update_period=1/120, + # height=480, + # width=640, + # data_type=["rgb"], + # spawn=None, + # ) + left_hand_camera=CameraCfg( + prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand", + update_period=1/120, + height=480, + width=640, + data_types=["rgb"], + spawn=None, + ) + right_hand_camera=CameraCfg( + prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand", + update_period=1/120, + height=480, + width=640, + data_types=["rgb"], + spawn=None, + ) + head_camera=CameraCfg( + prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head", + update_period=1/120, + height=480, + width=640, + data_types=["rgb"], + spawn=None, + ) + chest_camera=CameraCfg( + prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest", + update_period=1/120, + height=480, + width=640, + data_types=["rgb"], + spawn=None, + ) +## +# MDP settings +## + +def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor: + asset = env.scene[asset_cfg.name] + body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True) + pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins + return pos_w + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + # 使用任务空间控制器:策略输出末端执行器位置+姿态增量(6维) + left_arm_ee = DifferentialInverseKinematicsActionCfg( + asset_name="Mindbot", + joint_names=["l_joint[1-6]"], # 左臂的6个关节 + 
body_name="left_hand_body", # 末端执行器body名称 + controller=DifferentialIKControllerCfg( + command_type="pose", # 控制位置+姿态 + use_relative_mode=True, # 相对模式:动作是增量 + ik_method="dls", # Damped Least Squares方法 + ), + scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025), + ) + + # right_arm_fixed = mdp.JointPositionActionCfg( + # asset_name="Mindbot", + # joint_names=["r_joint[1-6]"], # 对应 l_joint1 到 l_joint6 + + # # 1. 缩放设为 0:这让 RL 策略无法控制它,它不会动 + # scale=0.0, + + # # 2. 偏移设为你的目标角度(弧度):这告诉电机“永远停在这里” + # # 对应 (135, -70, -90, 90, 90, -70) + # offset={ + # "r_joint1": 2.3562, + # "r_joint2": -1.2217, + # "r_joint3": -1.5708, + # "r_joint4": 1.5708, + # "r_joint5": 1.5708, + # "r_joint6": -1.2217, + # }, + # ) + + grippers_position = mdp.BinaryJointPositionActionCfg( + asset_name="Mindbot", + joint_names=["left_hand_joint_left", "left_hand_joint_right"], + + # 修正:使用字典格式 + # open_command_expr={"关节名正则": 值} + open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0}, + + # close_command_expr={"关节名正则": 值} + close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03}, + ) + + + trunk_position = mdp.JointPositionActionCfg( + asset_name="Mindbot", + joint_names=["PrismaticJoint"], + scale=0.00, + offset=0.3, + clip=None + ) + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")}) + mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}) + mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}) + + left_gripper_pos = ObsTerm(func=left_gripper_pos_w, + params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])}) + lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")}) + ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")}) + + def __post_init__(self) -> None: + self.enable_corruption = False + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + # 重置所有关节到 init_state(无偏移)must be + reset_mindbot_all_joints = EventTerm( + func=mdp.reset_joints_by_offset, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("Mindbot"), + "position_range": (0.0, 0.0), + "velocity_range": (0.0, 0.0), + }, + ) + # 2. 底座安装误差 (建议缩小到 2cm) + reset_mindbot_base = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("Mindbot"), + "pose_range": { + "x": (-0.05, 0.05), + "y": (-0.05, 0.05), + "z": (0.74, 0.75), + "yaw": (-0.1, 0.1), + }, + "velocity_range": {"x": (0.0, 0.0), "y": (0.0, 0.0)}, + }, + ) + # 1. 
+    # 1. Small per-joint offsets for the left arm (0.01 rad is roughly 0.6 deg)
+    reset_mindbot_left_arm = EventTerm(
+        func=mdp.reset_joints_by_offset,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
+            "position_range": (-0.01, 0.01),
+            "velocity_range": (0.0, 0.0),
+        },
+    )
+
+    reset_table = EventTerm(
+        func=mdp.reset_root_state_uniform,
+        mode="reset",
+        params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}},
+    )
+    reset_ultrasound = EventTerm(
+        func=mdp.reset_root_state_uniform,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("ultrasound"),
+            "pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)},
+            "velocity_range": {"x": (0.0, 0.0)}
+        }
+    )
+    reset_lid = EventTerm(
+        func=mdp.reset_root_state_uniform,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("lid"),
+            "pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)},
+            "velocity_range": {"x": (0.0, 0.0)}
+        }
+    )
+
+
+@configclass
+class RewardsCfg:
+    """Reward terms for the MDP."""
+
+    gripper_lid_orientation_alignment = RewTerm(
+        func=mdp.gripper_lid_orientation_alignment,
+        params={
+            "lid_cfg": SceneEntityCfg("lid"),
+            "robot_cfg": SceneEntityCfg("Mindbot"),
+            "gripper_body_name": "left_hand_body",
+            "gripper_forward_axis": (0.0, 0.0, 1.0),
+            "gripper_width_axis": (0.0, 1.0, 0.0),
+            "lid_handle_axis": (0.0, 1.0, 0.0),
+            "max_angle_deg": 85.0,  # allow up to 85 deg of misalignment
+        },
+        weight=5.0  # original 5.0
+    )
+
+    # Stage 2
+    # Position-alignment reward: keep the grippers on either side of the lid, offset above it in Z
+    gripper_lid_position_alignment = RewTerm(
+        func=mdp.gripper_lid_position_alignment,
+        params={
+            "lid_cfg": SceneEntityCfg("lid"),
+            "robot_cfg": SceneEntityCfg("Mindbot"),
+            "left_gripper_name": "left_hand__l",
+            "right_gripper_name": "left_hand_r",
+            "height_offset": 0.07,  # Z: 0.07 m above the lid
+            "std": 0.3,             # tolerance of the position alignment
+        },
+        weight=3.0  # original 3.0
+    )
+    approach_lid_penalty = RewTerm(
+        func=mdp.gripper_lid_distance_penalty,
+        params={
+            "lid_cfg": SceneEntityCfg("lid"),
+            "robot_cfg": SceneEntityCfg("Mindbot"),
+            "left_gripper_name": "left_hand__l",
+            "right_gripper_name": "left_hand_r",
+            "height_offset": 0.07,
+        },
+        # A negative weight turns this into a penalty:
+        # e.g. at a distance of 0.5 m the term contributes -0.5 * 1.0 = -0.5.
+        weight=-1.0
+    )
+    # [New] Fine alignment (guides the gripper into the final ~2 cm)
+    gripper_lid_fine_alignment = RewTerm(
+        func=mdp.gripper_lid_position_alignment,
+        params={
+            "lid_cfg": SceneEntityCfg("lid"),
+            "robot_cfg": SceneEntityCfg("Mindbot"),
+            "left_gripper_name": "left_hand__l",
+            "right_gripper_name": "left_hand_r",
+            "height_offset": 0.07,  # NOTE: this height must be accurate and match the interaction check height
+            "std": 0.05,            # steep gradient, only effective within roughly 5-10 cm
+        },
+        weight=10.0  # high weight
+    )
+
+    gripper_close_interaction = RewTerm(
+        func=mdp.gripper_close_when_near,
+        params={
+            "lid_cfg": SceneEntityCfg("lid"),
+            "robot_cfg": SceneEntityCfg("Mindbot"),
+            "left_gripper_name": "left_hand__l",
+            "right_gripper_name": "left_hand_r",
+            "joint_names": ["left_hand_joint_left", "left_hand_joint_right"],
+            "target_close_pos": 0.03,
+            "height_offset": 0.04,
+            "grasp_radius": 0.03,
+        },
+        weight=10.0
+    )
+
+    lid_lifted_reward = RewTerm(
+        func=mdp.lid_is_lifted,  # reuse the existing function in rewards.py
+        params={
+            "lid_cfg": SceneEntityCfg("lid"),
+            "minimal_height": 1.0,  # tune to the scene; must sit above the lid's initial height
+        },
+        weight=10.0  # large enough weight
+    )
+
+    lid_lifting_reward = RewTerm(
+        func=mdp.lid_lift_progress_reward,
+        params={
+            "lid_cfg": SceneEntityCfg("lid"),
+            "initial_height": 0.8,  # verify this value by printing the lid's z coordinate
+            "target_height_lift": 0.2,
+            "height_offset": 0.07,  # keep consistent with the other reward terms
+            "std": 0.1
+        },
+        # Large weight: lifting is the final objective
+        weight=10.0
+    )
+
+
+@configclass
+class TerminationsCfg:
+    """Termination terms for the MDP."""
+
+    time_out = DoneTerm(func=mdp.time_out, time_out=True)
+
+    mindbot_fly_away = DoneTerm(
+        func=mdp.base_height_failure,
+        params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
+    )
+
+    lid_fly_away = DoneTerm(
+        func=mdp.base_height_failure,
+        params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
+    )
+
+    # New: lid-drop check.
+    # Triggers when the lid falls back toward the table (height close to its initial value);
+    # ideally only after the episode has been running for a while, to avoid the instant right after reset.
+    lid_dropped = DoneTerm(
+        func=mdp.base_height_failure,  # reuse the height-check function
+        params={
+            "asset_cfg": SceneEntityCfg("lid"),
+            "minimum_height": 0.79,  # below this, the lid has fallen back onto (or been pushed into) the table
+        },
+    )
+
+
+##
+# Environment configuration
+##
+
+@configclass
+class CurriculumCfg:
+    pass
+    # action_rate = CurrTerm(func=mdp.modify_reward_weight,
+    #                        params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
+
+    # joint_vel = CurrTerm(func=mdp.modify_reward_weight,
+    #                      params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
+
+    # position_std_scheduler = CurrTerm(
+    #     func=mdp.modify_term_cfg,  # use the Isaac Lab built-in helper directly
+    #     params={
+    #         # path: rewards manager -> reward term name -> params dict -> std key
+    #         "address": "rewards.gripper_lid_position_alignment.params.std",
+    #
+    #         # modification logic: the annealing function defined in mdp
+    #         "modify_fn": mdp.annealing_std,
+    #
+    #         # arguments passed to that function
+    #         "modify_params": {
+    #             "start_step": 00,    # about 600 iterations
+    #             "end_step": 5_000,   # about 1200 iterations
+    #             "start_std": 0.3,
+    #             "end_std": 0.05,
+    #         }
+    #     }
+    # )
+
+
+@configclass
+class MindbotEnvCfg(ManagerBasedRLEnvCfg):
+    # Scene settings
+    scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=5.0)
+    # Basic settings
+    observations: ObservationsCfg = ObservationsCfg()
+    actions: ActionsCfg = ActionsCfg()
+    events: EventCfg = EventCfg()
+    # MDP settings
+    rewards: RewardsCfg = RewardsCfg()
+    terminations: TerminationsCfg = TerminationsCfg()
+    # curriculum: CurriculumCfg = CurriculumCfg()
+
+    # Post initialization
+    def __post_init__(self) -> None:
+        """Post initialization."""
+        # general settings
+        self.decimation = 4  # original 2; with dt = 1/120 this gives a 30 Hz control rate
+        self.episode_length_s = 10
+        # viewer settings
+        self.viewer.eye = (0, 2.5, 2.6)  # (3.5, 0.0, 3.2)
+        # simulation settings
+        self.sim.dt = 1 / 120  # original 1 / 60
+        self.sim.render_interval = self.decimation
+        # 1. base GPU heap memory
+        self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024  # 256 MB
+        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
+        self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
+        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # ~16.8M contact points
+        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # ~1M rigid contact patches
+        # 5. aggregate-pair capacities (for complex articulations)
+        self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
+        self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
+        self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..edaf513
--- /dev/null
+++ b/test.py
@@ -0,0 +1,282 @@
+import argparse
+import sys
+import os
+import time
+
+import imageio
+import numpy as np
+import h5py
+import torch
+
+# Import AppLauncher first and create the SimulationApp early, before anything that needs pxr
+from isaaclab.app import AppLauncher
+import cli_args  # isort: skip
+
+# CLI
+parser = argparse.ArgumentParser(description="Play an RL agent with multi-cam recording.")
+parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
+parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
+parser.add_argument("--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations.")
+parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
+parser.add_argument("--task", type=str, default=None, help="Name of the task.")
+parser.add_argument("--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point.")
+parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
+parser.add_argument("--use_pretrained_checkpoint", action="store_true", help="Use the pre-trained checkpoint from Nucleus.")
+parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
+parser.add_argument("--max_steps", type=int, default=None, help="Maximum number of steps; exit early once reached.")
+cli_args.add_rsl_rl_args(parser)
+AppLauncher.add_app_launcher_args(parser)
+args_cli, hydra_args = parser.parse_known_args()
+if args_cli.video:
+    args_cli.enable_cameras = True
+# forward the remaining arguments to hydra
+sys.argv = [sys.argv[0]] + hydra_args
+
+# ==== Instantiate the SimulationApp first ====
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+
+# ==== Only then import modules that depend on isaac/pxr ====
+import gymnasium as gym
+from rsl_rl.runners import DistillationRunner, OnPolicyRunner
+from isaaclab.envs import DirectMARLEnv, DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg, multi_agent_to_single_agent
+from isaaclab.utils.assets import retrieve_file_path
+from isaaclab.utils.dict import print_dict
+from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
+from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
+
+import isaaclab_tasks  # noqa: F401
+from isaaclab_tasks.utils import get_checkpoint_path
+from isaaclab_tasks.utils.hydra import hydra_task_config
+import mindbot.tasks  # noqa: F401
+
+CAM_NAMES = ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand", "cam_top", "cam_side"]
+
+
+@hydra_task_config(args_cli.task, args_cli.agent)
+def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
+    task_name = args_cli.task.split(":")[-1]
+    train_task_name = task_name.replace("-Play", "")
+
+    agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
+    env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
+    env_cfg.seed = agent_cfg.seed
+    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
+    log_root_path = os.path.abspath(os.path.join("logs", "rsl_rl", agent_cfg.experiment_name))
+    print(f"[INFO] Loading experiment from directory: {log_root_path}")
+    if args_cli.use_pretrained_checkpoint:
+        resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
+        if not resume_path:
+            print("[INFO] No pre-trained checkpoint for this task.")
+            return
+    elif args_cli.checkpoint:
+        resume_path = retrieve_file_path(args_cli.checkpoint)
+    else:
+        resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
+
+    log_dir = os.path.dirname(resume_path)
+    env_cfg.log_dir = log_dir
+
+    env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
+
+    print(f"[INFO]: Loading model checkpoint from: {resume_path}")
+    if agent_cfg.class_name == "OnPolicyRunner":
+        runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
+    elif agent_cfg.class_name == "DistillationRunner":
+        runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
+    else:
+        raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
+    runner.load(resume_path)
+
+    policy = runner.get_inference_policy(device=env.unwrapped.device)
+    try:
+        policy_nn = runner.alg.policy
+    except AttributeError:
+        policy_nn = runner.alg.actor_critic
+
+    # export the trained policy
+    export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
+    normalizer = getattr(policy_nn, "actor_obs_normalizer", getattr(policy_nn, "student_obs_normalizer", None))
+    export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
+    export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
+
+    # dt = env.unwrapped.step_dt
+
+    # # recording buffers
+    # cam_buffers = {n: [] for n in CAM_NAMES}
+    # joint_log, joint_vel_log, action_log, ts_log = [], [], [], []
+    # t0 = time.time()
+
+    # obs = env.get_observations()
+    # timestep = 0
+    # step_count = 0
+    # try:
+    #     while simulation_app.is_running():
+    #         start_time = time.time()
+    #         with torch.inference_mode():
+    #             actions = policy(obs)
+    #             obs, _, dones, _ = env.step(actions)
+    #             policy_nn.reset(dones)
+
+    #         # camera frames (env 0 only; drop the [0] to record all envs)
+    #         for name in CAM_NAMES:
+    #             if name not in env.unwrapped.scene.sensors:
+    #                 continue
+    #             cam = env.unwrapped.scene.sensors[name]
+    #             rgba = cam.data.output.get("rgba", cam.data.output.get("rgb"))
+    #             if rgba is None:
+    #                 continue
+    #             frame = rgba[0].cpu().numpy()
+    #             if frame.shape[-1] == 4:
+    #                 frame = frame[..., :3]
+    #             cam_buffers[name].append(frame)
+
+    #         # joints / velocities / actions
+    #         robot = env.unwrapped.scene["Mindbot"]
+    #         joint_log.append(robot.data.joint_pos.cpu().numpy())
+    #         joint_vel_log.append(robot.data.joint_vel.cpu().numpy())
+    #         action_log.append(actions.cpu().numpy())
+    #         ts_log.append(time.time() - t0)
+
+    #         step_count += 1
+    #         if args_cli.max_steps and step_count >= args_cli.max_steps:
+    #             break
+
+    #         if args_cli.video:
+    #             timestep += 1
+    #             if timestep == args_cli.video_length:
+    #                 break
+
+    #         sleep_time = dt - (time.time() - start_time)
+    #         if args_cli.real_time and sleep_time > 0:
+    #             time.sleep(sleep_time)
+    # finally:
+    #     # save HDF5
+    #     h5_path = os.path.join(log_dir, "rollout_multi_cam.h5")
+    #     with h5py.File(h5_path, "w") as f:
+    #         f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip")
f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip") + # f.create_dataset("actions", data=np.stack(action_log), compression="gzip") + # f.create_dataset("timestamps", data=np.array(ts_log)) + # for name, frames in cam_buffers.items(): + # if not frames: + # continue + # dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip") + # if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]: + # fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05 + # else: + # fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295 + # dset.attrs["fx"] = fx + # dset.attrs["fy"] = fy + # dset.attrs["cx"] = cx + # dset.attrs["cy"] = cy + # dset.attrs["focal_length"] = 1.93 + # print(f"[INFO] Saved HDF5 to {h5_path}") + + # # 可选:单路 MP4 + # head_frames = cam_buffers["cam_head"] + # if head_frames: + # fps = int(round(1.0 / dt)) + # video_path = os.path.join(log_dir, "cam_head.mp4") + # imageio.mimsave(video_path, head_frames, fps=fps) + # print(f"[INFO] Saved video to {video_path}") + + # env.close() + dt = env.unwrapped.step_dt + + # 录制缓冲 + cam_buffers = {n: [] for n in CAM_NAMES} + joint_log, joint_vel_log, action_log, ts_log = [], [], [], [] + t0 = time.time() + + obs = env.get_observations() + timestep = 0 + while simulation_app.is_running(): + start_time = time.time() + with torch.inference_mode(): + actions = policy(obs) + obs, _, dones, _ = env.step(actions) + policy_nn.reset(dones) + + # 相机帧(取 env0,如需全部 env 去掉 [0]) + for name in CAM_NAMES: + if name not in env.unwrapped.scene.sensors: + continue + cam = env.unwrapped.scene.sensors[name] + rgba = cam.data.output.get("rgba", cam.data.output.get("rgb")) + if rgba is None: + continue + frame = rgba[0].cpu().numpy() + if frame.shape[-1] == 4: + frame = frame[..., :3] + cam_buffers[name].append(frame) + + # 关节 / 速度 / 动作 + robot = env.unwrapped.scene["Mindbot"] + joint_log.append(robot.data.joint_pos.cpu().numpy()) + joint_vel_log.append(robot.data.joint_vel.cpu().numpy()) + action_log.append(actions.cpu().numpy()) + ts_log.append(time.time() - t0) + + if args_cli.video: + timestep += 1 + if timestep == args_cli.video_length: + break + + sleep_time = dt - (time.time() - start_time) + if args_cli.real_time and sleep_time > 0: + time.sleep(sleep_time) + + # 保存 HDF5 + h5_path = os.path.join(log_dir, "rollout_multi_cam.h5") + with h5py.File(h5_path, "w") as f: + f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip") + f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip") + f.create_dataset("actions", data=np.stack(action_log), compression="gzip") + f.create_dataset("timestamps", data=np.array(ts_log)) + for name, frames in cam_buffers.items(): + if not frames: + continue + dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip") + # 内参:按你相机 vector 设置 + if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]: + fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05 + else: + fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295 + dset.attrs["fx"] = fx + dset.attrs["fy"] = fy + dset.attrs["cx"] = cx + dset.attrs["cy"] = cy + dset.attrs["focal_length"] = 1.93 + print(f"[INFO] Saved HDF5 to {h5_path}") + + # 可选:单路 MP4 + head_frames = cam_buffers["cam_head"] + if head_frames: + fps = int(round(1.0 / dt)) + video_path = os.path.join(log_dir, "cam_head.mp4") + imageio.mimsave(video_path, head_frames, fps=fps) + print(f"[INFO] Saved video to {video_path}") + + env.close() + + +if __name__ == "__main__": + main() + 
+    simulation_app.close()
+
+# import h5py
+
+# h5_path = r"C:\Users\PC\workpalce\mindbot\logs\rsl_rl\mindbot_grasp\2026-01-15_11-51-00\rollout_multi_cam.h5"
+
+# with h5py.File(h5_path, "r") as f:
+#     def walk(name, obj):
+#         if isinstance(obj, h5py.Dataset):
+#             print(f"{name}: shape={obj.shape}, dtype={obj.dtype}")
+#         else:
+#             print(f"{name}/")
+#     f.visititems(walk)
\ No newline at end of file
diff --git a/test_sence.py b/test_sence.py
new file mode 100644
index 0000000..9a56c2b
--- /dev/null
+++ b/test_sence.py
@@ -0,0 +1,73 @@
+from isaaclab.app import AppLauncher
+import argparse
+
+# -----------------------------------------------------------------------------
+# 1. Launch Isaac Sim
+# -----------------------------------------------------------------------------
+parser = argparse.ArgumentParser()
+AppLauncher.add_app_launcher_args(parser)
+args = parser.parse_args()
+
+app_launcher = AppLauncher(args)
+simulation_app = app_launcher.app
+
+# -----------------------------------------------------------------------------
+# 2. Create the SimulationContext first (PhysX is initialized here)
+# -----------------------------------------------------------------------------
+import isaaclab.sim as sim_utils
+
+sim_cfg = sim_utils.SimulationCfg(
+    dt=1 / 120,
+    device=args.device,
+)
+sim = sim_utils.SimulationContext(sim_cfg)
+
+# (optional) set the viewer camera
+sim.set_camera_view(
+    eye=[3.0, 0.0, 2.0],
+    target=[0.0, 0.0, 0.5],
+)
+
+# -----------------------------------------------------------------------------
+# 3. Then play the timeline (this step is essential)
+# -----------------------------------------------------------------------------
+import omni.timeline
+
+timeline = omni.timeline.get_timeline_interface()
+timeline.play()
+
+# -----------------------------------------------------------------------------
+# 4. Only now is it safe to create the environment
+# -----------------------------------------------------------------------------
+import torch
+from isaaclab.envs import ManagerBasedRLEnv
+from mindbot.tasks.manager_based.pull.pull_env_cfg import PullEnvCfg
+
+
+def main():
+    env_cfg = PullEnvCfg()
+    env_cfg.scene.num_envs = 1
+
+    env = ManagerBasedRLEnv(cfg=env_cfg)
+
+    obs, _ = env.reset()
+    print("[OK] Env reset")
+
+    for step in range(100000):
+        actions = torch.zeros(
+            env.num_envs,
+            env.action_manager.total_action_dim,
+            device=env.device,
+        )
+        obs, rew, done, trunc, info = env.step(actions)
+
+        if step % 100 == 0:
+            root_pos = env.scene["Mindbot"].data.root_pos_w[0]
+            print(f"step {step}, root z = {root_pos[2].item():.3f}")
+
+    print("[DONE]")
+
+
+if __name__ == "__main__":
+    main()
+    simulation_app.close()
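
For reference, a minimal sketch of reading back the rollout_multi_cam.h5 file that test.py writes above. The dataset names (joint_pos, actions, timestamps, cams/<name>/rgb) and the intrinsics attributes match what the recording loop creates; the file path here is a placeholder, since the script saves into the run's log_dir.

import h5py

h5_path = "rollout_multi_cam.h5"  # placeholder: test.py writes this file into log_dir

with h5py.File(h5_path, "r") as f:
    joint_pos = f["joint_pos"][:]    # (steps, num_envs, num_joints)
    actions = f["actions"][:]        # (steps, num_envs, action_dim)
    timestamps = f["timestamps"][:]  # (steps,)
    print("recorded steps:", joint_pos.shape[0])

    # camera streams live under cams/<name>/rgb, with intrinsics stored as attributes
    if "cams" in f:
        for name, grp in f["cams"].items():
            rgb = grp["rgb"]
            print(name, rgb.shape, "fx/fy/cx/cy =",
                  rgb.attrs["fx"], rgb.attrs["fy"], rgb.attrs["cx"], rgb.attrs["cy"])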
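
The TerminationsCfg above reuses a single mdp.base_height_failure term for both the fly-away checks (maximum_height) and the lid-drop check (minimum_height). The actual implementation lives in the new mdp/terminations.py files, which are not shown in this part of the patch; the sketch below is only an assumed shape of such a function, illustrating how one term can cover both cases.

import torch
from isaaclab.envs import ManagerBasedRLEnv
from isaaclab.managers import SceneEntityCfg


def base_height_failure(
    env: ManagerBasedRLEnv,
    asset_cfg: SceneEntityCfg,
    minimum_height: float | None = None,
    maximum_height: float | None = None,
) -> torch.Tensor:
    # Flag envs whose asset root height leaves the [minimum_height, maximum_height] band.
    root_z = env.scene[asset_cfg.name].data.root_pos_w[:, 2]
    failed = torch.zeros_like(root_z, dtype=torch.bool)
    if minimum_height is not None:
        failed |= root_z < minimum_height
    if maximum_height is not None:
        failed |= root_z > maximum_height
    return failed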