Local 4090 code commit

This commit is contained in:
2026-01-28 19:51:03 +08:00
parent eddcdd428f
commit 196378f2d3
41 changed files with 8447 additions and 450 deletions

.gitignore

@@ -1,3 +1,8 @@
logs/
outputs/
.venv/
# C++
**/cmake-build*/
**/build*/

View File

@@ -9,6 +9,12 @@
import argparse
import sys
import os
import torch
import torchvision
import gymnasium as gym
import time
from datetime import datetime
from isaaclab.app import AppLauncher
@@ -18,7 +24,7 @@ import cli_args # isort: skip
# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
-parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
+parser.add_argument("--video_length", type=int, default=2000, help="Length of the recorded video (in steps).")
parser.add_argument(
"--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
@@ -53,11 +59,6 @@ simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
import os
import time
import torch
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from isaaclab.envs import (
@@ -79,6 +80,215 @@ from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks  # noqa: F401
# ==============================================================================
# [Update] Multi-camera, multi-environment recorder class
# ==============================================================================
# 1. Make sure the following imports are present at the top of the file
import imageio
import numpy as np
"""GPU-buffered recorder: frames stay on the GPU during recording and are moved to the CPU only at save time."""
class MultiCameraRecorder:
    def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
        self.env = env
        self.camera_names = camera_names
        self.env_indices = env_indices
        self.output_dir = output_dir
        self.fps = fps
        self.frames = {cam_name: {env_idx: [] for env_idx in env_indices} for cam_name in camera_names}
        os.makedirs(self.output_dir, exist_ok=True)
        self.cameras = {}
        for name in camera_names:
            if name in self.env.unwrapped.scene.keys():
                self.cameras[name] = self.env.unwrapped.scene[name]
                print(f"[INFO] Camera {name} linked.")

    def record_step(self):
        """Clone the latest frames and keep them on the GPU."""
        for cam_name, camera_obj in self.cameras.items():
            # Read the latest output once per step (forces a sync so the backend data is not lost).
            rgb_data = camera_obj.data.output["rgb"]
            for env_idx in self.env_indices:
                # .clone() keeps the frame on the GPU; watch GPU memory usage for long recordings.
                self.frames[cam_name][env_idx].append(rgb_data[env_idx].clone())

    def save_videos(self, filename_suffix=""):
        print("[INFO] Saving videos from GPU to disk...")
        for cam_name, env_dict in self.frames.items():
            for env_idx, frame_list in env_dict.items():
                if not frame_list:
                    continue
                # Stack the frames into a single (T, H, W, C) tensor.
                video_tensor = torch.stack(frame_list)
                if video_tensor.shape[-1] == 4:  # RGBA -> RGB
                    video_tensor = video_tensor[..., :3]
                # Move to CPU and save.
                video_cpu = video_tensor.cpu()
                output_path = os.path.join(self.output_dir, f"{cam_name}_env{env_idx}_{filename_suffix}.mp4")
                # torchvision.io.write_video expects (T, H, W, C) uint8 frames.
                torchvision.io.write_video(output_path, video_cpu, fps=self.fps)
                # [Important] Release GPU memory immediately after each video is written.
                del video_tensor
                del video_cpu
                frame_list.clear()
                torch.cuda.empty_cache()
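Keeping every frame on the GPU is fast but memory-hungry; a rough sizing sketch (the 640x480 RGBA resolution is an assumption, the real value comes from the camera config in the env cfg):

# bytes_per_frame = 480 * 640 * 4           # ~1.2 MB per RGBA frame
# 4 cameras * 4 envs * 2000 steps * 1.2 MB  # ~37 GB of VRAM just for frames
# With the default --video_length of 2000 and envs_to_record = [0, 1, 2, 3] below, this exceeds a 24 GB 4090,
# so shorten the recording, record fewer envs, or fall back to the CPU-buffered variant kept below.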
"""use cpu compute to record video"""
# # 2. 修改 MultiCameraRecorder 类
# class MultiCameraRecorder:
# def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
# self.env = env
# self.camera_names = camera_names
# self.env_indices = env_indices
# self.output_dir = output_dir
# self.fps = fps
# self.frames = {cam_name: {env_idx: [] for env_idx in env_indices} for cam_name in camera_names}
# os.makedirs(self.output_dir, exist_ok=True)
# self.cameras = {}
# for name in camera_names:
# try:
# self.cameras[name] = self.env.unwrapped.scene[name]
# print(f"[INFO][MultiCameraRecorder] Found camera: {name}")
# except KeyError:
# print(f"[WARN][MultiCameraRecorder] Camera '{name}' not found!")
# def record_step(self):
# """在每个仿真步调用"""
# for cam_name, camera_obj in self.cameras.items():
# # [关键修改] 获取数据前先确保数据已同步
# # 这可以防止读取到正在渲染中的内存导致 access violation
# rgb_data = camera_obj.data.output["rgb"]
# for env_idx in self.env_indices:
# if env_idx >= rgb_data.shape[0]: continue
# # 转换为 CPU 上的 numpy这种方式通常比 torchvision 的 tensor 堆叠更稳
# frame = rgb_data[env_idx].clone().detach().cpu().numpy()
# self.frames[cam_name][env_idx].append(frame)
# def save_videos(self, filename_suffix=""):
# """循环结束后调用"""
# print(f"[INFO][MultiCameraRecorder] Saving videos...")
# for cam_name, env_dict in self.frames.items():
# for env_idx, frame_list in env_dict.items():
# if not frame_list: continue
# print(f" -> Saving {cam_name} (Env {env_idx})...")
# # 处理格式并使用 imageio 保存
# processed_frames = []
# for img in frame_list:
# # [0, 1] -> [0, 255]
# if img.dtype != np.uint8:
# if img.max() <= 1.01: img = (img * 255).astype(np.uint8)
# else: img = img.astype(np.uint8)
# # 去掉 Alpha 通道
# if img.shape[-1] == 4: img = img[:, :, :3]
# processed_frames.append(img)
# fname = f"{cam_name}_env{env_idx}_{filename_suffix}.mp4"
# output_path = os.path.join(self.output_dir, fname)
# try:
# # 使用 imageio 写入视频
# imageio.mimsave(output_path, processed_frames, fps=self.fps)
# print(f" Saved: {output_path}")
# except Exception as e:
# print(f" [ERROR] Failed to save {fname}: {e}")
# class MultiCameraRecorder:
# """
# 用于从 Isaac Lab 环境中录制多个相机的图像数据,并支持多个环境实例。
# """
# def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
# self.env = env
# self.camera_names = camera_names
# self.env_indices = env_indices # 要录制的环境索引列表,例如 [0, 1]
# self.output_dir = output_dir
# self.fps = fps
# # 数据结构: self.frames[camera_name][env_idx] = [list of tensors]
# self.frames = {
# cam_name: {env_idx: [] for env_idx in env_indices}
# for cam_name in camera_names
# }
# os.makedirs(self.output_dir, exist_ok=True)
# # 获取场景中的相机对象
# self.cameras = {}
# for name in camera_names:
# try:
# # 尝试获取相机句柄
# self.cameras[name] = self.env.unwrapped.scene[name]
# print(f"[INFO][MultiCameraRecorder] Found camera: {name}")
# except KeyError:
# print(f"[WARN][MultiCameraRecorder] Camera '{name}' not found in scene! Skipping.")
# def record_step(self):
# """在每个仿真步调用"""
# for cam_name, camera_obj in self.cameras.items():
# # 获取 RGB 数据 (Shape: [num_envs, H, W, C])
# # 注意:如果数据类型是 RGBA后面处理
# rgb_data = camera_obj.data.output["rgb"]
# for env_idx in self.env_indices:
# # 边界检查
# if env_idx >= rgb_data.shape[0]:
# continue
# # [关键] 立即 clone 并转到 CPU。
# # 如果不转 CPU4个相机 * 长时间录制会迅速耗尽 GPU 显存。
# frame = rgb_data[env_idx].clone().detach().cpu()
# self.frames[cam_name][env_idx].append(frame)
# def save_videos(self, filename_suffix=""):
# """循环结束后调用,保存所有视频"""
# print(f"[INFO][MultiCameraRecorder] Saving videos for {len(self.cameras)} cameras...")
# for cam_name, env_dict in self.frames.items():
# for env_idx, frame_list in env_dict.items():
# if not frame_list:
# continue
# print(f" -> Processing {cam_name} (Env {env_idx}) with {len(frame_list)} frames...")
# # 堆叠 (T, H, W, C)
# video_tensor = torch.stack(frame_list)
# # 去除 Alpha 通道
# if video_tensor.shape[-1] == 4:
# video_tensor = video_tensor[..., :3]
# # (T, H, W, C) -> (T, C, H, W)
# video_tensor = video_tensor.permute(0, 3, 1, 2)
# # 确保是 uint8
# if video_tensor.dtype != torch.uint8:
# if video_tensor.max() <= 1.0:
# video_tensor = (video_tensor * 255).to(torch.uint8)
# else:
# video_tensor = video_tensor.to(torch.uint8)
# # 文件名: left_hand_camera_env0_2024-xx-xx.mp4
# fname = f"{cam_name}_env{env_idx}_{filename_suffix}.mp4"
# output_path = os.path.join(self.output_dir, fname)
# torchvision.io.write_video(output_path, video_tensor, fps=self.fps)
# print(f" Saved: {output_path}")
# ==============================================================================
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
@@ -92,7 +302,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
# set the environment seed
# note: certain randomizations occur in the environment initialization so we set the seed here
env_cfg.seed = agent_cfg.seed
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
@@ -122,7 +331,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
if isinstance(env.unwrapped, DirectMARLEnv):
env = multi_agent_to_single_agent(env)
-# wrap for video recording
+# wrap for video recording (standard Gym recording - Viewport)
if args_cli.video:
video_kwargs = {
"video_folder": os.path.join(log_dir, "videos", "play"),
@@ -151,12 +360,9 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
policy = runner.get_inference_policy(device=env.unwrapped.device)
# extract the neural network module
# we do this in a try-except to maintain backwards compatibility.
try:
# version 2.3 onwards
policy_nn = runner.alg.policy
except AttributeError:
# version 2.2 and below
policy_nn = runner.alg.actor_critic
# extract the normalizer
@@ -174,9 +380,42 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
dt = env.unwrapped.step_dt
# ==========================================================================
# Initialize the multi-camera recorder
# ==========================================================================
recorder = None
if args_cli.video:
# 1. Cameras to record (names must match those defined in mindbot_env_cfg.py)
target_cameras = [
"left_hand_camera",
"right_hand_camera",
"head_camera",
"chest_camera"
]
# 2. Environment indices to record
# By default only env 0 is recorded; use [0, 1] for the first two, and so on.
# Note: do not record too many environments, otherwise writing the videos is very slow and memory-hungry.
envs_to_record = [0, 1, 2, 3]
# To record every environment (not recommended unless num_envs is small):
# envs_to_record = list(range(env.unwrapped.num_envs))
save_dir = os.path.join(log_dir, "robot_camera_recordings")
recorder = MultiCameraRecorder(
env=env,
camera_names=target_cameras,
env_indices=envs_to_record,
output_dir=save_dir,
fps=int(1/dt)
)
# reset environment
obs = env.get_observations()
timestep = 0
print("[INFO] Starting simulation loop...")
# simulate environment
while simulation_app.is_running():
start_time = time.time()
@@ -186,12 +425,26 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
actions = policy(obs)
# env stepping
obs, _, dones, _ = env.step(actions)
# ==================================================================
# Record camera data on every frame
# ==================================================================
if recorder:
recorder.record_step()
# reset recurrent states for episodes that have terminated
policy_nn.reset(dones)
if args_cli.video:
timestep += 1
# Exit the play loop after recording one video
if timestep == args_cli.video_length:
# ==============================================================
# Save all recorded videos when the loop ends
# ==============================================================
if recorder:
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
recorder.save_videos(filename_suffix=timestamp)
break
# time delay for real-time evaluation
@@ -208,3 +461,564 @@ if __name__ == "__main__":
main()
# close sim app
simulation_app.close()
# # Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# # All rights reserved.
# #
# # SPDX-License-Identifier: BSD-3-Clause
# """Script to play a checkpoint if an RL agent from RSL-RL."""
# """Launch Isaac Sim Simulator first."""
# import argparse
# import sys
# import os
# import torch
# import torchvision
# import imageio
# import numpy as np
# import gymnasium as gym
# import time
# from datetime import datetime
# from isaaclab.app import AppLauncher
# # local imports
# import cli_args # isort: skip
# # add argparse arguments
# parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
# parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
# parser.add_argument("--video_length", type=int, default=2000, help="Length of the recorded video (in steps).")
# parser.add_argument(
# "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
# )
# parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
# parser.add_argument("--task", type=str, default=None, help="Name of the task.")
# parser.add_argument(
# "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
# )
# parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
# parser.add_argument(
# "--use_pretrained_checkpoint",
# action="store_true",
# help="Use the pre-trained checkpoint from Nucleus.",
# )
# parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
# # append RSL-RL cli arguments
# cli_args.add_rsl_rl_args(parser)
# # append AppLauncher cli args
# AppLauncher.add_app_launcher_args(parser)
# # parse the arguments
# args_cli, hydra_args = parser.parse_known_args()
# # always enable cameras to record video
# if args_cli.video:
# args_cli.enable_cameras = True
# # clear out sys.argv for Hydra
# sys.argv = [sys.argv[0]] + hydra_args
# # launch omniverse app
# app_launcher = AppLauncher(args_cli)
# simulation_app = app_launcher.app
# """Rest everything follows."""
# from rsl_rl.runners import DistillationRunner, OnPolicyRunner
# from isaaclab.envs import (
# DirectMARLEnv,
# DirectMARLEnvCfg,
# DirectRLEnvCfg,
# ManagerBasedRLEnvCfg,
# multi_agent_to_single_agent,
# )
# from isaaclab.utils.assets import retrieve_file_path
# from isaaclab.utils.dict import print_dict
# from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
# from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
# import isaaclab_tasks # noqa: F401
# from isaaclab_tasks.utils import get_checkpoint_path
# from isaaclab_tasks.utils.hydra import hydra_task_config
# import mindbot.tasks # noqa: F401
# # ==============================================================================
# # 新增:自定义相机录制类
# # ==============================================================================
# class CameraRecorder:
# """
# 用于从 Isaac Lab 环境中录制指定相机的图像数据并保存为视频。
# """
# def __init__(self, env, camera_name: str, output_dir: str, fps: int = 30):
# self.env = env
# self.camera_name = camera_name
# self.output_dir = output_dir
# self.fps = fps
# self.frames = [] # 用于存储每一帧的 Tensor
# # 确保输出目录存在
# os.makedirs(self.output_dir, exist_ok=True)
# # 尝试从场景中获取相机对象
# # env.unwrapped 访问原始的 ManagerBasedRLEnv
# try:
# self.camera = self.env.unwrapped.scene[camera_name]
# print(f"[INFO][CameraRecorder] Successfully found camera: {camera_name}")
# except KeyError:
# print(f"[ERROR][CameraRecorder] Camera '{camera_name}' not found in scene entities!")
# print(f"Available entities: {self.env.unwrapped.scene.keys()}")
# raise
# def record_step(self):
# """在每个仿真步调用,获取图像数据"""
# # 获取 RGB 数据。注意Isaac Lab 的输出通常是 (num_envs, H, W, C)
# # 我们这里只取第一个环境 (env_index=0)
# # output["rgb"] 可能是 RGBA 或 RGB
# rgb_tensor = self.camera.data.output["rgb"][0]
# # 如果数据在 GPU 上,克隆一份并 detach防止被后续步骤覆盖
# # 此时先不转 CPU以免拖慢仿真循环最后保存时再转
# self.frames.append(rgb_tensor.clone().detach())
# def save_video(self, filename="robot_camera_video.mp4"):
# """循环结束后调用,将内存中的帧保存为视频"""
# if not self.frames:
# print("[WARN][CameraRecorder] No frames recorded.")
# return
# print(f"[INFO][CameraRecorder] Saving {len(self.frames)} frames to video...")
# # 将 tensor 转换为 numpy 格式
# video_frames = []
# for frame in self.frames:
# # 转换为 CPU 上的 numpy 数组 (H, W, C)
# img = frame.cpu().numpy()
# # 处理归一化数据 [0, 1] -> [0, 255]
# if img.dtype != np.uint8:
# if img.max() <= 1.01:
# img = (img * 255).astype(np.uint8)
# else:
# img = img.astype(np.uint8)
# # 处理 RGBA -> RGB
# if img.shape[-1] == 4:
# img = img[:, :, :3]
# video_frames.append(img)
# output_path = os.path.join(self.output_dir, filename)
# # 使用 imageio 写入视频,这种方式在 Windows 上更稳定
# try:
# imageio.mimsave(output_path, video_frames, fps=self.fps)
# print(f"[INFO][CameraRecorder] Video saved to: {output_path}")
# except Exception as e:
# print(f"[ERROR][CameraRecorder] Failed to save video using imageio: {e}")
# # 如果 imageio 也失败,尝试最后的备选方案
# print("[INFO] Attempting to save frames as images instead...")
# # ... (可选:保存为一系列图片)
# # def save_video(self, filename="robot_camera_video.mp4"):
# # """循环结束后调用,将内存中的帧保存为视频"""
# # if not self.frames:
# # print("[WARN][CameraRecorder] No frames recorded.")
# # return
# # print(f"[INFO][CameraRecorder] Saving {len(self.frames)} frames to video...")
# # # 堆叠帧 -> (T, H, W, C)
# # video_tensor = torch.stack(self.frames)
# # # 处理 Alpha 通道:如果是 RGBA (4通道)只取前3个 RGB
# # if video_tensor.shape[-1] == 4:
# # video_tensor = video_tensor[..., :3]
# # # 转换为 (T, C, H, W) 以符合 torchvision 的输入要求
# # # 原始: (T, H, W, C) -> Permute -> (T, C, H, W)
# # video_tensor = video_tensor.permute(0, 3, 1, 2)
# # # 确保是 uint8 类型 [0, 255]
# # if video_tensor.dtype != torch.uint8:
# # # 如果是 float [0,1],则 * 255 并转 uint8
# # if video_tensor.max() <= 1.0:
# # video_tensor = (video_tensor * 255).to(torch.uint8)
# # else:
# # video_tensor = video_tensor.to(torch.uint8)
# # # 移动到 CPU
# # video_tensor = video_tensor.cpu()
# # output_path = os.path.join(self.output_dir, filename)
# # # 使用 torchvision 保存视频
# # torchvision.io.write_video(output_path, video_tensor, fps=self.fps)
# # print(f"[INFO][CameraRecorder] Video saved to: {output_path}")
# # ==============================================================================
# @hydra_task_config(args_cli.task, args_cli.agent)
# def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
# """Play with RSL-RL agent."""
# # grab task name for checkpoint path
# task_name = args_cli.task.split(":")[-1]
# train_task_name = task_name.replace("-Play", "")
# # override configurations with non-hydra CLI arguments
# agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
# env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
# # set the environment seed
# env_cfg.seed = agent_cfg.seed
# env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# # specify directory for logging experiments
# log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
# log_root_path = os.path.abspath(log_root_path)
# print(f"[INFO] Loading experiment from directory: {log_root_path}")
# if args_cli.use_pretrained_checkpoint:
# resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
# if not resume_path:
# print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
# return
# elif args_cli.checkpoint:
# resume_path = retrieve_file_path(args_cli.checkpoint)
# else:
# resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
# log_dir = os.path.dirname(resume_path)
# # set the log directory for the environment (works for all environment types)
# env_cfg.log_dir = log_dir
# # create isaac environment
# env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# # convert to single-agent instance if required by the RL algorithm
# if isinstance(env.unwrapped, DirectMARLEnv):
# env = multi_agent_to_single_agent(env)
# # wrap for video recording (standard Gym recording)
# # 注意:这只是录制 Viewport 的画面,不是你机器人相机的画面
# if args_cli.video:
# video_kwargs = {
# "video_folder": os.path.join(log_dir, "videos", "play"),
# "step_trigger": lambda step: step == 0,
# "video_length": args_cli.video_length,
# "disable_logger": True,
# }
# print("[INFO] Recording videos during training.")
# print_dict(video_kwargs, nesting=4)
# env = gym.wrappers.RecordVideo(env, **video_kwargs)
# # wrap around environment for rsl-rl
# env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
# print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# # load previously trained model
# if agent_cfg.class_name == "OnPolicyRunner":
# runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
# elif agent_cfg.class_name == "DistillationRunner":
# runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
# else:
# raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
# runner.load(resume_path)
# # obtain the trained policy for inference
# policy = runner.get_inference_policy(device=env.unwrapped.device)
# # extract the neural network module
# try:
# policy_nn = runner.alg.policy
# except AttributeError:
# policy_nn = runner.alg.actor_critic
# # extract the normalizer
# if hasattr(policy_nn, "actor_obs_normalizer"):
# normalizer = policy_nn.actor_obs_normalizer
# elif hasattr(policy_nn, "student_obs_normalizer"):
# normalizer = policy_nn.student_obs_normalizer
# else:
# normalizer = None
# # export policy to onnx/jit
# export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
# export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
# export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
# dt = env.unwrapped.step_dt
# # ==========================================================================
# # 初始化自定义相机录制器
# # ==========================================================================
# recorder = None
# if args_cli.video:
# # 这里的 output_dir 设为日志目录下的 hand_camera_videos
# save_dir = os.path.join(log_dir, "hand_camera_videos")
# # 这里的 "left_hand_camera" 必须对应 mindbot_env_cfg.py 中定义的名称
# recorder = CameraRecorder(env, "left_hand_camera", save_dir, fps=int(1/dt))
# # reset environment
# obs = env.get_observations()
# timestep = 0
# print("[INFO] Starting simulation loop...")
# # simulate environment
# while simulation_app.is_running():
# start_time = time.time()
# # run everything in inference mode
# with torch.inference_mode():
# # agent stepping
# actions = policy(obs)
# # env stepping
# obs, _, dones, _ = env.step(actions)
# # ==================================================================
# # 每一帧录制数据
# # ==================================================================
# if recorder:
# recorder.record_step()
# # reset recurrent states for episodes that have terminated
# policy_nn.reset(dones)
# if args_cli.video:
# timestep += 1
# # Exit the play loop after recording one video
# if timestep == args_cli.video_length:
# # ==============================================================
# # 循环结束时保存视频
# # ==============================================================
# if recorder:
# timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# recorder.save_video(filename=f"hand_cam_{timestamp}.mp4")
# break
# # time delay for real-time evaluation
# sleep_time = dt - (time.time() - start_time)
# if args_cli.real_time and sleep_time > 0:
# time.sleep(sleep_time)
# # close the simulator
# env.close()
# if __name__ == "__main__":
# # run the main function
# main()
# # close sim app
# simulation_app.close()
# # # Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# # # All rights reserved.
# # #
# # # SPDX-License-Identifier: BSD-3-Clause
# # """Script to play a checkpoint if an RL agent from RSL-RL."""
# # """Launch Isaac Sim Simulator first."""
# # import argparse
# # import sys
# # from isaaclab.app import AppLauncher
# # # local imports
# # import cli_args # isort: skip
# # # add argparse arguments
# # parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
# # parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
# # parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
# # parser.add_argument(
# # "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
# # )
# # parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
# # parser.add_argument("--task", type=str, default=None, help="Name of the task.")
# # parser.add_argument(
# # "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
# # )
# # parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
# # parser.add_argument(
# # "--use_pretrained_checkpoint",
# # action="store_true",
# # help="Use the pre-trained checkpoint from Nucleus.",
# # )
# # parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
# # # append RSL-RL cli arguments
# # cli_args.add_rsl_rl_args(parser)
# # # append AppLauncher cli args
# # AppLauncher.add_app_launcher_args(parser)
# # # parse the arguments
# # args_cli, hydra_args = parser.parse_known_args()
# # # always enable cameras to record video
# # if args_cli.video:
# # args_cli.enable_cameras = True
# # # clear out sys.argv for Hydra
# # sys.argv = [sys.argv[0]] + hydra_args
# # # launch omniverse app
# # app_launcher = AppLauncher(args_cli)
# # simulation_app = app_launcher.app
# # """Rest everything follows."""
# # import gymnasium as gym
# # import os
# # import time
# # import torch
# # from rsl_rl.runners import DistillationRunner, OnPolicyRunner
# # from isaaclab.envs import (
# # DirectMARLEnv,
# # DirectMARLEnvCfg,
# # DirectRLEnvCfg,
# # ManagerBasedRLEnvCfg,
# # multi_agent_to_single_agent,
# # )
# # from isaaclab.utils.assets import retrieve_file_path
# # from isaaclab.utils.dict import print_dict
# # from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
# # from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
# # import isaaclab_tasks # noqa: F401
# # from isaaclab_tasks.utils import get_checkpoint_path
# # from isaaclab_tasks.utils.hydra import hydra_task_config
# # import mindbot.tasks # noqa: F401
# # @hydra_task_config(args_cli.task, args_cli.agent)
# # def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
# # """Play with RSL-RL agent."""
# # # grab task name for checkpoint path
# # task_name = args_cli.task.split(":")[-1]
# # train_task_name = task_name.replace("-Play", "")
# # # override configurations with non-hydra CLI arguments
# # agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
# # env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
# # # set the environment seed
# # # note: certain randomizations occur in the environment initialization so we set the seed here
# # env_cfg.seed = agent_cfg.seed
# # env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# # # specify directory for logging experiments
# # log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
# # log_root_path = os.path.abspath(log_root_path)
# # print(f"[INFO] Loading experiment from directory: {log_root_path}")
# # if args_cli.use_pretrained_checkpoint:
# # resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
# # if not resume_path:
# # print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
# # return
# # elif args_cli.checkpoint:
# # resume_path = retrieve_file_path(args_cli.checkpoint)
# # else:
# # resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
# # log_dir = os.path.dirname(resume_path)
# # # set the log directory for the environment (works for all environment types)
# # env_cfg.log_dir = log_dir
# # # create isaac environment
# # env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# # # convert to single-agent instance if required by the RL algorithm
# # if isinstance(env.unwrapped, DirectMARLEnv):
# # env = multi_agent_to_single_agent(env)
# # # wrap for video recording
# # if args_cli.video:
# # video_kwargs = {
# # "video_folder": os.path.join(log_dir, "videos", "play"),
# # "step_trigger": lambda step: step == 0,
# # "video_length": args_cli.video_length,
# # "disable_logger": True,
# # }
# # print("[INFO] Recording videos during training.")
# # print_dict(video_kwargs, nesting=4)
# # env = gym.wrappers.RecordVideo(env, **video_kwargs)
# # # wrap around environment for rsl-rl
# # env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
# # print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# # # load previously trained model
# # if agent_cfg.class_name == "OnPolicyRunner":
# # runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
# # elif agent_cfg.class_name == "DistillationRunner":
# # runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
# # else:
# # raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
# # runner.load(resume_path)
# # # obtain the trained policy for inference
# # policy = runner.get_inference_policy(device=env.unwrapped.device)
# # # extract the neural network module
# # # we do this in a try-except to maintain backwards compatibility.
# # try:
# # # version 2.3 onwards
# # policy_nn = runner.alg.policy
# # except AttributeError:
# # # version 2.2 and below
# # policy_nn = runner.alg.actor_critic
# # # extract the normalizer
# # if hasattr(policy_nn, "actor_obs_normalizer"):
# # normalizer = policy_nn.actor_obs_normalizer
# # elif hasattr(policy_nn, "student_obs_normalizer"):
# # normalizer = policy_nn.student_obs_normalizer
# # else:
# # normalizer = None
# # # export policy to onnx/jit
# # export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
# # export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
# # export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
# # dt = env.unwrapped.step_dt
# # # reset environment
# # obs = env.get_observations()
# # timestep = 0
# # # simulate environment
# # while simulation_app.is_running():
# # start_time = time.time()
# # # run everything in inference mode
# # with torch.inference_mode():
# # # agent stepping
# # actions = policy(obs)
# # # env stepping
# # obs, _, dones, _ = env.step(actions)
# # # reset recurrent states for episodes that have terminated
# # policy_nn.reset(dones)
# # if args_cli.video:
# # timestep += 1
# # # Exit the play loop after recording one video
# # if timestep == args_cli.video_length:
# # break
# # # time delay for real-time evaluation
# # sleep_time = dt - (time.time() - start_time)
# # if args_cli.real_time and sleep_time > 0:
# # time.sleep(sleep_time)
# # # close the simulator
# # env.close()
# # if __name__ == "__main__":
# # # run the main function
# # main()
# # # close sim app
# # simulation_app.close()

scripts/rsl_rl/play.py.bak (new file)

@@ -0,0 +1,210 @@
# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Script to play a checkpoint of an RL agent from RSL-RL."""
"""Launch Isaac Sim Simulator first."""
import argparse
import sys
from isaaclab.app import AppLauncher
# local imports
import cli_args # isort: skip
# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument(
"--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument(
"--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
)
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument(
"--use_pretrained_checkpoint",
action="store_true",
help="Use the pre-trained checkpoint from Nucleus.",
)
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
# append RSL-RL cli arguments
cli_args.add_rsl_rl_args(parser)
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli, hydra_args = parser.parse_known_args()
# always enable cameras to record video
if args_cli.video:
args_cli.enable_cameras = True
# clear out sys.argv for Hydra
sys.argv = [sys.argv[0]] + hydra_args
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
import os
import time
import torch
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from isaaclab.envs import (
DirectMARLEnv,
DirectMARLEnvCfg,
DirectRLEnvCfg,
ManagerBasedRLEnvCfg,
multi_agent_to_single_agent,
)
from isaaclab.utils.assets import retrieve_file_path
from isaaclab.utils.dict import print_dict
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks # noqa: F401
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
"""Play with RSL-RL agent."""
# grab task name for checkpoint path
task_name = args_cli.task.split(":")[-1]
train_task_name = task_name.replace("-Play", "")
# override configurations with non-hydra CLI arguments
agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
# set the environment seed
# note: certain randomizations occur in the environment initialization so we set the seed here
env_cfg.seed = agent_cfg.seed
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# specify directory for logging experiments
log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
log_root_path = os.path.abspath(log_root_path)
print(f"[INFO] Loading experiment from directory: {log_root_path}")
if args_cli.use_pretrained_checkpoint:
resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
if not resume_path:
print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
return
elif args_cli.checkpoint:
resume_path = retrieve_file_path(args_cli.checkpoint)
else:
resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
log_dir = os.path.dirname(resume_path)
# set the log directory for the environment (works for all environment types)
env_cfg.log_dir = log_dir
# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# convert to single-agent instance if required by the RL algorithm
if isinstance(env.unwrapped, DirectMARLEnv):
env = multi_agent_to_single_agent(env)
# wrap for video recording
if args_cli.video:
video_kwargs = {
"video_folder": os.path.join(log_dir, "videos", "play"),
"step_trigger": lambda step: step == 0,
"video_length": args_cli.video_length,
"disable_logger": True,
}
print("[INFO] Recording videos during training.")
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)
# wrap around environment for rsl-rl
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# load previously trained model
if agent_cfg.class_name == "OnPolicyRunner":
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
elif agent_cfg.class_name == "DistillationRunner":
runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
else:
raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
runner.load(resume_path)
# obtain the trained policy for inference
policy = runner.get_inference_policy(device=env.unwrapped.device)
# extract the neural network module
# we do this in a try-except to maintain backwards compatibility.
try:
# version 2.3 onwards
policy_nn = runner.alg.policy
except AttributeError:
# version 2.2 and below
policy_nn = runner.alg.actor_critic
# extract the normalizer
if hasattr(policy_nn, "actor_obs_normalizer"):
normalizer = policy_nn.actor_obs_normalizer
elif hasattr(policy_nn, "student_obs_normalizer"):
normalizer = policy_nn.student_obs_normalizer
else:
normalizer = None
# export policy to onnx/jit
export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
dt = env.unwrapped.step_dt
# reset environment
obs = env.get_observations()
timestep = 0
# simulate environment
while simulation_app.is_running():
start_time = time.time()
# run everything in inference mode
with torch.inference_mode():
# agent stepping
actions = policy(obs)
# env stepping
obs, _, dones, _ = env.step(actions)
# reset recurrent states for episodes that have terminated
policy_nn.reset(dones)
if args_cli.video:
timestep += 1
# Exit the play loop after recording one video
if timestep == args_cli.video_length:
break
# time delay for real-time evaluation
sleep_time = dt - (time.time() - start_time)
if args_cli.real_time and sleep_time > 0:
time.sleep(sleep_time)
# close the simulator
env.close()
if __name__ == "__main__":
# run the main function
main()
# close sim app
simulation_app.close()

scripts/rsl_rl/play.py.bak1 (new file)

@@ -0,0 +1,270 @@
import argparse
import sys
import os
import time
import imageio
import numpy as np
import h5py
import torch
# Import AppLauncher first and create the SimulationApp as early as possible, so pxr is loaded before other imports need it
from isaaclab.app import AppLauncher
import cli_args # isort: skip
# CLI
parser = argparse.ArgumentParser(description="Play an RL agent with multi-cam recording.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument("--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations.")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument("--use_pretrained_checkpoint", action="store_true", help="Use the pre-trained checkpoint from Nucleus.")
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
parser.add_argument("--max_steps", type=int, default=None, help="Maximum number of steps; exit early once reached.")
cli_args.add_rsl_rl_args(parser)
AppLauncher.add_app_launcher_args(parser)
args_cli, hydra_args = parser.parse_known_args()
if args_cli.video:
args_cli.enable_cameras = True
# Pass the remaining arguments on to Hydra
sys.argv = [sys.argv[0]] + hydra_args
# ==== Instantiate the SimulationApp first ====
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
# ==== Only import isaac/pxr-dependent modules after this point ====
import gymnasium as gym
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from isaaclab.envs import DirectMARLEnv, DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg, multi_agent_to_single_agent
from isaaclab.utils.assets import retrieve_file_path
from isaaclab.utils.dict import print_dict
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks # noqa: F401
CAM_NAMES = ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand", "cam_top", "cam_side"]
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
task_name = args_cli.task.split(":")[-1]
train_task_name = task_name.replace("-Play", "")
agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
env_cfg.seed = agent_cfg.seed
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
log_root_path = os.path.abspath(os.path.join("logs", "rsl_rl", agent_cfg.experiment_name))
print(f"[INFO] Loading experiment from directory: {log_root_path}")
if args_cli.use_pretrained_checkpoint:
resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
if not resume_path:
print("[INFO] No pre-trained checkpoint for this task.")
return
elif args_cli.checkpoint:
resume_path = retrieve_file_path(args_cli.checkpoint)
else:
resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
log_dir = os.path.dirname(resume_path)
env_cfg.log_dir = log_dir
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
if isinstance(env.unwrapped, DirectMARLEnv):
env = multi_agent_to_single_agent(env)
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
if agent_cfg.class_name == "OnPolicyRunner":
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
elif agent_cfg.class_name == "DistillationRunner":
runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
else:
raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
runner.load(resume_path)
policy = runner.get_inference_policy(device=env.unwrapped.device)
try:
policy_nn = runner.alg.policy
except AttributeError:
policy_nn = runner.alg.actor_critic
# Export the model
export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
normalizer = getattr(policy_nn, "actor_obs_normalizer", getattr(policy_nn, "student_obs_normalizer", None))
export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
# dt = env.unwrapped.step_dt
# # 录制缓冲
# cam_buffers = {n: [] for n in CAM_NAMES}
# joint_log, joint_vel_log, action_log, ts_log = [], [], [], []
# t0 = time.time()
# obs = env.get_observations()
# timestep = 0
# step_count = 0
# try:
# while simulation_app.is_running():
# start_time = time.time()
# with torch.inference_mode():
# actions = policy(obs)
# obs, _, dones, _ = env.step(actions)
# policy_nn.reset(dones)
# # 相机帧(取 env0如需全部 env 去掉 [0]
# for name in CAM_NAMES:
# if name not in env.unwrapped.scene.sensors:
# continue
# cam = env.unwrapped.scene.sensors[name]
# rgba = cam.data.output.get("rgba", cam.data.output.get("rgb"))
# if rgba is None:
# continue
# frame = rgba[0].cpu().numpy()
# if frame.shape[-1] == 4:
# frame = frame[..., :3]
# cam_buffers[name].append(frame)
# # 关节 / 速度 / 动作
# robot = env.unwrapped.scene["Mindbot"]
# joint_log.append(robot.data.joint_pos.cpu().numpy())
# joint_vel_log.append(robot.data.joint_vel.cpu().numpy())
# action_log.append(actions.cpu().numpy())
# ts_log.append(time.time() - t0)
# step_count += 1
# if args_cli.max_steps and step_count >= args_cli.max_steps:
# break
# if args_cli.video:
# timestep += 1
# if timestep == args_cli.video_length:
# break
# sleep_time = dt - (time.time() - start_time)
# if args_cli.real_time and sleep_time > 0:
# time.sleep(sleep_time)
# finally:
# # 保存 HDF5
# h5_path = os.path.join(log_dir, "rollout_multi_cam.h5")
# with h5py.File(h5_path, "w") as f:
# f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip")
# f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip")
# f.create_dataset("actions", data=np.stack(action_log), compression="gzip")
# f.create_dataset("timestamps", data=np.array(ts_log))
# for name, frames in cam_buffers.items():
# if not frames:
# continue
# dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip")
# if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]:
# fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05
# else:
# fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295
# dset.attrs["fx"] = fx
# dset.attrs["fy"] = fy
# dset.attrs["cx"] = cx
# dset.attrs["cy"] = cy
# dset.attrs["focal_length"] = 1.93
# print(f"[INFO] Saved HDF5 to {h5_path}")
# # 可选:单路 MP4
# head_frames = cam_buffers["cam_head"]
# if head_frames:
# fps = int(round(1.0 / dt))
# video_path = os.path.join(log_dir, "cam_head.mp4")
# imageio.mimsave(video_path, head_frames, fps=fps)
# print(f"[INFO] Saved video to {video_path}")
# env.close()
dt = env.unwrapped.step_dt
# Recording buffers
cam_buffers = {n: [] for n in CAM_NAMES}
joint_log, joint_vel_log, action_log, ts_log = [], [], [], []
t0 = time.time()
obs = env.get_observations()
timestep = 0
while simulation_app.is_running():
start_time = time.time()
with torch.inference_mode():
actions = policy(obs)
obs, _, dones, _ = env.step(actions)
policy_nn.reset(dones)
# Camera frames (env 0 only; drop the [0] index to record all envs)
for name in CAM_NAMES:
if name not in env.unwrapped.scene.sensors:
continue
cam = env.unwrapped.scene.sensors[name]
rgba = cam.data.output.get("rgba", cam.data.output.get("rgb"))
if rgba is None:
continue
frame = rgba[0].cpu().numpy()
if frame.shape[-1] == 4:
frame = frame[..., :3]
cam_buffers[name].append(frame)
# Joint positions / joint velocities / actions
robot = env.unwrapped.scene["Mindbot"]
joint_log.append(robot.data.joint_pos.cpu().numpy())
joint_vel_log.append(robot.data.joint_vel.cpu().numpy())
action_log.append(actions.cpu().numpy())
ts_log.append(time.time() - t0)
if args_cli.video:
timestep += 1
if timestep == args_cli.video_length:
break
sleep_time = dt - (time.time() - start_time)
if args_cli.real_time and sleep_time > 0:
time.sleep(sleep_time)
# Save to HDF5
h5_path = os.path.join(log_dir, "rollout_multi_cam.h5")
with h5py.File(h5_path, "w") as f:
f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip")
f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip")
f.create_dataset("actions", data=np.stack(action_log), compression="gzip")
f.create_dataset("timestamps", data=np.array(ts_log))
for name, frames in cam_buffers.items():
if not frames:
continue
dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip")
# Intrinsics: set to match the corresponding camera config
if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]:
fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05
else:
fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295
dset.attrs["fx"] = fx
dset.attrs["fy"] = fy
dset.attrs["cx"] = cx
dset.attrs["cy"] = cy
dset.attrs["focal_length"] = 1.93
print(f"[INFO] Saved HDF5 to {h5_path}")
# Optional: save a single-camera MP4
head_frames = cam_buffers["cam_head"]
if head_frames:
fps = int(round(1.0 / dt))
video_path = os.path.join(log_dir, "cam_head.mp4")
imageio.mimsave(video_path, head_frames, fps=fps)
print(f"[INFO] Saved video to {video_path}")
env.close()
if __name__ == "__main__":
main()
simulation_app.close()
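For downstream consumers, a minimal read-back sketch of the rollout file (assuming only the dataset and attribute names written above):

import h5py

with h5py.File("rollout_multi_cam.h5", "r") as f:
    joint_pos = f["joint_pos"][:]             # (T, num_envs, num_joints)
    actions = f["actions"][:]                 # (T, num_envs, num_actions)
    head_rgb = f["cams/cam_head/rgb"][:]      # (T, H, W, 3) RGB frames of env 0
    fx = f["cams/cam_head/rgb"].attrs["fx"]   # intrinsics are stored as dataset attributes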

View File

@@ -18,8 +18,9 @@ from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
MINDBOT_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
# 1. UPDATE THE USD PATH to your local file
-usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot_cd.usd",
-# usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot.usd",
+# C:\Users\PC\workpalce\maic_usd_assets\twinlab\MIC_sim-3.0\pupet_arm_sim_product\sim_data_acquisition\usds\Collected_robot_2_3\robot_2_4.usd
+usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot_cd2.usd",
+# usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/pupet_arm_sim_product/sim_data_acquisition/usds/Collected_robot_2_3/robot_2_6.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0,
@@ -66,26 +67,26 @@ MINDBOT_CFG = ArticulationCfg(
# "l_joint6": 0.0,
# Right arm joints
# -45° -> -0.7854
-# "r_joint1": -0.7854,
-# # 70° -> 1.2217
-# "r_joint2": 1.2217,
-# # 90° -> 1.5708
-# "r_joint3": 1.5708,
-# # -90° -> -1.5708
-# "r_joint4": -1.5708,
-# # 90° -> 1.5708
-# "r_joint5": 1.5708,
-# # 70° -> 1.2217
-# "r_joint6": 1.2217,
+"r_joint1": -0.7854,
+# 70° -> 1.2217
+"r_joint2": 1.2217,
+# 90° -> 1.5708
+"r_joint3": 1.5708,
+# -90° -> -1.5708
+"r_joint4": -1.5708,
+# 90° -> 1.5708
+"r_joint5": 1.5708,
+# 70° -> 1.2217
+"r_joint6": 1.2217,
# # Right arm joints
-"r_joint1": 0.0,
-"r_joint2": -1.5708,
+# "r_joint1": 0.0,
+# # "r_joint2": -1.5708,
# "r_joint2": 0.0,
-"r_joint3": 0.0,
-"r_joint4": 0.0,
-"r_joint5": 0.0,
-"r_joint6": 0.0,
+# "r_joint3": 0.0,
+# "r_joint4": 0.0,
+# "r_joint5": 0.0,
+# "r_joint6": 0.0,
# # left wheel # # # left wheel
# "left_b_revolute_Joint": 0.0,
# "left_f_revolute_Joint": 0.0,
# # right wheel
@@ -102,7 +103,7 @@ MINDBOT_CFG = ArticulationCfg(
# trunk
"PrismaticJoint": 0.3,
# head
-"head_revoluteJoint": 0.5236
+"head_revoluteJoint": 0.0 #0.5236
},
# The initial (x, y, z) position of the robot's base in the world
pos=(0, 0, 0.05),
@@ -114,7 +115,7 @@ MINDBOT_CFG = ArticulationCfg(
joint_names_expr=["l_joint[1-6]"], # This matches l_joint1, l_joint2, ..., l_joint6
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs
-stiffness=0.0, #100000.0, # Note: Tune this for desired control performance
+stiffness=10000.0, #10000.0, # Note: Tune this for desired control performance
damping=1000.0, #10000.0, # Note: Tune this for desired control performance
),
# Group for the 6 right arm joints using a regular expression
@@ -122,7 +123,7 @@ MINDBOT_CFG = ArticulationCfg(
joint_names_expr=["r_joint[1-6]"], # This matches r_joint1, r_joint2, ..., r_joint6
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs
-stiffness=10000.0, #100000.0, # Note: Tune this for desired control performance1
+stiffness=0.0, #10000.0, # Note: Tune this for desired control performance1
damping=1000.0, #10000.0, # Note: Tune this for desired control performance
),
"head": ImplicitActuatorCfg(joint_names_expr=["head_revoluteJoint"], stiffness=10000.0, damping=1000.0),
@@ -142,9 +143,10 @@ MINDBOT_CFG = ArticulationCfg(
),
"grippers": ImplicitActuatorCfg(
joint_names_expr=[".*_hand_joint.*"],
-stiffness=10000.0,
+stiffness=10000.0, #10000.0
damping=1000.0,
effort_limit_sim=50.0, # default is 10, gemini said it is too small, so I set it to 50.
velocity_limit_sim=50.0,
),
},
)
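For ImplicitActuatorCfg, stiffness and damping are applied as the PhysX joint-drive gains, so the stiffness swap above changes which arm actually tracks position targets. Roughly, as a simplified sketch of the drive law (ignoring limits and armature):

# tau = stiffness * (q_target - q) + damping * (qd_target - qd)

With the new values the left arm (stiffness=10000.0) holds its position targets, while the right arm (stiffness=0.0, damping=1000.0) is only velocity-damped.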

View File

@@ -0,0 +1,30 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Template-centrifuge-lidup-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.centrifuge_env_cfg:MindbotEnvCfg",
# "env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
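Once registered, the environment is referenced by this id from the entry scripts; a usage sketch (the train.py counterpart to the play.py shown above is assumed to exist):

# python scripts/rsl_rl/train.py --task Template-centrifuge-lidup-v0 --num_envs 4096 --headless
# python scripts/rsl_rl/play.py --task Template-centrifuge-lidup-v0 --num_envs 4 --video --video_length 2000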

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,72 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
    num_steps_per_env = 48  # slightly more steps per rollout
    max_iterations = 5000  # more iterations, so the policy has enough time to learn
    save_interval = 100
    # experiment_name = "mindbot_grasp"  # renamed so it is no longer called "cartpole"
    experiment_name = "mindbot_pullUltrasoundLidUp"
    policy = RslRlPpoActorCriticCfg(
        init_noise_std=0.7,
        actor_obs_normalization=True,  # enable observation normalization
        critic_obs_normalization=True,  # enable observation normalization
        # larger network capacity: a three-layer MLP with sufficient width
        actor_hidden_dims=[128, 64, 32],
        critic_hidden_dims=[128, 64, 32],
        activation="elu",
    )
    algorithm = RslRlPpoAlgorithmCfg(
        value_loss_coef=1.0,
        use_clipped_value_loss=True,
        clip_param=0.2,
        entropy_coef=0.005,  # keep a moderate amount of exploration
        num_learning_epochs=5,
        num_mini_batches=8,
        learning_rate=3e-4,  # if training becomes unstable with the larger network, try adjusting this (e.g. 5.0e-4)
        schedule="adaptive",
        gamma=0.99,
        lam=0.95,
        desired_kl=0.01,
        max_grad_norm=1.0,
    )
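# Usage note (illustrative, not part of the original file): this PPORunnerCfg is resolved through the
# "rsl_rl_cfg_entry_point" registered for "Template-centrifuge-lidup-v0" in the task __init__.py, so a
# launch along the lines of the command below should pick it up. The exact script path is an assumption.
#
#   python scripts/rsl_rl/train.py --task Template-centrifuge-lidup-v0 --num_envs 5 --headless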
# @configclass
# class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
# num_steps_per_env = 16
# max_iterations = 150
# save_interval = 50
# experiment_name = "cartpole_direct"
# policy = RslRlPpoActorCriticCfg(
# init_noise_std=1.0,
# actor_obs_normalization=False,
# critic_obs_normalization=False,
# actor_hidden_dims=[32, 32],
# critic_hidden_dims=[32, 32],
# activation="elu",
# )
# algorithm = RslRlPpoAlgorithmCfg(
# value_loss_coef=1.0,
# use_clipped_value_loss=True,
# clip_param=0.2,
# entropy_coef=0.005,
# num_learning_epochs=5,
# num_mini_batches=4,
# learning_rate=1.0e-3,
# schedule="adaptive",
# gamma=0.99,
# lam=0.95,
# desired_kl=0.01,
# max_grad_norm=1.0,
# )

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,636 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math

import torch
from isaaclab.actuators import ImplicitActuatorCfg
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
from isaaclab.sensors import CameraCfg
from . import mdp
##
# Pre-defined configs
##
from mindbot.robot.mindbot import MINDBOT_CFG
# ====== Other object configurations ======
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
TABLE_CFG=RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Table",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.95, -0.3, 0.005],
rot=[0.7071, 0.0, 0.0, 0.7071],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
LID_CFG = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Lid",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.95, 0.73, 0.1055],
rot=[1.0, 0.0, 0.0, 0.0],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/Lid_B.usd",
# scale=(0.2, 0.2, 0.2),
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
max_angular_velocity=1000.0,
max_linear_velocity=1000.0,
max_depenetration_velocity=0.5,#original 5.0
linear_damping=5.0, #original 0.5
angular_damping=5.0, #original 0.5
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
CENTRIFUGE_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/centrifuge.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0,
linear_damping=0.5,
angular_damping=0.5,
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
stabilization_threshold=1e-6,
            # [Important] The base must be fixed, otherwise the centrifuge tips over when the robot presses on the lid
            # fix_root_link=True,
        ),
    ),
    init_state=ArticulationCfg.InitialStateCfg(
        # 1. Define the initial joint angles here, mirroring the robot configuration
        joint_pos={
            "r1": math.radians(0.0),  # rotor position (does not matter much)
            # [Key change]: start with the lid open.
            # The USD joint limits are (-100, 0); -100 degrees is fully open.
            "r2": math.radians(-100.0),
},
pos=(0.95, -0.3, 0.855),
rot=[-0.7071, 0.0, 0.0, 0.7071],
),
actuators={
"lid_passive_mechanism": ImplicitActuatorCfg(
joint_names_expr=["r2"],
            # [Key difference]
            # The robot arm uses stiffness 10000.0 because the arm must hold its pose precisely without wobbling.
            #
            # The centrifuge lid, however, would behave as if welded in mid-air at 10000 and the robot could never push it.
            # Around 200.0 works here:
            # 1. Strong enough to counter gravity, so the lid stays open initially.
            # 2. Soft enough that the lid yields and closes when the robot presses down on it.
            stiffness=200.0,
            # Damping: a little resistance to mimic a gas strut and keep the lid from oscillating like a spring
            damping=20.0,
            # Keep the effort limit high enough, otherwise the actuator cannot hold the lid up
            effort_limit_sim=1000.0,
            velocity_limit_sim=100.0,
        ),
        # The rotor can be stiff; it never needs to be pushed by the robot
"rotor_control": ImplicitActuatorCfg(
joint_names_expr=["r1"],
stiffness=1000.0,
damping=10.0,
),
}
)
# CENTRIFUGE_CFG = ArticulationCfg(
# spawn=sim_utils.UsdFileCfg(
# usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/centrifuge.usd",
# rigid_props=sim_utils.RigidBodyPropertiesCfg(
# disable_gravity=False,
# max_depenetration_velocity=1.0,
# linear_damping=0.5,
# angular_damping=0.5,
# ),
# articulation_props=sim_utils.ArticulationRootPropertiesCfg(
# enabled_self_collisions=False,#
# solver_position_iteration_count=32,
# solver_velocity_iteration_count=16,
# stabilization_threshold=1e-6,
# fix_root_link=True,
# ),
# ),
# init_state=ArticulationCfg.InitialStateCfg(
# joint_pos={
# "r1": math.radians(-100.0),
# "r2": math.radians(-100.0),
# },
# pos=(0.95, -0.3, 0.855),
# rot=[-0.7071, 0.0, 0.0, 0.7071],
# ),
# # actuators={}
# actuators={
# "passive_damper": ImplicitActuatorCfg(
# # ".*" 表示匹配该USD文件内的所有关节无论是轮子、屏幕转轴还是其他
# joint_names_expr=["r2"],
# # 【关键逻辑】
# # 使用较高的 Stiffness (刚度) 模拟电机或弹簧锁死在特定位置
# # 这样盖子才不会掉下来
# stiffness=200.0,
# # 适当的阻尼,防止盖子像弹簧一样疯狂抖动
# damping=20.0,
# # 确保力矩足够克服盖子的重力
# effort_limit_sim=1000.0,
# velocity_limit_sim=100.0,
# ),
# # 如果有其他关节(如转子r1),可以单独配置
# "rotor_control": ImplicitActuatorCfg(
# joint_names_expr=["r1"],
# stiffness=1000.0,
# damping=10.0,
# # joint_names_expr=[".*"],
# # stiffness=0.0,
# # damping=10.0,
# # effort_limit_sim=100.0,
# # velocity_limit_sim=100.0,
# ),
# }
# )
ROOM_CFG = AssetBaseCfg(
prim_path="{ENV_REGEX_NS}/Room",
spawn=UsdFileCfg(
usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
),
)
##
# Scene definition
##
@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
"""Configuration for a cart-pole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
# Room: AssetBaseCfg = ROOM_CFG.replace(prim_path="{ENV_REGEX_NS}/Room")
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
centrifuge: ArticulationCfg = CENTRIFUGE_CFG.replace(prim_path="{ENV_REGEX_NS}/centrifuge")
lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
"""
    Camera definitions (currently disabled):
# head_camera=CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
# update_period=1/120,
# height=480,
# width=640,
# data_type=["rgb"],
# spawn=None,
# )
"""
# left_hand_camera=CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
# update_period=1/120,
# height=480,
# width=640,
# data_types=["rgb"],
# spawn=None,
# )
# right_hand_camera=CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
# update_period=1/120,
# height=480,
# width=640,
# data_types=["rgb"],
# spawn=None,
# )
# head_camera=CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
# update_period=1/120,
# height=480,
# width=640,
# data_types=["rgb"],
# spawn=None,
# )
# chest_camera=CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
# update_period=1/120,
# height=480,
# width=640,
# data_types=["rgb"],
# spawn=None,
# )
##
# MDP settings
##
def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
return pos_w
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
    # Task-space controller: the policy outputs a 6-D end-effector pose increment (position + orientation)
    left_arm_ee = DifferentialInverseKinematicsActionCfg(
        asset_name="Mindbot",
        joint_names=["l_joint[1-6]"],  # the 6 left-arm joints
        body_name="left_hand_body",  # end-effector body name
        controller=DifferentialIKControllerCfg(
            command_type="pose",  # control position + orientation
            use_relative_mode=True,  # relative mode: actions are increments
            ik_method="dls",  # damped least squares
        ),
        scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
    )
    # right_arm_fixed = mdp.JointPositionActionCfg(
    #     asset_name="Mindbot",
    #     joint_names=["r_joint[1-6]"],  # r_joint1 to r_joint6
    #     # 1. scale = 0: the RL policy cannot move these joints
    #     scale=0.0,
    #     # 2. offset = target angles (radians): tells the motors to hold this pose,
    #     #    corresponding to (135, -70, -90, 90, 90, -70) degrees
    #     offset={
    #         "r_joint1": 2.3562,
    #         "r_joint2": -1.2217,
    #         "r_joint3": -1.5708,
    #         "r_joint4": 1.5708,
    #         "r_joint5": 1.5708,
    #         "r_joint6": -1.2217,
    #     },
    # )
    grippers_position = mdp.BinaryJointPositionActionCfg(
        asset_name="Mindbot",
        joint_names=["left_hand_joint_left", "left_hand_joint_right"],
        # Note: the dictionary format is required here
        # open_command_expr={"joint-name regex": value}
        open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
        # close_command_expr={"joint-name regex": value}
        close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
    )
    trunk_position = mdp.JointPositionActionCfg(
        asset_name="Mindbot",
        joint_names=["PrismaticJoint"],
        scale=0.00,
        offset=0.3,
        clip=None,
    )
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
centrifuge_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("centrifuge")})
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class EventCfg:
"""Configuration for events."""
    # Reset all joints to the init_state (no offset); this term is required
reset_mindbot_all_joints = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
    # 2. Base mounting error (consider shrinking this to about 2 cm)
reset_mindbot_base = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"pose_range": {
"x": (-0.05, 0.05),
"y": (-0.05, 0.05),
"z": (0.74, 0.75),
"yaw": (-0.1, 0.1),
},
"velocity_range": {"x": (0.0, 0.0), "y": (0.0, 0.0)},
},
)
    # 1. Small random offset on the arm joints (0.01 rad ≈ 0.6 deg, a suitable amount)
reset_mindbot_left_arm = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
"position_range": (-0.01, 0.01),
"velocity_range": (0.0, 0.0),
},
)
    reset_table = EventTerm(
        func=mdp.reset_root_state_uniform,
        mode="reset",
        params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}},
    )
reset_centrifuge = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("centrifuge"),
"pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)},
"velocity_range": {"x": (0.0, 0.0)}
}
)
reset_lid = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("lid"),
"pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)},
"velocity_range": {"x": (0.0, 0.0)}
}
)
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
gripper_lid_orientation_alignment = RewTerm(
func=mdp.gripper_lid_orientation_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"gripper_body_name": "left_hand_body",
"gripper_forward_axis": (0.0, 0.0, 1.0),
"gripper_width_axis": (0.0, 1.0, 0.0),
"lid_handle_axis": (0.0, 1.0, 0.0),
"max_angle_deg": 85.0, # 允许60度内的偏差
},
weight=5.0 #original 5.0
)
    # stage 2
    # Position-alignment reward: encourage the gripper fingers to straddle the lid, with the gripper center slightly above it in Z
gripper_lid_position_alignment = RewTerm(
func=mdp.gripper_lid_position_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"height_offset": 0.07, # Z方向lid 上方 0.1m
"std": 0.3, # 位置对齐的容忍度
},
weight=3.0 #original 3.0
)
approach_lid_penalty = RewTerm(
func=mdp.gripper_lid_distance_penalty,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"height_offset": 0.07,
},
        # A negative weight turns this distance into a penalty;
        # e.g. with weight=-1.0, a gripper-to-target distance of 0.5 m contributes -0.5 to the total reward.
weight=-1.0
)
    # [New] Fine alignment (guides the gripper into the final ~2 cm circle)
gripper_lid_fine_alignment = RewTerm(
func=mdp.gripper_lid_position_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"height_offset": 0.07, # 【注意】这个高度必须非常准,和 interaction 判定高度一致
"std": 0.05, # 非常陡峭的梯度,只在 5-10cm 内有效
},
        weight=10.0,  # high weight
)
gripper_close_interaction = RewTerm(
func=mdp.gripper_close_when_near,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"joint_names": ["left_hand_joint_left", "left_hand_joint_right"],
"target_close_pos": 0.03,
"height_offset": 0.04,
"grasp_radius": 0.03,
},
weight=10.0
)
lid_lifted_reward = RewTerm(
        func=mdp.lid_is_lifted,  # reuse the function already defined in rewards.py
        params={
            "lid_cfg": SceneEntityCfg("lid"),
            "minimal_height": 1.0,  # tune for your scene; roughly 5 cm above the initial height
        },
        weight=10.0,  # a sufficiently large weight
)
lid_lifting_reward = RewTerm(
func=mdp.lid_lift_progress_reward,
params={
"lid_cfg": SceneEntityCfg("lid"),
"initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
"target_height_lift": 0.2,
"height_offset": 0.07, # 与其他奖励保持一致
"std": 0.1
},
# 权重设大一点,告诉它这是最终目标
weight=10.0
)
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
mindbot_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
)
# lid_fly_away = DoneTerm(
# func=mdp.base_height_failure,
# params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
# )
    # New: lid-dropped termination
    # Triggers if the lid falls back onto the table (height close to its initial value) after some time has
    # passed, to avoid triggering right after a reset.
    # lid_dropped = DoneTerm(
    #     func=mdp.base_height_failure,  # reuse the height-check function
    #     params={
    #         "asset_cfg": SceneEntityCfg("lid"),
    #         "minimum_height": 0.79,  # assuming an initial height of ~0.9, dropping below ~0.88 means it fell or was pushed in
    #     },
    # )
##
# Environment configuration
##
@configclass
class CurriculumCfg:
pass
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
    # position_std_scheduler = CurrTerm(
    #     func=mdp.modify_term_cfg,  # use the built-in Isaac Lab term directly
    #     params={
    #         # path: rewards manager -> reward term name -> params dict -> "std" key
    #         "address": "rewards.gripper_lid_position_alignment.params.std",
    #         # modification logic: the annealing function written in curriculums.py
    #         "modify_fn": mdp.annealing_std,
    #         # arguments passed to that function
    #         "modify_params": {
    #             "start_step": 0,       # around iteration 600
    #             "end_step": 5_000,     # around iteration 1200
    #             "start_std": 0.3,
    #             "end_std": 0.05,
    #         }
    #     }
    # )
@configclass
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=5.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# curriculum: CurriculumCfg = CurriculumCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
        self.decimation = 4  # original 2; with sim.dt = 1/120, decimation 4 gives a 30 Hz control rate
        self.episode_length_s = 10
        # viewer settings
        self.viewer.eye = (3.5, 0.0, 13.2)  # (3.5, 0.0, 3.2)
        # simulation settings
        self.sim.dt = 1 / 120  # original 1 / 60
        self.sim.render_interval = self.decimation
        # 1. base GPU heap memory
        self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024  # 256 MB
        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
        self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # ~16M contact points
        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # ~1M patches
        # 5. aggregate-pair capacities (for complex articulations)
        self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
        self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
        self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024

View File

@@ -0,0 +1,13 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the environment."""
from isaaclab.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403
from .gripper import * # noqa: F401, F403
from .curriculums import * # noqa: F401, F403

View File

@@ -0,0 +1,50 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations

import torch
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from isaaclab.envs import ManagerBasedRLEnv

from isaaclab.envs.mdp import modify_term_cfg


def annealing_std(
    env: ManagerBasedRLEnv,
    env_ids: torch.Tensor,
    current_value: float,
    start_step: int,
    end_step: int,
    start_std: float,
    end_std: float,
):
    """Linearly anneal the std value with the global step count.

    Args:
        current_value: the current parameter value (passed in automatically).
        start_step: step at which annealing starts.
        end_step: step at which annealing ends.
        start_std: initial std.
        end_std: final std.
    """
    current_step = env.common_step_counter
    # 1. before the start step -> keep the initial value
    if current_step < start_step:
        # force the value to start_std in case it has not been set yet
        return start_std
    # 2. past the end step -> keep the final value
    elif current_step >= end_step:
        return end_std
    # 3. in between -> linear interpolation
    else:
        ratio = (current_step - start_step) / (end_step - start_step)
        new_std = start_std + (end_std - start_std) * ratio
        return new_std
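# Illustrative usage (not part of the original file): annealing_std is meant to be referenced from a
# CurriculumTermCfg via modify_term_cfg, as in the commented-out scheduler in centrifuge_env_cfg.py.
# The addresses and values below are taken from that config; treat this as a sketch.
#
# position_std_scheduler = CurrTerm(
#     func=mdp.modify_term_cfg,
#     params={
#         "address": "rewards.gripper_lid_position_alignment.params.std",
#         "modify_fn": mdp.annealing_std,
#         "modify_params": {"start_step": 0, "end_step": 5_000, "start_std": 0.3, "end_std": 0.05},
#     },
# )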

View File

@@ -0,0 +1,54 @@
# Assume this lives in your mdp package (e.g. mdp/gripper.py)
from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
from isaaclab.utils import configclass
import torch


class CoupledJointPositionAction(JointPositionAction):
    """Runtime action term: couples two gripper joints so they mirror a single 1-D command.

    It inherits from JointPositionAction and is the class instantiated at runtime.
    """

    def __init__(self, cfg: 'CoupledJointPositionActionCfg', env):
        super().__init__(cfg, env)

    @property
    def action_dim(self) -> int:
        """Make the ActionManager expect a single 1-D input."""
        return 1

    def process_actions(self, actions: torch.Tensor):
        scale = self.cfg.scale
        offset = self.cfg.offset
        # store the raw actions
        self._raw_actions[:] = torch.clamp(actions, -1, 1)
        # apply the affine transformation
        target_position_interval = self._raw_actions * scale + offset
        right_cmd = target_position_interval
        left_cmd = -target_position_interval
        # import pdb; pdb.set_trace()
        self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)
        # print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}")
        # clip actions
        if self.cfg.clip is not None:
            self._processed_actions = torch.clamp(
                self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
            )


@configclass
class CoupledJointPositionActionCfg(JointPositionActionCfg):
    """Configuration class. The key is that class_type points to the implementation above."""

    # !!! Key point !!!
    # Tells the ActionManager: "when you see this config, instantiate CoupledJointPositionAction"
    class_type = CoupledJointPositionAction

    def __post_init__(self) -> None:
        # This check is fine here because the ActionManager invokes it at initialization time
        if len(self.joint_names) != 2:
            raise ValueError("Requires exactly two joint names.")
        super().__post_init__()
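# Illustrative usage (assumption, not part of the original file): the coupled action could replace the
# binary gripper action in ActionsCfg, mapping one policy output onto the two mirrored finger joints.
# The scale/offset values below are hypothetical.
#
# grippers_position = CoupledJointPositionActionCfg(
#     asset_name="Mindbot",
#     joint_names=["left_hand_joint_left", "left_hand_joint_right"],
#     scale=0.015,   # hypothetical half-range of the finger travel
#     offset=0.015,  # hypothetical mid-point of the finger travel
# )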

View File

@@ -0,0 +1,881 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
"""Reward the agent for lifting the lid above the minimal height."""
lid: RigidObject = env.scene[lid_cfg.name]
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
height = lid.data.root_pos_w[:, 2]
return torch.where(height > minimal_height, 1.0, 0.0)
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for reaching the lid using tanh-kernel."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
    # cast std to a plain float so it cannot end up as a (3,) vector
if not isinstance(std, float):
if torch.is_tensor(std):
std = std.item()
else:
std = float(std)
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
# body_idx = robot.find_bodies(gripper_body_name)[0]
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
return 1 - torch.tanh(distance / std)
def lid_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
                robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for grasping the lid (close and lifted)."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
body_idx = robot.find_bodies(gripper_body_name)[0]
gripper_pos_w = robot.data.body_pos_w[:, body_idx, :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
# Check if close and lifted
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
return torch.where(is_close & is_lifted, 1.0, 0.0)
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Combined reward for reaching the lid AND lifting it."""
# Get reaching reward
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
# Get lifting reward
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
# Combine rewards multiplicatively
return reach_reward * lift_reward
# def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand__l",
# right_gripper_name: str = "left_hand_r",
# height_offset: float = 0.1,
# std: float = 0.1) -> torch.Tensor:
# """奖励函数夹爪相对于lid的位置对齐。
# Args:
# env: 环境实例
# lid_cfg: lid的配置
# robot_cfg: 机器人的配置
# left_gripper_name: 左侧夹爪body名称
# right_gripper_name: 右侧夹爪body名称
# side_distance: Y方向的期望距离
# height_offset: Z方向的期望高度偏移
# std: tanh核函数的标准差
# Returns:
# 位置对齐奖励 (num_envs,)
# """
# lid: RigidObject = env.scene[lid_cfg.name]
# robot: Articulation = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3]
# # 获取两个夹爪的位置
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
# # 计算夹爪中心位置
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
# # 计算相对位置(夹爪中心相对于 lid 中心)
# rel_pos = gripper_center - lid_pos_w # (num_envs, 3)
# # X 方向位置:应该对齐(接近 0
# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6
# x_reward = 1.0 - torch.tanh(x_dist / std)
# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance
# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6
# # y_error = torch.abs(y_dist - side_distance)
# # y_reward = 1.0 - torch.tanh(y_error / std)
# y_reward = 1.0 - torch.tanh(y_dist / std)
# # Z 方向位置:应该在 lid 上方 height_offset 处
# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6
# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6
# z_reward = 1.0 - torch.tanh(z_error / std)
# # 组合位置奖励
# position_reward = x_reward * y_reward * z_reward
# return position_reward
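# NOTE: gripper_lid_position_alignment is defined twice in this module; the second definition further below
# overrides this one at import time, so that is the version the reward manager actually uses.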
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
height_offset: float = 0.02,
std: float = 0.1) -> torch.Tensor:
"""奖励函数夹爪相对于lid的位置对齐简化版
计算夹爪中心到目标点lid上方height_offset处的距离奖励。
Args:
env: 环境实例
lid_cfg: lid的配置
robot_cfg: 机器人的配置
left_gripper_name: 左侧夹爪body名称
right_gripper_name: 右侧夹爪body名称
height_offset: Z方向的期望高度偏移目标点在lid上方
std: tanh核函数的标准差
Returns:
位置对齐奖励 (num_envs,)
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
# 目标点lid位置 + height_offsetZ方向
target_pos = lid_pos_w.clone()
# print(target_pos[0])
target_pos[:, 2] += height_offset # 在lid上方height_offset处
# print(target_pos[0])
# 获取两个夹爪的位置
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
# 计算夹爪中心位置
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
# print(gripper_center[0])
# 计算夹爪中心到目标点的3D距离
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
# 距离奖励:距离越小,奖励越大
position_reward = 1.0 - torch.tanh(distance / std)
return position_reward
# ... (original imports retained)
# ==============================================================================
# New: direct distance penalty (linear guidance)
# ==============================================================================
def gripper_lid_distance_penalty(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
height_offset: float = 0.07,
) -> torch.Tensor:
"""
计算夹爪中心到目标点的欧式距离。
返回的是正的距离值,我们会在 config 里给它设置负权重 (比如 weight=-1.0)。
这样:距离越远 -> 惩罚越大 (Total Reward 越低)。
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# 1. 目标点 (Lid中心 + 高度偏移)
lid_pos_w = lid.data.root_pos_w[:, :3].clone()
lid_pos_w[:, 2] += height_offset
# 2. 夹爪中心
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
right_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
gripper_center = (left_pos + right_pos) / 2.0
# 3. 计算距离
distance = torch.norm(gripper_center - lid_pos_w, dim=1)
return distance
# ==============================================================================
# Refined: position alignment (same function, with the logic kept explicit)
# ==============================================================================
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
height_offset: float = 0.02,
std: float = 0.1) -> torch.Tensor:
"""奖励函数:基于 tanh 核函数的距离奖励 (靠近得越多,分数涨得越快)"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
target_pos = lid_pos_w.clone()
target_pos[:, 2] += height_offset
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
    # unchanged here; what matters is how std and weight are set in the config
position_reward = 1.0 - torch.tanh(distance / std)
return position_reward
# def gripper_lid_orientation_alignment(
# env,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# gripper_body_name: str = "left_hand_body",
# gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z
# gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行)
# lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y
# ) -> torch.Tensor:
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 获取姿态
# lid_quat_w = lid.data.root_quat_w
# body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
# gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
# # 3. 垂直对齐 (Vertical Alignment)
# # 目标:夹爪 +Y 指向 World -Z
# grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
# grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
# # print(grip_fwd_world[0])
# world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
# dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
# dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) #add
# # 映射 [-1, 1] -> [0, 1],且全程有梯度
# # 即使是朝天 (+1),只要稍微往下转一点,分数就会增加
# # rew_vertical = (dot_vertical + 1.0) / 2.0
# val_vertical = torch.clamp((dot_vertical + 1.0) / 2.0, 0.0, 1.0)
# rew_vertical = torch.pow(val_vertical, 2) + 1e-4
# # nan -> 0
# rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
# # 4. 偏航对齐 (Yaw Alignment)
# # 目标:夹爪 +Z 平行于 Lid +Y
# grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
# grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
# lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
# lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
# dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
# rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
# # nan -> 0
# rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
# # 5. 组合
# total_reward = rew_vertical * rew_yaw
# total_reward=torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
# # debug
# if not torch.isfinite(total_reward).all():
# print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
# print(f"rew_vertical: {rew_vertical}")
# print(f"rew_yaw: {rew_yaw}")
# return total_reward
def gripper_lid_orientation_alignment(
env,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body",
    gripper_forward_axis: tuple = (0.0, 0.0, 1.0),  # corrected to Z
    gripper_width_axis: tuple = (0.0, 1.0, 0.0),    # corrected to Y (kept parallel to the handle)
    lid_handle_axis: tuple = (0.0, 1.0, 0.0),       # corrected to Y
    max_angle_deg: float = 60.0,  # maximum allowed angular deviation (degrees)
) -> torch.Tensor:
    # 1. fetch the objects
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    # 2. fetch the poses
    lid_quat_w = lid.data.root_quat_w
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
    # 3. vertical alignment
    # goal: the gripper forward axis points along world -Z (downwards)
    grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
    grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
    world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
    # dot product (for unit vectors, dot = cos(angle))
    dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
    dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0)
    # angle in radians
    angle_rad = torch.acos(torch.clamp(dot_vertical, -1.0, 1.0))
    max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device)
    # reward only when the angle <= max_angle_deg,
    # decaying smoothly from 1.0 at angle = 0 to 0.0 at angle = max_angle
    angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
    rew_vertical = 1.0 - angle_normalized  # smaller angle -> larger reward
    # zero reward beyond the maximum angle
    rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical))
    # nan -> 0
    rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
    # 4. yaw alignment
    # goal: the gripper width axis is parallel to the lid handle axis
    grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
    grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
    lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
    lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
    dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
    rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
    # nan -> 0
    rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
    # 5. combine
    total_reward = rew_vertical * rew_yaw
    total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
    # debug
    if not torch.isfinite(total_reward).all():
        print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
        print(f"rew_vertical: {rew_vertical}")
        print(f"rew_yaw: {rew_yaw}")
    return total_reward
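# Worked example (illustrative): with max_angle_deg=85 as used in RewardsCfg, a gripper tilted 30 degrees
# from vertical gives angle_normalized = 30/85 ≈ 0.35, so rew_vertical ≈ 0.65; if the width axis is within
# ~20 degrees of the handle axis, rew_yaw ≈ cos(20°)^2 ≈ 0.88, and the combined reward is ≈ 0.57.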
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body", # 用于计算距离的 body
# # 注意:确保 joint_names 只包含那两个夹爪关节
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05, # 5cm 以内视为“近”,需要闭合
# target_open_pos: float = 0.0, # 【修正】张开是 0.0
# target_close_pos: float = 0.03 # 【修正】闭合是 0.03
# ) -> torch.Tensor:
# """
# 逻辑很简单:
# 1. 如果 距离 < close_threshold目标关节位置 = 0.03 (闭合)
# 2. 如果 距离 >= close_threshold目标关节位置 = 0.0 (张开)
# 3. 返回 -abs(当前关节 - 目标关节),即惩罚误差。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算距离 (末端 vs Lid中心)
# lid_pos_w = lid.data.root_pos_w[:, :3]
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # distance: (num_envs,)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 3. 动态确定目标关节值
# # 如果距离小于阈值,目标设为 close_pos否则设为 open_pos
# # target_val: (num_envs, 1)
# # print(distance)
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 4. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# # joint_pos: (num_envs, 2)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# # 取绝对值防止左指是负数的情况虽然你的配置里范围看起来都是正的加上abs更保险
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 5. 计算误差
# # error: (num_envs,) -> 两个手指误差的平均值
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # # 6. 返回负奖励 (惩罚)
# # # 误差越大,惩罚越大。完全符合要求时奖励为 0。
# # return -error
# # =====================================================
# # 🚀 核心修复:安全输出
# # =====================================================
# # 1. 使用 tanh 限制数值范围在 [-1, 0]
# reward = -torch.tanh(error / 0.01)
# # 2. 过滤 NaN这一步能救命
# # 如果物理引擎算出 NaN这里会把它变成 0.0,防止训练崩溃
# reward = torch.nan_to_num(reward, nan=0.0, posinf=0.0, neginf=-1.0)
# return reward
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body",
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05,
# target_open_pos: float = 0.0,
# target_close_pos: float = 0.03,
# height_offset: float = 0.06, # 新增:与位置奖励保持一致
# ) -> torch.Tensor:
# """
# 逻辑:
# 1. 计算夹爪到【目标抓取点】(Lid上方 height_offset 处) 的距离。
# 2. 如果距离 < close_threshold目标关节位置 = 闭合。
# 3. 否则:目标关节位置 = 张开。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算目标抓取点位置 (Lid位置 + Z轴偏移)
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 修正为抓取点位置
# # 获取夹爪位置
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # 3. 计算距离 (夹爪 vs 目标抓取点)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 4. 动态确定目标关节值
# # 靠近抓取点 -> 闭合;远离 -> 张开
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 5. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 6. 计算误差并返回奖励
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # 使用 tanh 限制数值范围
# reward = 1.0 - torch.tanh(error / 0.01) # 误差越小奖励越大
# return torch.nan_to_num(reward, nan=0.0)
def _is_grasped(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg,
robot_cfg: SceneEntityCfg,
left_gripper_name: str,
right_gripper_name: str,
joint_names: list,
height_offset: float,
grasp_radius: float,
target_close_pos: float
) -> torch.Tensor:
"""
内部辅助函数:判定是否成功抓取。
条件:
1. 夹爪中心在把手目标点附近 (Sphere Check)
2. 夹爪处于闭合发力状态 (Joint Effort Check)
3. (关键) 夹爪Z轴高度合适不能压在盖子上面 (Z-Axis Check)
"""
# 1. 获取对象
lid = env.scene[lid_cfg.name]
robot = env.scene[robot_cfg.name]
# 2. 目标点位置 (把手中心)
lid_pos_w = lid.data.root_pos_w[:, :3].clone()
lid_pos_w[:, 2] += height_offset
# 3. 夹爪位置
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
gripper_center = (pos_L + pos_R) / 2.0
# 4. 条件一:距离判定 (在把手球范围内)
dist_to_handle = torch.norm(gripper_center - lid_pos_w, dim=-1)
is_near = dist_to_handle < grasp_radius
# 5. 条件二Z轴高度判定 (防止压在盖子上)
# 夹爪中心必须在把手目标点附近,允许上下微小浮动,但不能太高
# 假设 height_offset 是把手几何中心,那么夹爪不应该比它高太多
z_diff = gripper_center[:, 2] - lid_pos_w[:, 2]
is_z_valid = torch.abs(z_diff) < 0.03 # 3cm 容差
# 6. 条件三:闭合判定 (正在尝试闭合)
joint_ids, _ = robot.find_joints(joint_names)
# 获取关节位置 (绝对值)
joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# === 修改闭合判定 ===
# 1. 关节位置判定 (Effort Check)
# 只要用力了就行,去掉上限判定,防止夹太紧被误判
# 假设 0.05 是你的新 target_close_pos
is_effort_closing = torch.all(joint_pos > 0.005, dim=1)
# 2. 几何排斥判定 (Geometry Check)
# 如果真的夹住了东西,左右指尖的距离应该被把手撑开,不会太小
# 获取指尖位置
dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
# 假设把手厚度是 1cm (0.01m)
# 如果指尖距离 < 0.005,说明两个手指已经碰到一起了(空夹),没夹住东西
# 如果指尖距离 >= 0.005,说明中间有东西挡着
is_not_empty = dist_fingertips > 0.005
# 综合判定:
# 1. 在把手附近 (is_near)
# 2. 高度合适 (is_z_valid)
# 3. 在用力闭合 (is_effort_closing)
# 4. 指尖没贴死 (is_not_empty) -> 这就证明夹住东西了!
return is_near & is_z_valid & is_effort_closing & is_not_empty
def gripper_close_when_near(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
target_close_pos: float = 0.03,
height_offset: float = 0.02,
grasp_radius: float = 0.05
) -> torch.Tensor:
    # 1. check whether the basic grasp conditions are met
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )
    # 2. compute the clamping intensity
    robot = env.scene[robot_cfg.name]
    joint_ids, _ = robot.find_joints(joint_names)
    # absolute joint positions (num_envs, num_joints)
    current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
    # mean closing depth of the two finger joints
    mean_joint_pos = torch.mean(current_joint_pos, dim=1)
    # reward factor: current position / target close position,
    # so the reward approaches 1.0 as the joints approach 0.03,
    # which pushes the agent towards fully closing the gripper
    clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
    # 3. only pay the weighted clamping reward when the grasp check passes;
    # if the gripper is misaligned or has grasped nothing, the reward stays 0
    return torch.where(is_grasped, clamping_factor, 0.0)
# def gripper_close_when_near(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# target_close_pos: float = 0.03,
# height_offset: float = 0.03,
# grasp_radius: float = 0.02 # 球面半径 2cm
# ) -> torch.Tensor:
# # 1. 获取位置
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 把手中心
# # 获取左右指尖位置
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# # 计算夹爪中心
# pos_center = (pos_L + pos_R) / 2.0
# # 2. 距离判定 (Is Inside Sphere?)
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
# is_in_sphere = (dist_center < grasp_radius).float()
# # 3. "在中间"判定 (Is Between?)
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
# vec_LR = pos_R - pos_L # 左指指向右指
# vec_LO = lid_pos_w - pos_L # 左指指向物体
# # 计算投影比例 t
# # P_proj = P_L + t * (P_R - P_L)
# # t = (vec_LO . vec_LR) / |vec_LR|^2
# # 如果 0 < t < 1说明投影在两个指尖之间
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
# t = dot / len_sq
# is_between = (t > 0.0) & (t < 1.0)
# is_between = is_between.float()
# # 4. 闭合判定
# joint_ids, _ = robot.find_joints(joint_names)
# # 注意:如果是 Binary Action可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
# # 只有当非常接近闭合目标时才给分
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
# # 5. 最终奖励
# # 只有三者同时满足才给 1.0 分
# reward = is_in_sphere * is_between * is_closing
# return torch.nan_to_num(reward, nan=0.0)
# def lid_is_lifted(
# env:ManagerBasedRLEnv,
# minimal_height:float,
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
# ) -> torch.Tensor:
# lid = env.scene[lid_cfg.name]
# lid_height = lid.data.root_pos_w[:, 2]
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
# reward=torch.nan_to_num(reward, nan=0.0)
# return reward
def lid_is_lifted(
env: ManagerBasedRLEnv,
    minimal_height: float = 0.05,  # relative lift threshold
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
height_offset: float = 0.03,
grasp_radius: float = 0.1,
target_close_pos: float = 0.03,
) -> torch.Tensor:
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
lid = env.scene[lid_cfg.name]
    # 1. heights
    current_height = lid.data.root_pos_w[:, 2]
    # initial height, taken automatically from the default root state
    initial_height = lid.data.default_root_state[:, 2]
    # 2. lift amount
    lift_height = torch.clamp(current_height - initial_height, min=0.0)
    # 3. velocity check (guards against the lid being knocked flying)
    # if the speed is > 1.0 m/s, treat the lid as knocked away and give no reward
    lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
    is_stable = lid_vel < 1.0
    # 4. reward
    # shaping term: the higher the lift, the larger the reward
    shaping_reward = lift_height * 2.0
    # success bonus: lift exceeds the threshold
    success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
    # combine: both is_grasped AND is_stable must hold
    reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
    return torch.nan_to_num(reward, nan=0.0)
def lid_lift_success_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand_body",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# 关键参数
settled_height: float = 0.75, # 盖子落在超声仪上的稳定高度 (需根据仿真实际情况微调)
target_lift_height: float = 0.05, # 目标提升高度 (5cm)
grasp_dist_threshold: float = 0.05, # 抓取判定距离
closed_pos: float = 0.03 # 夹爪闭合阈值
) -> torch.Tensor:
"""
提升奖励:只有在【夹稳】的前提下,将盖子【相对】提升达到目标高度才给高分。
"""
# 1. 获取数据
lid = env.scene[lid_cfg.name]
robot = env.scene[robot_cfg.name]
# 盖子实时高度
lid_z = lid.data.root_pos_w[:, 2]
lid_pos = lid.data.root_pos_w[:, :3]
# 夹爪位置
body_ids, _ = robot.find_bodies([left_gripper_name])
gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
# 2. 【条件一:是否夹稳 (Is Grasped?)】
# 距离检查
distance = torch.norm(gripper_pos - lid_pos, dim=-1)
is_close = distance < grasp_dist_threshold
# 闭合检查
joint_ids, _ = robot.find_joints(joint_names)
joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
# 假设 > 80% 的闭合度算抓紧了
is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)
is_grasped = is_close & is_closed
# 3. 【条件二:计算相对提升 (Relative Lift)】
# 当前高度 - 初始稳定高度
current_lift = lid_z - settled_height
# 4. 计算奖励
lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
# 基础提升分 (Shaping Reward)
lift_reward = lift_progress
# 成功大奖 (Success Bonus)
# 如果提升超过目标高度 (比如 > 95% 的 5cm),给额外大奖
success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
# 组合:只有在 is_grasped 为 True 时才发放提升奖励
total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))
# 5. 安全输出
return torch.nan_to_num(total_reward, nan=0.0)
def lid_lift_progress_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
initial_height: float = 0.8,
target_height_lift: float = 0.15,
height_offset: float = 0.02, # 必须与抓取奖励保持一致
grasp_radius: float = 0.1, # 提升时可以稍微放宽一点半径,防止抖动丢分
target_close_pos: float = 0.03,
std: float = 0.05 # 标准差
) -> torch.Tensor:
# 1. 判定是否夹住
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
# 2. 计算高度
lid = env.scene[lid_cfg.name]
current_height = lid.data.root_pos_w[:, 2]
lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
# print(current_height)
# print(lift_height)
# 3. 计算奖励
# 只有在 is_grasped 为 True 时,才发放高度奖励
# 这样彻底杜绝了 "砸飞/撞飞" 拿分的情况
progress = torch.tanh(lift_height / std)
reward = torch.where(is_grasped, progress, 0.0)
return reward
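# Illustrative sketch (hypothetical values, plain PyTorch, independent of the simulator API):
# shows how the tanh shaping above behaves and how the grasp gate zeroes the reward even
# for a lid that sits high because it was knocked upward without being grasped.
def _demo_lift_progress_shaping() -> torch.Tensor:
    lift_height = torch.tensor([0.00, 0.02, 0.05, 0.15])   # clamped lift in metres (made-up)
    is_grasped = torch.tensor([True, True, False, True])   # made-up grasp flags
    std = 0.05
    progress = torch.tanh(lift_height / std)                # ~0.00, 0.38, 0.76, 1.00
    # the third env is high but not grasped, so it still earns 0.0
    return torch.where(is_grasped, progress, torch.zeros_like(progress))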

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def lid_dropped(env: ManagerBasedRLEnv,
minimum_height: float = -0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
return lid.data.root_pos_w[:, 2] < minimum_height
def lid_successfully_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.03,
height_threshold: float = 0.2,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# find_bodies returns (ids, names); index the single id so the result keeps shape (num_envs, 3)
body_ids, _ = robot.find_bodies([gripper_body_name])
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
return is_close & is_lifted
def gripper_at_lid_side(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l", # 改为两个下划线
right_gripper_name: str = "left_hand_r",
side_distance: float = 0.05,
side_tolerance: float = 0.01,
alignment_tolerance: float = 0.02,
height_offset: float = 0.1,
height_tolerance: float = 0.02) -> torch.Tensor:
"""Terminate when gripper center is positioned on the side of the lid at specified height.
坐标系说明:
- X 方向:两个夹爪朝中心合并的方向
- Y 方向:夹爪间空隙的方向,和 lid 的把手方向一致
- Z 方向:高度方向
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
# 获取两个夹爪的位置
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
# 计算夹爪中心位置
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
rel_pos = gripper_center - lid_pos_w
# Y 方向:应该在 lid 的 Y 方向两侧(距离 side_distance
y_dist = torch.abs(rel_pos[:, 1])
y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
# X 方向:应该对齐(接近 0
x_dist = torch.abs(rel_pos[:, 0])
x_ok = x_dist < alignment_tolerance
# Z 方向:应该在 lid 上方 height_offset 处
z_error = torch.abs(rel_pos[:, 2] - height_offset)
z_ok = z_error < height_tolerance
# 所有条件都要满足
return x_ok & y_ok & z_ok
def base_height_failure(env: ManagerBasedRLEnv,
minimum_height: float | None = None,
maximum_height: float | None = None,
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Terminate when the robot's base height is outside the specified range."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
root_pos_z = asset.data.root_pos_w[:, 2]
# check if height is outside the range
out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool)
if minimum_height is not None:
out_of_bounds |= root_pos_z < minimum_height
if maximum_height is not None:
out_of_bounds |= root_pos_z > maximum_height
return out_of_bounds
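# Illustrative wiring sketch (assumptions: the config-class and term names below are
# placeholders, and the scene entity names follow the defaults used above). It shows how
# these termination functions could be registered with the termination manager.
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.utils import configclass


@configclass
class ExampleTerminationsCfg:
    """Example termination terms using the functions defined in this file (placeholder values)."""

    dropped = DoneTerm(
        func=lid_dropped,
        params={"minimum_height": -0.05, "lid_cfg": SceneEntityCfg("lid")},
    )
    grasped = DoneTerm(
        func=lid_successfully_grasped,
        params={"distance_threshold": 0.03, "height_threshold": 0.2},
    )
    base_out_of_range = DoneTerm(
        func=base_height_failure,
        params={"minimum_height": 0.2, "asset_cfg": SceneEntityCfg("Mindbot")},
    )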

View File

@@ -177,200 +177,11 @@ def gripper_handle_orientation_alignment(
# debug
if not torch.isfinite(total_reward).all():
print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
print(f"rew_vertical: {rew_forward}")
print(f"rew_vertical: {dot_fwd}")
print(f"rew_yaw: {rew_yaw}")
return total_reward
# # 计算点积:我们希望它们垂直,即点积为 0
# dot_width_handle = torch.sum(grip_width_world * handle_axis_world, dim=1)
# # 奖励1.0 - abs(dot)。点积越小(越垂直),分数越高
# rew_perpendicular = 1.0 - torch.abs(dot_width_handle)
# # 稍微增强一下敏感度,让它更倾向于完美垂直
# rew_perpendicular = rew_perpendicular * rew_perpendicular
# # 3. 组合奖励
# total_reward = rew_forward * rew_perpendicular
# return torch.nan_to_num(total_reward, nan=0.0)
# def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# handle_name: str = "Equipment_BB_13_C",
# left_gripper_name: str = "left_hand__l",
# right_gripper_name: str = "left_hand_r",
# height_offset: float = 0.03,
# std: float = 0.1) -> torch.Tensor:
# dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
# robot: Articulation = env.scene[robot_cfg.name]
# # lid_pos_w = lid.data.root_pos_w[:, :3]
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
# # [num_envs, 3]
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
# # 目标点lid位置 + height_offsetZ方向
# target_pos = handle_pos_w.clone()
# # print(target_pos[0])
# target_pos[:, 0] -= height_offset # 在lid上方height_offset处
# # print(target_pos[0])
# # 获取两个夹爪的位置
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
# # 计算夹爪中心位置
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
# # print(gripper_center[0])
# # 计算夹爪中心到目标点的3D距离
# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
# # 距离奖励:距离越小,奖励越大
# position_reward = 1.0 - torch.tanh(distance / std)
# return position_reward
# def gripper_handle_position_alignment(
# env,
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# handle_name: str = "Equipment_BB_13_C",
# left_finger_name: str = "right_hand_l",
# right_finger_name: str = "right_hand__r",
# # 【修改点1】根据你的描述长轴是 Local Z所以改为 (0, 0, 1)
# handle_local_axis: tuple = (0.0, 0.0, 1.0),
# # 【新增】:明确指定手柄的“侧面”是 Local X 轴
# handle_local_side_axis: tuple = (0.0, -1.0, 0.0),
# grasp_width: float = 0.03,
# grasp_offset: float = 0.06,
# std: float = 0.05,
# ) -> torch.Tensor:
# """
# 修正版:
# 1. 修正了手柄长轴为 Local Z。
# 2. 使用 Local X 作为手柄两侧的基准方向。
# """
# # 1. 获取数据
# dryingbox = env.scene[dryingbox_cfg.name]
# robot = env.scene[robot_cfg.name]
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
# # [num_envs, 3]
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
# # [num_envs, 4]
# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :]
# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True)
# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True)
# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :]
# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :]
# # 2. 计算关键向量
# # 【修正】:计算世界坐标系下的手柄长轴 (应为 World Z)
# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1)
# # 虽然本奖励函数主要位置点,不需要显式用到长轴向量做点积,但保留以备后续增加角度奖励
# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec))
# # 【核心修改】:直接计算世界坐标系下的手柄“侧向”向量 (即 Local X -> World Y)
# # 这种方法比 cross(approach, axis) 更准确,因为它尊重了物体自身的旋转
# handle_side_local_vec = torch.tensor(handle_local_side_axis, device=env.device).repeat(env.num_envs, 1)
# side_vec_world = normalize(quat_apply(handle_quat_w, handle_side_local_vec))
# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱)
# # 注意如果你的烘箱放置位置旋转了这里也建议改为读取机器人的前方或者物体的Local方向
# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
# # 3. 计算理想的抓取点
# # 抓取中心从手柄中心沿着抓取方向World X反向退后 grasp_offset
# grasp_center = handle_pos_w - approach_dir * grasp_offset
# # 计算理想的左/右手指位置
# # 既然 side_vec_world 是 Local X (对应 World Y)
# # 我们希望手指在 Y 轴两侧张开
# ideal_l_pos = grasp_center + side_vec_world * (grasp_width / 2.0)
# ideal_r_pos = grasp_center - side_vec_world * (grasp_width / 2.0)
# # 注意:这里 l_pos 用 + 还是 - 取决于你的 robot 建模里 right_hand_l 实际是在正Y侧还是负Y侧
# # 如果发现左右手指交叉,交换上面的 + 和 - 即可。
# # 4. 计算距离奖励
# dist_l = torch.norm(l_finger_pos - ideal_r_pos, dim=1)
# dist_r = torch.norm(r_finger_pos - ideal_l_pos, dim=1)
# rew_l = 1.0 - torch.tanh(dist_l / std)
# rew_r = 1.0 - torch.tanh(dist_r / std)
# total_reward = (rew_l + rew_r) / 2.0
# return torch.nan_to_num(total_reward, nan=0.0)
# def gripper_handle_position_alignment(
# env,
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# handle_name: str = "Equipment_BB_13_C",
# left_finger_name: str = "right_hand_l", # 左手指 body 名称
# right_finger_name: str = "right_hand__r", # 右手指 body 名称
# handle_local_axis: tuple = (0.0, 0.0, 1.0), # 手柄长轴
# grasp_width: float = 0.03, # 预期的抓取宽度 (米)
# grasp_offset: float = 0.03, # 手指尖距离手柄中心的进深偏移
# std: float = 0.05, # 奖励函数的敏感度
# ) -> torch.Tensor:
# """
# 双指位置奖励:引导左/右手指分别到达手柄的两侧。
# """
# # 1. 获取数据
# dryingbox = env.scene[dryingbox_cfg.name]
# robot = env.scene[robot_cfg.name]
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :]
# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True)
# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True)
# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :]
# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :]
# # 2. 计算理想的抓取点
# # 获取手柄当前轴向
# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1)
# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec))
# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱)
# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
# # 计算垂直于手柄轴向和接近方向的向量,这就是“侧面”方向
# side_vec = normalize(torch.cross(approach_dir, handle_axis_world))
# # 计算抓取中心点 (手柄中心沿接近方向反向偏移一点,留出空间)
# grasp_center = handle_pos_w - approach_dir * grasp_offset
# # 计算理想的左/右手指位置 (在中心点两侧张开)
# # 注意:这里需要根据你的坐标系确认哪个是左,哪个是右
# ideal_l_pos = grasp_center - side_vec * (grasp_width / 2.0)
# ideal_r_pos = grasp_center + side_vec * (grasp_width / 2.0)
# # 3. 计算距离奖励
# dist_l = torch.norm(l_finger_pos - ideal_l_pos, dim=1)
# dist_r = torch.norm(r_finger_pos - ideal_r_pos, dim=1)
# rew_l = 1.0 - torch.tanh(dist_l / std)
# rew_r = 1.0 - torch.tanh(dist_r / std)
# # 取平均值
# total_reward = (rew_l + rew_r) / 2.0
# return torch.nan_to_num(total_reward, nan=0.0)
# 在 rewards.py 中,修改 _is_grasped 函数,使其支持 dryingbox 和 handle_name
def _is_grasped(
@@ -400,8 +211,8 @@ def _is_grasped(
handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone()
handle_pos_w[:, 1] += height_offset # X方向偏移
handle_pos_w[:, 0] += 0.02
handle_pos_w[:, 0] += 0.005
handle_pos_w[:, 2] -= 0.04
# handle_pos_w[:, 2] -= 0.04
# 3. 夹爪位置
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
@@ -411,7 +222,7 @@ def _is_grasped(
# 4. 条件一:距离判定 (在把手球范围内)
dist_to_handle = torch.norm(gripper_center - handle_pos_w, dim=-1)
if env.common_step_counter % 100 == 0:
if env.common_step_counter % 500 == 0:
print(f"Env 0 Dist to Handle: {dist_to_handle[0].item():.4f}m")
is_near = dist_to_handle < grasp_radius
@@ -479,80 +290,51 @@ def gripper_close_when_near(
# def gripper_close_when_near(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
# handle_name: str = "Equipment_BB_13_C",
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
# left_gripper_name: str = "right_hand_l",
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
# right_gripper_name: str = "right_hand__r",
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# joint_names: list = ["right_hand_joint_left", "right_hand_joint_right"], # 保留接口但不强依赖
# target_close_pos: float = 0.03,
# # 关键参数
# height_offset: float = 0.03,
# handle_thickness: float = 0.0388, # 把手厚度≈3.88cm
# grasp_radius: float = 0.02 # 球面半径 2cm
# thickness_tol: float = 0.008, # 容差≈0.8cm,控制惩罚/奖励斜率
# grasp_radius: float = 0.04, # 抓取判定半径,略大于把手厚度
# height_offset: float = 0.00, # 如需上下偏移可调整
# target_close_pos: float = 0.03, # 保留给 joint/clamping 兼容
# ) -> torch.Tensor:
# dryingbox = env.scene[dryingbox_cfg.name]
# # 1. 获取位置
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 把手中心
# # 获取左右指尖位置
# # 目标位置(把手中心,可做微偏移)
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone()
# handle_pos[:, 1] += height_offset
# # 手指位置与中心
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# center = (pos_L + pos_R) / 2.0
# # 计算夹爪中心
# # 到把手的距离(只在近距离才给分)
# pos_center = (pos_L + pos_R) / 2.0
# dist = torch.norm(center - handle_pos, dim=-1)
# is_near = dist < grasp_radius
# # 2. 距离判定 (Is Inside Sphere?)
# # 实际两指间距
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
# finger_dist = torch.norm(pos_L - pos_R, dim=-1)
# is_in_sphere = (dist_center < grasp_radius).float()
# # 3. "在中间"判定 (Is Between?) # # 奖励:两指间距越接近把手厚度越好,平滑衰减
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间 # error = torch.abs(finger_dist - handle_thickness)
# vec_LR = pos_R - pos_L # 左指指向右指 # close_reward = 1.0 - torch.tanh(error / thickness_tol)
# vec_LO = lid_pos_w - pos_L # 左指指向物体
# # 计算投影比例 t # # 避免 NaN
# # P_proj = P_L + t * (P_R - P_L) # close_reward = torch.nan_to_num(close_reward, nan=0.0)
# # t = (vec_LO . vec_LR) / |vec_LR|^2
# # 如果 0 < t < 1说明投影在两个指尖之间
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6 # return torch.where(is_near, close_reward, torch.zeros_like(close_reward))
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
# t = dot / len_sq
# is_between = (t > 0.0) & (t < 1.0)
# is_between = is_between.float()
# # 4. 闭合判定
# joint_ids, _ = robot.find_joints(joint_names)
# # 注意:如果是 Binary Action可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
# # 只有当非常接近闭合目标时才给分
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
# # 5. 最终奖励
# # 只有三者同时满足才给 1.0 分
# reward = is_in_sphere * is_between * is_closing
# return torch.nan_to_num(reward, nan=0.0)
# def lid_is_lifted(
# env:ManagerBasedRLEnv,
# minimal_height:float,
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
# ) -> torch.Tensor:
# lid = env.scene[lid_cfg.name]
# lid_height = lid.data.root_pos_w[:, 2]
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
# reward=torch.nan_to_num(reward, nan=0.0)
# return reward
def lid_is_lifted(
@@ -698,39 +480,7 @@ def lid_lift_progress_reward(
return reward
# def lid_grasped_bonus(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg,
# robot_cfg: SceneEntityCfg,
# left_gripper_name: str,
# joint_names: list,
# distance_threshold: float = 0.05,
# closed_pos: float = 0.03
# ) -> torch.Tensor:
# # 1. 计算距离
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
# lid_pos = lid.data.root_pos_w[:, :3]
# distance = torch.norm(gripper_pos - lid_pos, dim=-1)
# # 2. 检查距离是否达标
# is_close = distance < distance_threshold
# # 3. 检查夹爪是否闭合
# joint_ids, _ = robot.find_joints(joint_names)
# joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# # 检查是否接近闭合 (比如 > 0.02)
# is_closed = torch.all(joint_pos > (closed_pos * 0.8), dim=-1)
# # 4. 给分
# # 只有同时满足才给 1.0
# reward = torch.where(is_close & is_closed, 1.0, 0.0)
# return torch.nan_to_num(reward)
# 在 rewards.py 文件开头添加调试函数
def debug_robot_physics(
env: ManagerBasedRLEnv,
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
@@ -771,64 +521,15 @@ def debug_robot_physics(
# 4. 检查是否有异常高的速度(可能导致碰撞问题)
high_vel_mask = (gripper_lin_vel_mag > 0.5) | (root_lin_vel_magnitude > 0.3)
if torch.any(high_vel_mask):
# if torch.any(high_vel_mask):
env_ids = torch.where(high_vel_mask)[0].cpu().tolist()
# env_ids = torch.where(high_vel_mask)[0].cpu().tolist()
for env_id in env_ids[:3]: # 只打印前3个环境
# for env_id in env_ids[:3]: # 只打印前3个环境
print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:")
# print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:")
print(f" 机器人根节点线性速度: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)")
# print(f" 机器人根节点线性速度: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)")
print(f" 机器人根节点角速度: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)")
# print(f" 机器人根节点角速度: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)")
print(f" 夹爪线性速度: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)")
# print(f" 夹爪线性速度: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)")
print(f" 夹爪角速度: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)")
# print(f" 夹爪角速度: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)")
print(f" 最大关节速度: {max_joint_vel[env_id].item():.4f} rad/s")
# print(f" 最大关节速度: {max_joint_vel[env_id].item():.4f} rad/s")
# def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# handle_name: str = "Equipment_BB_13_C",
# left_gripper_name: str = "right_hand_l",
# right_gripper_name: str = "right_hand__r",
# height_offset: float = 0.03,
# std: float = 0.1) -> torch.Tensor:
# dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
# robot: Articulation = env.scene[robot_cfg.name]
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
# target_pos = handle_pos_w.clone()
# target_pos[:, 1] += height_offset
# target_pos[:, 0] += 0.02
# target_pos[:, 2] -= 0.04
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
# # ===== 添加调试打印 =====
# # 当夹爪接近手柄时(距离 < 0.15m)打印物理信息
# close_mask = distance < 0.03
# if torch.any(close_mask):
# debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5)
# # ========================
# position_reward = 1.0 - torch.tanh(distance / std)
# # 计算距离后插入
# if env.common_step_counter % 200 == 0:
# print("pos_align dbg env0: "
# f"handle={handle_pos_w[0].cpu().numpy()}, "
# f"target={target_pos[0].cpu().numpy()}, "
# f"gripL={left_gripper_pos[0].cpu().numpy()}, "
# f"gripR={right_gripper_pos[0].cpu().numpy()}, "
# f"center={gripper_center[0].cpu().numpy()}, "
# f"dist={distance[0].item():.4f}")
# return position_reward
def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
@@ -846,8 +547,8 @@ def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
target_pos = handle_pos_w.clone()
target_pos[:, 1] += height_offset
# target_pos[:, 0] += 0.02
target_pos[:, 0] += 0.005
target_pos[:, 2] -= 0.02
# target_pos[:, 2] -= 0.04
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
@@ -878,3 +579,257 @@ def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
f"dist={distance[0].item():.4f}")
return position_reward
def gripper_open_when_far(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg, # 保留接口一致性,但不再使用
robot_cfg: SceneEntityCfg,
handle_name: str, # 保留接口一致性,但不再使用
left_gripper_name: str,
right_gripper_name: str,
joint_names: list, # 不再使用
dist_threshold: float = 0.5, # 不再使用
target_open_dist: float = 0.06, # 两手指完全打开时的距离(米)
) -> torch.Tensor:
"""
奖励夹爪最大张开状态:
只根据左右两根手指之间的实际距离计算奖励,
当距离接近 target_open_dist (0.06m) 时奖励最大。
"""
# 1. 获取机器人
robot = env.scene[robot_cfg.name]
# 2. 获取左右手指的世界坐标
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# 3. 计算两手指之间的实际距离
finger_dist = torch.norm(pos_L - pos_R, dim=-1)
# 4. 奖励:距离越接近最大打开距离,奖励越高
error = torch.abs(finger_dist - target_open_dist)
reward = 1.0 - torch.tanh(error / 0.01) # 0.01m = 1cm 作为平滑尺度
return reward
def gripper_open_penalty_when_far(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg,
robot_cfg: SceneEntityCfg,
handle_name: str,
left_gripper_name: str,
right_gripper_name: str,
joint_names: list,
dist_threshold: float = 0.03, # 抓取豁免区(在此距离内不惩罚)
# max_penalty_dist: float = 0.2, # 这个参数现在没用了,或者可以用来控制斜率,这里简化掉
) -> torch.Tensor:
"""
修正后的惩罚项:
在 dist_threshold (3cm) 之外,如果夹爪没有完全张开,则给予惩罚。
"""
robot = env.scene[robot_cfg.name]
dryingbox = env.scene[dryingbox_cfg.name]
# 1. 获取位置
handle_ids, _ = dryingbox.find_bodies([handle_name])
handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# gripper_center = (pos_L + pos_R) / 2.0
# distance = torch.norm(gripper_center - handle_pos, dim=1)
# 优化计算:直接用预计算的指尖中心(如果你的环境支持),或者保持原样
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
gripper_center = (pos_L + pos_R) / 2.0
distance = torch.norm(gripper_center - handle_pos, dim=1)
# 2. 计算闭合程度 (0=全开, 1=全闭)
# 获取关节位置更直接,比算指尖距离更准
joint_ids, _ = robot.find_joints(joint_names)
joint_pos = robot.data.joint_pos[:, joint_ids]
# 假设 0.0 是开0.03 是闭。取平均绝对值归一化。
# closedness 越大表示闭合越多
closedness = torch.clamp(torch.mean(torch.abs(joint_pos), dim=1) / 0.03, 0.0, 1.0)
# 3. 惩罚门控逻辑 (修正的核心)
# 如果 distance > dist_threshold开始惩罚
# 使用 tanh 或 sigmoid 平滑过渡,避免硬开关导致的震荡
# 当 dist = 0.03 时gate = 0
# 当 dist = 0.05 时gate 接近 1
is_far_gate = torch.tanh((distance - dist_threshold) * 100.0)
# clip 到 [0, 1],防止 distance < threshold 时出现负数干扰
is_far_gate = torch.clamp(is_far_gate, 0.0, 1.0)
# 4. 计算最终惩罚
# 只有在 "远处" 且 "闭合" 时才扣分
penalty = is_far_gate * closedness
return -penalty
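# Illustrative sketch (made-up distances, plain PyTorch): the tanh gate is ~0 inside the
# 3 cm grasp zone and rises to ~1 a few centimetres outside it, so the penalty only bites
# when the gripper is closed while still far from the handle.
def _demo_open_penalty_gate() -> torch.Tensor:
    dist_threshold = 0.03
    distance = torch.tensor([0.01, 0.03, 0.05, 0.20])    # metres to handle (hypothetical)
    closedness = torch.tensor([1.0, 1.0, 1.0, 0.2])      # 0 = fully open, 1 = fully closed
    is_far_gate = torch.clamp(torch.tanh((distance - dist_threshold) * 100.0), 0.0, 1.0)
    # gate ~ [0.00, 0.00, 0.96, 1.00]  ->  penalty ~ [0.00, 0.00, -0.96, -0.20]
    return -(is_far_gate * closedness)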
# def gripper_open_penalty_when_far(
# env: ManagerBasedRLEnv,
# dryingbox_cfg: SceneEntityCfg,
# robot_cfg: SceneEntityCfg,
# handle_name: str,
# left_gripper_name: str,
# right_gripper_name: str,
# joint_names: list,
# dist_threshold: float = 0.03, # 3cm 抓取保护区
# max_penalty_dist: float = 0.2, # 0.55m 惩罚激活边界
# ) -> torch.Tensor:
# """
# 使用 Sigmoid 激活函数的惩罚项:
# 1. 距离把手 < 0.55m 时,惩罚迅速激活并保持在高位 (接近 1.0)。
# 2. 距离把手 < 0.03m 时,惩罚强制清零,允许闭合抓取。
# 3. 两手指距离越小,惩罚值越大。
# """
# robot = env.scene[robot_cfg.name]
# dryingbox = env.scene[dryingbox_cfg.name]
# # 1. 获取位置信息
# handle_ids, _ = dryingbox.find_bodies([handle_name])
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# # 2. 计算距离
# gripper_center = (pos_L + pos_R) / 2.0
# distance = torch.norm(gripper_center - handle_pos, dim=1)
# finger_dist = torch.norm(pos_L - pos_R, dim=-1)
# # 3. 距离激活门控 (Sigmoid 方法)
# # 当 distance < 0.55 时,(0.55 - dist) 为正sigmoid 趋近于 1.0
# # 50.0 是陡峭系数系数越大0.55m 处的切换越像开关
# dist_gate = torch.sigmoid((max_penalty_dist - distance) * 50.0)
# # 4. 抓取区保护 (Deadzone)
# # 距离小于 3cm 时is_not_near 变为 0完全撤销惩罚
# is_not_near = (distance > dist_threshold).float()
# # 5. 闭合程度计算 (0 = 全开, 1 = 全闭)
# # 假设手指最大张开距离为 0.06m
# max_open = 0.06
# closedness = torch.clamp(1.0 - (finger_dist / max_open), 0.0, 1.0)
# # 6. 综合计算并 Clamp
# # 只有在 (0.03 < distance < 0.55) 范围内且手指闭合时,值才会很大
# penalty_magnitude = dist_gate * is_not_near * closedness
# # 返回负值作为惩罚 (值域 [-1, 0])
# return -torch.clamp(penalty_magnitude, 0.0, 1.0)
# def gripper_open_until_near(
# env: ManagerBasedRLEnv,
# dryingbox_cfg: SceneEntityCfg,
# robot_cfg: SceneEntityCfg,
# handle_name: str,
# left_gripper_name: str,
# right_gripper_name: str,
# joint_names: list,
# dist_threshold: float = 0.03, # 3cm 阈值
# ) -> torch.Tensor:
# """奖励夹爪在远离目标时保持打开状态,接近后不再约束。"""
# robot = env.scene[robot_cfg.name]
# dryingbox = env.scene[dryingbox_cfg.name]
# # 1. 计算夹爪中心到把手的距离
# handle_ids, _ = dryingbox.find_bodies([handle_name])
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# gripper_center = (robot.data.body_pos_w[:, l_ids[0], :3] + robot.data.body_pos_w[:, r_ids[0], :3]) / 2.0
# distance = torch.norm(gripper_center - handle_pos, dim=1)
# # 2. 获取夹爪当前关节位置 (假设 0 是完全打开)
# joint_ids, _ = robot.find_joints(joint_names)
# # 计算当前关节位置与 0打开状态的偏差
# joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
# mean_error = torch.mean(joint_pos_abs, dim=1)
# # 3. 计算奖励逻辑
# # 距离 > 3cm 时:关节越接近 0打开奖励越高 (最大为 1.0)
# # 距离 <= 3cm 时:奖励为 0不干涉模型闭合
# is_far = distance > dist_threshold
# open_reward = 1.0 - torch.tanh(mean_error / 0.01) # 1cm 内平滑衰减
# return torch.where(is_far, open_reward, torch.zeros_like(open_reward))
# ... 在文件末尾添加以下函数 ...
def gripper_grasp_stability(
env: ManagerBasedRLEnv,
robot_cfg: SceneEntityCfg,
joint_names: list,
vel_threshold: float = 0.005 # 极低的速度阈值,判定为“静止”
) -> torch.Tensor:
"""
奖励夹爪在闭合状态下的稳定性。
只有当关节速度极小时(即没有在抖动或移动),才发放奖励。
"""
robot = env.scene[robot_cfg.name]
joint_ids, _ = robot.find_joints(joint_names)
# 获取夹爪关节的绝对速度
joint_vel = torch.abs(robot.data.joint_vel[:, joint_ids])
# 判定是否所有夹爪关节都处于“静止”状态
is_stable = torch.all(joint_vel < vel_threshold, dim=1)
# 只有在闭合状态下谈稳定性才有意义,所以通常建议配合 is_grasped 使用
# 这里我们只返回稳定性系数
return is_stable.float()
def gripper_close_interaction_v2(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg,
robot_cfg: SceneEntityCfg,
handle_name: str,
left_gripper_name: str,
right_gripper_name: str,
joint_names: list,
target_close_pos: float = 0.012,
height_offset: float = 0.00,
grasp_radius: float = 0.05
) -> torch.Tensor:
"""
增强版闭合奖励:结合了位置、闭合程度和物理稳定性。
"""
# 1. 基础抓取判定
is_grasped = _is_grasped(
env, dryingbox_cfg, robot_cfg, handle_name,
left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
# 2. 计算闭合程度 (0.0 到 1.0)
robot = env.scene[robot_cfg.name]
joint_ids, _ = robot.find_joints(joint_names)
current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
mean_joint_pos = torch.mean(current_joint_pos, dim=1)
clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
# 3. 计算稳定性因子 (速度越快,奖励衰减越厉害)
joint_vel = torch.norm(robot.data.joint_vel[:, joint_ids], dim=1)
# 使用 exp(-x) 让奖励对速度非常敏感,速度稍大奖励就迅速消失
stability_factor = torch.exp(-20.0 * joint_vel)
# 综合奖励:必须满足抓取条件,且闭合得越深、动作越稳,分数越高
return torch.where(is_grasped, clamping_factor * stability_factor, torch.zeros_like(clamping_factor))
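# Illustrative wiring sketch (term names, weights and scene-entity names are placeholders;
# the body/joint names copy the defaults used above). It shows how the terms defined in
# this file could be registered with the reward manager of a manager-based env config.
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.utils import configclass


@configclass
class ExampleDryingboxRewardsCfg:
    """Example reward terms using the functions defined in this file (placeholder weights)."""

    approach_handle = RewTerm(
        func=gripper_handle_position_alignment,
        weight=2.0,
        params={"handle_name": "Equipment_BB_13_C", "std": 0.1},
    )
    close_on_handle = RewTerm(
        func=gripper_close_interaction_v2,
        weight=5.0,
        params={
            "dryingbox_cfg": SceneEntityCfg("dryingbox"),
            "robot_cfg": SceneEntityCfg("Mindbot"),
            "handle_name": "Equipment_BB_13_C",
            "left_gripper_name": "right_hand_l",
            "right_gripper_name": "right_hand__r",
            "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
        },
    )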

View File

@@ -0,0 +1,967 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
"""Reward the agent for lifting the lid above the minimal height."""
lid: RigidObject = env.scene[lid_cfg.name]
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
height = lid.data.root_pos_w[:, 2]
return torch.where(height > minimal_height, 1.0, 0.0)
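# NOTE: lid_is_lifted is re-defined further down in this file with a grasp-gated,
# relative-lift version; that later definition is the one in effect at import time.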
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for reaching the lid using tanh-kernel."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# float 化,防止成为 (3,) 向量
if not isinstance(std, float):
if torch.is_tensor(std):
std = std.item()
else:
std = float(std)
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
# body_idx = robot.find_bodies(gripper_body_name)[0]
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
return 1 - torch.tanh(distance / std)
def lid_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for grasping the lid (close and lifted)."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
# find_bodies returns (ids, names); index the single id so the result keeps shape (num_envs, 3)
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
# Check if close and lifted
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
return torch.where(is_close & is_lifted, 1.0, 0.0)
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Combined reward for reaching the lid AND lifting it."""
# Get reaching reward
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
# Get lifting reward
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
# Combine rewards multiplicatively
return reach_reward * lift_reward
def gripper_handle_orientation_alignment(
env,
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
handle_name: str = "Equipment_BB_13_C",
gripper_body_name: str = "right_hand_body",
gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 夹爪指尖指向 (Local Z)
gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 夹爪手指开合轴 (Local Y)
lid_handle_axis: tuple = (0.0, 0.0, 1.0),
max_angle_deg: float =60.0,
# handle_local_axis 参数已删除,因为直接使用世界坐标系 Z 轴更稳健
) -> torch.Tensor:
"""
修正版:
1. 解决了梯度消失:使用 (dot+1)/2 替代 clamp保证全角度有梯度。
2. 解决了轴向错误:直接认定手柄是垂直的 (World Z),修复抓取姿态。
"""
# 1. 获取数据
# dryingbox = env.scene[dryingbox_cfg.name] (计算轴向不再需要它,节省计算)
robot = env.scene[robot_cfg.name]
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
box=env.scene[dryingbox_cfg.name]
hd_ids,_=box.find_bodies([handle_name],preserve_order=True)
handle_quat_w=box.data.body_quat_w[:,hd_ids[0],:]
# --- 约束 A: 指向约束 (Pointing) ---
# 目标:夹爪 Z 轴 指向 World +X
grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
target_fwd = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
dot_fwd = torch.sum(grip_fwd_world * target_fwd, dim=1)
dot_fwd=torch.clamp(dot_fwd,-1.0,1.0)
angle_rad = torch.acos(torch.clamp(dot_fwd, -1.0, 1.0))
max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device)
angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
rew_forward = 1.0 - angle_normalized # 角度越小,奖励越大
# 如果角度超过60度奖励为0
rew_forward = torch.where(angle_rad <= max_angle_rad, rew_forward, torch.zeros_like(rew_forward))
# nan -> 0
rew_forward = torch.where(torch.isnan(rew_forward), torch.zeros_like(rew_forward), rew_forward)
# [关键修改 1] 使用软化奖励,提供全局梯度
# 范围 [0, 1]: 1.0 是完美对齐0.5 是垂直0.0 是完全背对
# 这样即使现在是指向地面的 (dot=0),它也有 0.5 分,且知道往上抬分会变高
# rew_forward = (dot_fwd + 1.0) / 2.0
# 如果你希望能更陡峭一点,可以加个平方: rew_forward = rew_forward * rew_forward
# --- 约束 B: 抓取姿态约束 (Grasp Alignment) ---
# 目标:夹爪的开合轴 (Y) 应该垂直于 手柄轴 (Z)
# 这样手指就是水平开合的
grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
grip_width_world = normalize(quat_apply(gripper_quat_w, grip_width_local))
# [关键修改 2] 强制假设手柄是垂直的 (World Z)
# 除非你的烘箱会倾斜倒在地上,否则这是最稳健的写法,不用管局部坐标系
handle_axis_local = torch.tensor([0.0, 0.0, 1.0], device=env.device).repeat(env.num_envs, 1)
handle_axis_world=quat_apply(handle_quat_w,handle_axis_local)
dot_yaw=torch.sum(grip_width_world*handle_axis_world,dim=1)
rew_yaw=torch.square(torch.abs(dot_yaw))+1e-4
# nan -> 0
rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
# 5. 组合
total_reward = rew_forward * rew_yaw
total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
# debug
if not torch.isfinite(total_reward).all():
print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
print(f"rew_vertical: {rew_vertical}")
print(f"rew_yaw: {rew_yaw}")
return total_reward
# # 计算点积:我们希望它们垂直,即点积为 0
# dot_width_handle = torch.sum(grip_width_world * handle_axis_world, dim=1)
# # 奖励1.0 - abs(dot)。点积越小(越垂直),分数越高
# rew_perpendicular = 1.0 - torch.abs(dot_width_handle)
# # 稍微增强一下敏感度,让它更倾向于完美垂直
# rew_perpendicular = rew_perpendicular * rew_perpendicular
# # 3. 组合奖励
# total_reward = rew_forward * rew_perpendicular
# return torch.nan_to_num(total_reward, nan=0.0)
# def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# handle_name: str = "Equipment_BB_13_C",
# left_gripper_name: str = "left_hand__l",
# right_gripper_name: str = "left_hand_r",
# height_offset: float = 0.03,
# std: float = 0.1) -> torch.Tensor:
# dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
# robot: Articulation = env.scene[robot_cfg.name]
# # lid_pos_w = lid.data.root_pos_w[:, :3]
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
# # [num_envs, 3]
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
# # 目标点lid位置 + height_offsetZ方向
# target_pos = handle_pos_w.clone()
# # print(target_pos[0])
# target_pos[:, 0] -= height_offset # 在lid上方height_offset处
# # print(target_pos[0])
# # 获取两个夹爪的位置
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
# # 计算夹爪中心位置
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
# # print(gripper_center[0])
# # 计算夹爪中心到目标点的3D距离
# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
# # 距离奖励:距离越小,奖励越大
# position_reward = 1.0 - torch.tanh(distance / std)
# return position_reward
# def gripper_handle_position_alignment(
# env,
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# handle_name: str = "Equipment_BB_13_C",
# left_finger_name: str = "right_hand_l",
# right_finger_name: str = "right_hand__r",
# # 【修改点1】根据你的描述长轴是 Local Z所以改为 (0, 0, 1)
# handle_local_axis: tuple = (0.0, 0.0, 1.0),
# # 【新增】:明确指定手柄的“侧面”是 Local X 轴
# handle_local_side_axis: tuple = (0.0, -1.0, 0.0),
# grasp_width: float = 0.03,
# grasp_offset: float = 0.06,
# std: float = 0.05,
# ) -> torch.Tensor:
# """
# 修正版:
# 1. 修正了手柄长轴为 Local Z。
# 2. 使用 Local X 作为手柄两侧的基准方向。
# """
# # 1. 获取数据
# dryingbox = env.scene[dryingbox_cfg.name]
# robot = env.scene[robot_cfg.name]
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
# # [num_envs, 3]
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
# # [num_envs, 4]
# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :]
# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True)
# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True)
# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :]
# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :]
# # 2. 计算关键向量
# # 【修正】:计算世界坐标系下的手柄长轴 (应为 World Z)
# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1)
# # 虽然本奖励函数主要位置点,不需要显式用到长轴向量做点积,但保留以备后续增加角度奖励
# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec))
# # 【核心修改】:直接计算世界坐标系下的手柄“侧向”向量 (即 Local X -> World Y)
# # 这种方法比 cross(approach, axis) 更准确,因为它尊重了物体自身的旋转
# handle_side_local_vec = torch.tensor(handle_local_side_axis, device=env.device).repeat(env.num_envs, 1)
# side_vec_world = normalize(quat_apply(handle_quat_w, handle_side_local_vec))
# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱)
# # 注意如果你的烘箱放置位置旋转了这里也建议改为读取机器人的前方或者物体的Local方向
# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
# # 3. 计算理想的抓取点
# # 抓取中心从手柄中心沿着抓取方向World X反向退后 grasp_offset
# grasp_center = handle_pos_w - approach_dir * grasp_offset
# # 计算理想的左/右手指位置
# # 既然 side_vec_world 是 Local X (对应 World Y)
# # 我们希望手指在 Y 轴两侧张开
# ideal_l_pos = grasp_center + side_vec_world * (grasp_width / 2.0)
# ideal_r_pos = grasp_center - side_vec_world * (grasp_width / 2.0)
# # 注意:这里 l_pos 用 + 还是 - 取决于你的 robot 建模里 right_hand_l 实际是在正Y侧还是负Y侧
# # 如果发现左右手指交叉,交换上面的 + 和 - 即可。
# # 4. 计算距离奖励
# dist_l = torch.norm(l_finger_pos - ideal_r_pos, dim=1)
# dist_r = torch.norm(r_finger_pos - ideal_l_pos, dim=1)
# rew_l = 1.0 - torch.tanh(dist_l / std)
# rew_r = 1.0 - torch.tanh(dist_r / std)
# total_reward = (rew_l + rew_r) / 2.0
# return torch.nan_to_num(total_reward, nan=0.0)
# def gripper_handle_position_alignment(
# env,
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# handle_name: str = "Equipment_BB_13_C",
# left_finger_name: str = "right_hand_l", # 左手指 body 名称
# right_finger_name: str = "right_hand__r", # 右手指 body 名称
# handle_local_axis: tuple = (0.0, 0.0, 1.0), # 手柄长轴
# grasp_width: float = 0.03, # 预期的抓取宽度 (米)
# grasp_offset: float = 0.03, # 手指尖距离手柄中心的进深偏移
# std: float = 0.05, # 奖励函数的敏感度
# ) -> torch.Tensor:
# """
# 双指位置奖励:引导左/右手指分别到达手柄的两侧。
# """
# # 1. 获取数据
# dryingbox = env.scene[dryingbox_cfg.name]
# robot = env.scene[robot_cfg.name]
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :]
# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True)
# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True)
# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :]
# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :]
# # 2. 计算理想的抓取点
# # 获取手柄当前轴向
# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1)
# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec))
# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱)
# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
# # 计算垂直于手柄轴向和接近方向的向量,这就是“侧面”方向
# side_vec = normalize(torch.cross(approach_dir, handle_axis_world))
# # 计算抓取中心点 (手柄中心沿接近方向反向偏移一点,留出空间)
# grasp_center = handle_pos_w - approach_dir * grasp_offset
# # 计算理想的左/右手指位置 (在中心点两侧张开)
# # 注意:这里需要根据你的坐标系确认哪个是左,哪个是右
# ideal_l_pos = grasp_center - side_vec * (grasp_width / 2.0)
# ideal_r_pos = grasp_center + side_vec * (grasp_width / 2.0)
# # 3. 计算距离奖励
# dist_l = torch.norm(l_finger_pos - ideal_l_pos, dim=1)
# dist_r = torch.norm(r_finger_pos - ideal_r_pos, dim=1)
# rew_l = 1.0 - torch.tanh(dist_l / std)
# rew_r = 1.0 - torch.tanh(dist_r / std)
# # 取平均值
# total_reward = (rew_l + rew_r) / 2.0
# return torch.nan_to_num(total_reward, nan=0.0)
# 在 rewards.py 中,修改 _is_grasped 函数,使其支持 dryingbox 和 handle_name
def _is_grasped(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg, # 改为 dryingbox_cfg
robot_cfg: SceneEntityCfg,
handle_name: str, # 新增 handle_name 参数
left_gripper_name: str,
right_gripper_name: str,
joint_names: list,
height_offset: float,
grasp_radius: float,
target_close_pos: float
) -> torch.Tensor:
"""
内部辅助函数:判定是否成功抓取。
条件:
1. 夹爪中心在把手目标点附近 (Sphere Check)
2. 夹爪处于闭合发力状态 (Joint Effort Check)
3. (关键) 夹爪X轴距离合适不能压在把手上 (X-Axis Check)
"""
# 1. 获取对象
dryingbox = env.scene[dryingbox_cfg.name] # 改为 dryingbox
robot = env.scene[robot_cfg.name]
# 2. 目标点位置 (把手中心)
handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone()
handle_pos_w[:, 1] += height_offset # X方向偏移
handle_pos_w[:, 0] += 0.02
# handle_pos_w[:, 2] -= 0.04
# 3. 夹爪位置
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
gripper_center = (pos_L + pos_R) / 2.0
# 4. 条件一:距离判定 (在把手球范围内)
dist_to_handle = torch.norm(gripper_center - handle_pos_w, dim=-1)
if env.common_step_counter % 100 == 0:
print(f"Env 0 Dist to Handle: {dist_to_handle[0].item():.4f}m")
is_near = dist_to_handle < grasp_radius
# 5. 条件二X轴距离判定 (防止压在把手上)
x_diff = gripper_center[:, 0] - handle_pos_w[:, 0]
is_x_valid = torch.abs(x_diff) < 0.03 # 3cm 容差
# 6. 条件三:闭合判定 (正在尝试闭合)
joint_ids, _ = robot.find_joints(joint_names)
joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
is_effort_closing = torch.all(joint_pos > 0.005, dim=1)
# 2. 几何排斥判定 (Geometry Check)
dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
is_not_empty = dist_fingertips > 0.005
# 在计算 dist_to_handle 后
if env.common_step_counter % 500 == 0 and dist_to_handle[0] < 0.15:
delta = gripper_center[0] - handle_pos_w[0]
print(f"close_dbg env0: handle={handle_pos_w[0].cpu().numpy()}, "
f"center={gripper_center[0].cpu().numpy()}, "
f"delta={delta.cpu().numpy()}, "
f"dist={dist_to_handle[0].item():.4f}, "
f"fingertip_dist={dist_fingertips[0].item():.4f}, "
f"joint_pos={joint_pos[0].cpu().numpy()}")
# 综合判定
return is_near & is_x_valid & is_effort_closing & is_not_empty
# 修改 gripper_close_when_near 函数
def gripper_close_when_near(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), # 改为 dryingbox_cfg
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
handle_name: str = "Equipment_BB_13_C", # 新增 handle_name 参数
left_gripper_name: str = "right_hand_l", # 更新默认值
right_gripper_name: str = "right_hand__r", # 更新默认值
joint_names: list = ["right_hand_joint_left", "right_hand_joint_right"], # 更新默认值
target_close_pos: float = 0.03,
height_offset: float = 0.005, # 更新默认值
grasp_radius: float = 0.03 # 更新默认值
) -> torch.Tensor:
# 1. 判定基础抓取条件是否满足
is_grasped = _is_grasped(
env, dryingbox_cfg, robot_cfg, handle_name, # 传入 handle_name
left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
# 2. 计算夹紧程度 (Clamping Intensity)
robot = env.scene[robot_cfg.name]
joint_ids, _ = robot.find_joints(joint_names)
current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
mean_joint_pos = torch.mean(current_joint_pos, dim=1)
clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
# 3. 只有在满足抓取判定时,才发放带有权重的夹紧奖励
return torch.where(is_grasped, clamping_factor, 0.0)
# def gripper_close_when_near(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# target_close_pos: float = 0.03,
# height_offset: float = 0.03,
# grasp_radius: float = 0.02 # 球面半径 2cm
# ) -> torch.Tensor:
# # 1. 获取位置
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 把手中心
# # 获取左右指尖位置
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# # 计算夹爪中心
# pos_center = (pos_L + pos_R) / 2.0
# # 2. 距离判定 (Is Inside Sphere?)
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
# is_in_sphere = (dist_center < grasp_radius).float()
# # 3. "在中间"判定 (Is Between?)
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
# vec_LR = pos_R - pos_L # 左指指向右指
# vec_LO = lid_pos_w - pos_L # 左指指向物体
# # 计算投影比例 t
# # P_proj = P_L + t * (P_R - P_L)
# # t = (vec_LO . vec_LR) / |vec_LR|^2
# # 如果 0 < t < 1说明投影在两个指尖之间
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
# t = dot / len_sq
# is_between = (t > 0.0) & (t < 1.0)
# is_between = is_between.float()
# # 4. 闭合判定
# joint_ids, _ = robot.find_joints(joint_names)
# # 注意:如果是 Binary Action可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
# # 只有当非常接近闭合目标时才给分
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
# # 5. 最终奖励
# # 只有三者同时满足才给 1.0 分
# reward = is_in_sphere * is_between * is_closing
# return torch.nan_to_num(reward, nan=0.0)
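# Illustrative check of the projection ("is between the fingertips") test used in the
# commented-out block above, with hypothetical fingertip/object positions in plain PyTorch:
def _demo_between_fingers_projection() -> torch.Tensor:
    pos_L = torch.tensor([[0.00, -0.03, 0.80]])   # left fingertip (made-up, world frame)
    pos_R = torch.tensor([[0.00, 0.03, 0.80]])    # right fingertip
    obj = torch.tensor([[0.00, 0.00, 0.80]])      # object midway between the fingertips
    vec_LR = pos_R - pos_L
    vec_LO = obj - pos_L
    t = torch.sum(vec_LO * vec_LR, dim=-1) / (torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6)
    # t ~ 0.5 here -> True; an object outside the fingertips gives t < 0 or t > 1
    return (t > 0.0) & (t < 1.0)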
# def lid_is_lifted(
# env:ManagerBasedRLEnv,
# minimal_height:float,
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
# ) -> torch.Tensor:
# lid = env.scene[lid_cfg.name]
# lid_height = lid.data.root_pos_w[:, 2]
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
# reward=torch.nan_to_num(reward, nan=0.0)
# return reward
def lid_is_lifted(
env: ManagerBasedRLEnv,
minimal_height: float = 0.05, # 相对提升阈值
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
height_offset: float = 0.03,
grasp_radius: float = 0.1,
target_close_pos: float = 0.03,
) -> torch.Tensor:
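# NOTE: the _is_grasped defined earlier in this file expects
# (env, dryingbox_cfg, robot_cfg, handle_name, left/right gripper names, ...); the call
# below passes lid-task arguments without a handle_name, so it only works if a
# lid-specific _is_grasped is provided elsewhere (otherwise it raises a TypeError).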
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
lid = env.scene[lid_cfg.name]
# 1. 获取高度
current_height = lid.data.root_pos_w[:, 2]
# 自动获取初始高度
initial_height = lid.data.default_root_state[:, 2]
# 2. 计算提升量
lift_height = torch.clamp(current_height - initial_height, min=0.0)
# 3. 速度检查 (防撞飞)
# 如果速度 > 1.0 m/s视为被撞飞不给分
lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
is_stable = lid_vel < 1.0
# 4. 计算奖励
# 基础分:高度越高分越高
shaping_reward = lift_height * 2.0
# 成功分:超过阈值
success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
# 组合
# 必须 is_grasped AND is_stable
reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
return torch.nan_to_num(reward, nan=0.0)
def lid_lift_success_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand_body",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# 关键参数
settled_height: float = 0.75, # 盖子落在超声仪上的稳定高度 (需根据仿真实际情况微调)
target_lift_height: float = 0.05, # 目标提升高度 (5cm)
grasp_dist_threshold: float = 0.05, # 抓取判定距离
closed_pos: float = 0.03 # 夹爪闭合阈值
) -> torch.Tensor:
"""
提升奖励:只有在【夹稳】的前提下,将盖子【相对】提升达到目标高度才给高分。
"""
# 1. 获取数据
lid = env.scene[lid_cfg.name]
robot = env.scene[robot_cfg.name]
# 盖子实时高度
lid_z = lid.data.root_pos_w[:, 2]
lid_pos = lid.data.root_pos_w[:, :3]
# 夹爪位置
body_ids, _ = robot.find_bodies([left_gripper_name])
gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
# 2. 【条件一:是否夹稳 (Is Grasped?)】
# 距离检查
distance = torch.norm(gripper_pos - lid_pos, dim=-1)
is_close = distance < grasp_dist_threshold
# 闭合检查
joint_ids, _ = robot.find_joints(joint_names)
joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
# 假设 > 80% 的闭合度算抓紧了
is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)
is_grasped = is_close & is_closed
# 3. 【条件二:计算相对提升 (Relative Lift)】
# 当前高度 - 初始稳定高度
current_lift = lid_z - settled_height
# 4. 计算奖励
lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
# 基础提升分 (Shaping Reward)
lift_reward = lift_progress
# 成功大奖 (Success Bonus)
# 如果提升超过目标高度 (比如 > 95% 的 5cm),给额外大奖
success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
# 组合:只有在 is_grasped 为 True 时才发放提升奖励
total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))
# 5. 安全输出
return torch.nan_to_num(total_reward, nan=0.0)
def lid_lift_progress_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
initial_height: float = 0.8,
target_height_lift: float = 0.15,
height_offset: float = 0.02, # 必须与抓取奖励保持一致
grasp_radius: float = 0.1, # 提升时可以稍微放宽一点半径,防止抖动丢分
target_close_pos: float = 0.03,
std: float = 0.05 # 标准差
) -> torch.Tensor:
# 1. 判定是否夹住
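# NOTE: as in lid_is_lifted above, this call assumes a lid-specific _is_grasped
# signature rather than the dryingbox/handle version defined earlier in this file.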
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
# 2. 计算高度
lid = env.scene[lid_cfg.name]
current_height = lid.data.root_pos_w[:, 2]
lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
# print(current_height)
# print(lift_height)
# 3. 计算奖励
# 只有在 is_grasped 为 True 时,才发放高度奖励
# 这样彻底杜绝了 "砸飞/撞飞" 拿分的情况
progress = torch.tanh(lift_height / std)
reward = torch.where(is_grasped, progress, 0.0)
return reward
# def lid_grasped_bonus(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg,
# robot_cfg: SceneEntityCfg,
# left_gripper_name: str,
# joint_names: list,
# distance_threshold: float = 0.05,
# closed_pos: float = 0.03
# ) -> torch.Tensor:
# # 1. 计算距离
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
# lid_pos = lid.data.root_pos_w[:, :3]
# distance = torch.norm(gripper_pos - lid_pos, dim=-1)
# # 2. 检查距离是否达标
# is_close = distance < distance_threshold
# # 3. 检查夹爪是否闭合
# joint_ids, _ = robot.find_joints(joint_names)
# joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# # 检查是否接近闭合 (比如 > 0.02)
# is_closed = torch.all(joint_pos > (closed_pos * 0.8), dim=-1)
# # 4. 给分
# # 只有同时满足才给 1.0
# reward = torch.where(is_close & is_closed, 1.0, 0.0)
# return torch.nan_to_num(reward)
# Debug helper (intended to sit near the top of rewards.py)
def debug_robot_physics(
env: ManagerBasedRLEnv,
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "right_hand_body",
distance_threshold: float = 0.1, # print when the distance drops below this value (currently unused)
print_interval: int = 10 # print every N steps
) -> None:
"""
Debug helper: print the robot's velocities and related physics quantities.
"""
# throttle printing via a function attribute (static-variable style)
if not hasattr(debug_robot_physics, "step_count"):
debug_robot_physics.step_count = 0
debug_robot_physics.step_count += 1
if debug_robot_physics.step_count % print_interval != 0:
return
robot = env.scene[robot_cfg.name]
# 1. root-link linear and angular velocities
root_lin_vel = robot.data.root_lin_vel_w # (num_envs, 3)
root_ang_vel = robot.data.root_ang_vel_w # (num_envs, 3)
root_lin_vel_magnitude = torch.norm(root_lin_vel, dim=1) # (num_envs,)
root_ang_vel_magnitude = torch.norm(root_ang_vel, dim=1) # (num_envs,)
# 2. gripper body velocities
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_lin_vel = robot.data.body_lin_vel_w[:, body_ids[0], :] # (num_envs, 3)
gripper_ang_vel = robot.data.body_ang_vel_w[:, body_ids[0], :] # (num_envs, 3)
gripper_lin_vel_mag = torch.norm(gripper_lin_vel, dim=1)
gripper_ang_vel_mag = torch.norm(gripper_ang_vel, dim=1)
# 3. joint velocities (may affect collision behaviour)
joint_vel = robot.data.joint_vel # (num_envs, num_joints)
max_joint_vel = torch.max(torch.abs(joint_vel), dim=1)[0] # (num_envs,)
# 4. flag abnormally high velocities (a possible cause of collision problems)
high_vel_mask = (gripper_lin_vel_mag > 0.5) | (root_lin_vel_magnitude > 0.3)
if torch.any(high_vel_mask):
env_ids = torch.where(high_vel_mask)[0].cpu().tolist()
for env_id in env_ids[:3]: # only print the first 3 environments
print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:")
print(f" robot root linear velocity: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)")
print(f" robot root angular velocity: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)")
print(f" gripper linear velocity: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)")
print(f" gripper angular velocity: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)")
print(f" max joint velocity: {max_joint_vel[env_id].item():.4f} rad/s")
def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
handle_name: str = "Equipment_BB_13_C",
left_gripper_name: str = "right_hand_l",
right_gripper_name: str = "right_hand__r",
height_offset: float = 0.03,
std: float = 0.1) -> torch.Tensor:
dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
target_pos = handle_pos_w.clone()
target_pos[:, 1] += height_offset
target_pos[:, 0] += 0.02
# target_pos[:, 2] -= 0.04
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
# ===== debug printing =====
# print physics info when the gripper gets close to the handle (distance < 0.03 m)
close_mask = distance < 0.03
if torch.any(close_mask):
debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5)
# ========================
position_reward = 1.0 - torch.tanh(distance / std)
# inserted right after the distance computation
if env.common_step_counter % 500 == 0:
print("pos_align dbg env0: "
f"handle={handle_pos_w[0].cpu().numpy()}, "
f"target={target_pos[0].cpu().numpy()}, "
f"gripL={left_gripper_pos[0].cpu().numpy()}, "
f"gripR={right_gripper_pos[0].cpu().numpy()}, "
f"center={gripper_center[0].cpu().numpy()}, "
f"dist={distance[0].item():.4f}")
return position_reward
# def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# handle_name: str = "Equipment_BB_13_C",
# left_gripper_name: str = "right_hand_l",
# right_gripper_name: str = "right_hand__r",
# height_offset: float = 0.03,
# std: float = 0.1) -> torch.Tensor:
# dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
# robot: Articulation = env.scene[robot_cfg.name]
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
# target_pos = handle_pos_w.clone()
# target_pos[:, 1] += height_offset
# target_pos[:, 0] += 0.02
# # target_pos[:, 2] -= 0.02
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
# # ===== 添加调试打印 =====
# # 当夹爪接近手柄时(距离 < 0.15m)打印物理信息
# close_mask = distance < 0.03
# if torch.any(close_mask):
# debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5)
# # ========================
# position_reward = 1.0 - torch.tanh(distance / std)
# # 计算距离后插入
# if env.common_step_counter % 00 == 0:
# print("pos_align dbg env0: "
# f"handle={handle_pos_w[0].cpu().numpy()}, "
# f"target={target_pos[0].cpu().numpy()}, "
# f"gripL={left_gripper_pos[0].cpu().numpy()}, "
# f"gripR={right_gripper_pos[0].cpu().numpy()}, "
# f"center={gripper_center[0].cpu().numpy()}, "
# f"dist={distance[0].item():.4f}")
# return position_reward
# def gripper_open_when_far(
# env: ManagerBasedRLEnv,
# dryingbox_cfg: SceneEntityCfg,
# robot_cfg: SceneEntityCfg,
# handle_name: str,
# left_gripper_name: str,
# right_gripper_name: str,
# joint_names: list,
# dist_threshold: float = 0.5,
# target_open_pos: float = 0.3,
# ) -> torch.Tensor:
# """
# 当夹爪距离目标点较远(> dist_threshold奖励夹爪保持打开状态。
# """
# # 1. 获取位置
# dryingbox = env.scene[dryingbox_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 获取手柄位置
# handle_ids, _ = dryingbox.find_bodies([handle_name])
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
# # 获取夹爪中心位置
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# gripper_center = (pos_L + pos_R) / 2.0
# # 2. 计算距离
# dist = torch.norm(gripper_center - handle_pos, dim=-1)
# # 3. 获取手指关节位置的绝对值
# joint_ids, _ = robot.find_joints(joint_names)
# current_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
# mean_pos_abs = torch.mean(current_pos_abs, dim=1)
# # 4. 计算奖励:如果距离 > 0.5,奖励 mean_pos_abs 接近 target_open_pos (0.3)
# # 使用 tanh 误差函数,误差越小奖励越高
# error = torch.abs(mean_pos_abs - target_open_pos)
# reward = 1.0 - torch.tanh(error / 0.1) # 0.1 为缩放因子,可根据需要调整
# # 只有距离大于阈值时才发放奖励
# return torch.where(dist > dist_threshold, reward, torch.zeros_like(reward))
def gripper_open_when_far(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg, # kept for interface consistency, no longer used
robot_cfg: SceneEntityCfg,
handle_name: str, # kept for interface consistency, no longer used
left_gripper_name: str,
right_gripper_name: str,
joint_names: list, # no longer used
dist_threshold: float = 0.5, # no longer used
target_open_dist: float = 0.06, # finger-to-finger distance when fully open (metres)
) -> torch.Tensor:
"""
Reward the gripper for being fully open:
the reward is computed purely from the actual distance between the two fingers,
and is maximal when that distance approaches target_open_dist (0.06 m).
"""
# 1. get the robot
robot = env.scene[robot_cfg.name]
# 2. world positions of the left and right fingers
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# 3. actual distance between the two fingers
finger_dist = torch.norm(pos_L - pos_R, dim=-1)
# 4. reward: the closer the distance is to the fully-open value, the higher the reward
error = torch.abs(finger_dist - target_open_dist)
reward = 1.0 - torch.tanh(error / 0.01) # 0.01 m (1 cm) smoothing scale
return reward
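# Usage sketch (illustrative, not part of this commit): this term is not registered in the task's
# RewardsCfg shown later in this commit, but it could be wired in like the other hand-related
# rewards. The term name and the weight below are assumptions and would need tuning.
#
# gripper_open_far = RewTerm(
#     func=mdp.gripper_open_when_far,
#     params={
#         "dryingbox_cfg": SceneEntityCfg("dryingbox"),
#         "robot_cfg": SceneEntityCfg("Mindbot"),
#         "handle_name": "Equipment_BB_13_C",
#         "left_gripper_name": "right_hand_l",
#         "right_gripper_name": "right_hand__r",
#         "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
#         "dist_threshold": 0.5,
#         "target_open_dist": 0.06,
#     },
#     weight=1.0,  # assumed value
# )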

View File

@@ -40,7 +40,7 @@ TABLE_CFG=RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Table",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.95, -0.3, 0.005],
-rot=[0.7071, 0.0, 0.0, 0.7071],
+rot=[0.7071, 0.0, 0.0, -0.7071],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
@@ -76,7 +76,7 @@ DRYINGBOX_CFG = ArticulationCfg(
),
),
init_state=ArticulationCfg.InitialStateCfg(
-pos=(0.95, -0.45, 0.9),
+pos=[0.95, -0.45, 0.9],
rot=[-0.7071, 0.0, 0.0, 0.7071]
),
# actuators={}
@@ -157,7 +157,8 @@ ULTRASOUND_CFG = ArticulationCfg(
##
# Scene definition
##
+from isaaclab.sensors import CameraCfg
+import isaaclab.sim as sim_utils
@configclass
class PullSceneCfg(InteractiveSceneCfg):
@@ -181,6 +182,109 @@ class PullSceneCfg(InteractiveSceneCfg):
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
# # ------------- 新增 6 路相机 -------------
# cam_head = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
# spawn=sim_utils.PinholeCameraCfg(
# focal_length=1.93, # 使用你给的 1.93
# # 由 fx = W * f / aperture 推得 aperture = W * f / fx
# horizontal_aperture=1280 * 1.93 / 911.77, # ≈ 2.71,单位与 focal_length 一致
# clipping_range=(0.1, 1e5),
# ),
# offset=CameraCfg.OffsetCfg(
# pos=(0.1226748226, 0.0, 0.1130563345),
# rot=(-0.5, 0.5, -0.5, 0.5),
# # convention="ros",
# # rot=(0.70710678, 0, 0.70710678, 0),
# convention="ros",
# ),
# data_types=["rgb"],
# width=1280,
# height=720,
# update_latest_camera_pose=True,
# )
# cam_chest = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
# spawn=sim_utils.PinholeCameraCfg(
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
# ),
# offset=CameraCfg.OffsetCfg(
# pos=(-0.14909, 0.3549, -0.23295),
# rot=(0.99452, 0.0, 0.10453, 0.0),#(0.99452, 0.0, 0.10453, 0.0),#
# convention="ros",
# ),
# data_types=["rgb"],
# width=1280,
# height=720,
# update_latest_camera_pose=True,
# )
# cam_left_hand = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
# spawn=sim_utils.PinholeCameraCfg(
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
# ),
# offset=CameraCfg.OffsetCfg(
# pos=(0.0610568560, 1.13e-8, 0.0683504405),#()
# rot=(0.99452, 0, 0.10453, 0),#(0.99452, 0, 0.10453, 0)
# convention="ros",
# ),
# data_types=["rgb"],
# width=1280,
# height=720,
# update_latest_camera_pose=True,
# )
# cam_right_hand = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
# spawn=sim_utils.PinholeCameraCfg(
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
# ),
# offset=CameraCfg.OffsetCfg(
# pos=(0.0610571709, 2.58e-7, 0.0711255539),#0.0610568560, 1.13e-8, 0.0683504405
# rot=(0.445, -0.549, -0.549, -0.445),#0.99452, 0, 0.10453, 0
# convention="ros",
# ),
# data_types=["rgb"],
# width=1280,
# height=720,
# update_latest_camera_pose=True,
# )
# cam_top = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/cam_top",
# spawn=sim_utils.PinholeCameraCfg(
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
# ),
# offset=CameraCfg.OffsetCfg(
# pos=(0.4627602211, 0.0, 2.0977711170),
# rot=(0.83526, 0, 0.54986, 0),
# convention="ros",
# ),
# data_types=["rgb"],
# width=960,
# height=720,
# update_latest_camera_pose=False,
# )
# cam_side = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/cam_side",
# spawn=sim_utils.PinholeCameraCfg(
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
# ),
# offset=CameraCfg.OffsetCfg(
# pos=(0.76709, -0.16995, 1.13007),
# rot=(0.61427, 0, 0.02598, 0.78866),
# convention="ros",
# ),
# data_types=["rgb"],
# width=960,
# height=720,
# update_latest_camera_pose=False,
# )
# # ------------- 相机结束 -------------
##
@@ -222,21 +326,6 @@ class ActionsCfg:
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
)
-# left_arm_fixed = mdp.JointPositionActionCfg(
-# asset_name="Mindbot",
-# joint_names=["l_joint[1-6]"], # l_joint1 through l_joint6
-# # 1. scale set to 0: the RL policy cannot drive these joints, so they stay put
-# scale=0.0,
-# offset={
-# "l_joint1": 2.3562,
-# "l_joint2": -1.2217,
-# "l_joint3": -1.5708,
-# "l_joint4": 1.5708,
-# "l_joint5": 1.5708,
-# "l_joint6": -1.2217,
-# },
-# )
grippers_position = mdp.BinaryJointPositionActionCfg(
asset_name="Mindbot",
@@ -317,16 +406,12 @@ class EventCfg:
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
reset_dryingbox=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
params={"asset_cfg": SceneEntityCfg("dryingbox"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
-# reset_ultrasound = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
-# params={"asset_cfg": SceneEntityCfg("ultrasound"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
-# reset_lid = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
-# params={"asset_cfg": SceneEntityCfg("lid"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
+# 1. Orientation alignment: kept unchanged, this is the foundation
gripper_handle_orientation_alignment = RewTerm(
func=mdp.gripper_handle_orientation_alignment,
params={
@@ -337,15 +422,12 @@ class RewardsCfg:
"gripper_forward_axis": (0.0, 0.0, 1.0),
"gripper_width_axis": (0.0, 1.0, 0.0),
"lid_handle_axis": (0.0, 0.0, 1.0),
-# handle_axis removed, since the new function no longer uses it
-"max_angle_deg": 85.0, # 30.0 or smaller is recommended (85.0 is far too loose for a pointing constraint)
+"max_angle_deg": 60.0, # relaxed slightly; keep 30.0 if early training can handle it
},
-weight=5.0 # keep around 4.0-5.0 so the orientation constraint gives enough guidance
+weight=4.0
)
-# stage 2
-# position-alignment reward: encourage the gripper to straddle the lid, 0.1 m above it in Z
+# 2. Position alignment (merges the previous coarse and fine alignment terms)
+# std=0.05 keeps a useful gradient from 15 cm down to 0 cm, fixing the earlier vanishing-gradient problem
gripper_handle_position_alignment = RewTerm(
func=mdp.gripper_handle_position_alignment,
params={
@@ -354,29 +436,15 @@ class RewardsCfg:
"handle_name":"Equipment_BB_13_C",
"left_gripper_name": "right_hand_l",
"right_gripper_name": "right_hand__r",
-"height_offset": 0.00, # Z direction: 0.1 m above the lid
-"std": 0.2, # position-alignment tolerance 0.3
+"height_offset": 0.00,
+"std": 0.05, # key change: 0.3 is too loose, 0.03 too tight (vanishing gradient); 0.05 is the balance point
},
-weight=3.0
+weight=6.0 # raised weight so this acts as the dominant reward
)
-# [New] fine alignment (guides the gripper into the 2 cm zone)
-gripper_handle_fine_alignment = RewTerm(
-func=mdp.gripper_handle_position_alignment,
-params={
-"dryingbox_cfg": SceneEntityCfg("dryingbox"),
-"robot_cfg": SceneEntityCfg("Mindbot"),
-"handle_name":"Equipment_BB_13_C",
-"left_gripper_name": "right_hand_l",
-"right_gripper_name": "right_hand__r",
-"height_offset": 0.00, # Z direction: 0.1 m above the lid
-"std": 0.05, # position-alignment tolerance 0.05
-},
-weight=10.0 # high weight
-)
-gripper_close_interaction = RewTerm(
-func=mdp.gripper_close_when_near,
+# 3. Far-distance closing penalty (logic fixed)
+# it now only penalises closing the gripper while still far away and no longer blocks the approach
+gripper_closed_penalty = RewTerm(
+func=mdp.gripper_open_penalty_when_far,
params={
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
"robot_cfg": SceneEntityCfg("Mindbot"),
@@ -384,35 +452,188 @@ class RewardsCfg:
"left_gripper_name": "right_hand_l",
"right_gripper_name": "right_hand__r",
"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
-"target_close_pos": 0.02,
-"height_offset": 0.00, # was 0.07
-"grasp_radius": 0.05, # original 0.03
+"dist_threshold": 0.06, # protection radius: the gripper may close once inside it
},
-weight=10.0
+weight=0.5 # lowered weight: a supporting constraint that should not dominate
+)
+# ... edits inside the RewardsCfg class ...
+# 4. Near-distance grasp interaction (switched to the V2 version with a stability term)
+gripper_close_interaction = RewTerm(
+func=mdp.gripper_close_interaction_v2, # version that adds a stability term
+params={
+"dryingbox_cfg": SceneEntityCfg("dryingbox"),
+"robot_cfg": SceneEntityCfg("Mindbot"),
+"handle_name":"Equipment_BB_13_C",
+"left_gripper_name": "right_hand_l",
+"right_gripper_name": "right_hand__r",
+"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
+"target_close_pos": 0.012,
+"height_offset": 0.00,
+"grasp_radius": 0.05,
+},
+weight=8.0 # slightly lower, leaving headroom for the stability reward
)
-# lid_lifted_reward = RewTerm(
-# func=mdp.lid_is_lifted, # reuse the function already defined in rewards.py
+# 5. New: grasp-stability reward (core idea: penalise jitter)
+gripper_grasp_stability = RewTerm(
+func=mdp.gripper_grasp_stability,
+params={
+"robot_cfg": SceneEntityCfg("Mindbot"),
+"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
+"vel_threshold": 0.005,
+},
+weight=4.0 # paid continuously as long as the grasp stays still
+)
# 4. 近距离抓取交互 (修正了目标值)
# gripper_close_interaction = RewTerm(
# func=mdp.gripper_close_when_near,
# params={ # params={
# "lid_cfg": SceneEntityCfg("lid"), # "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右 # "robot_cfg": SceneEntityCfg("Mindbot"),
# "handle_name":"Equipment_BB_13_C",
# "left_gripper_name": "right_hand_l",
# "right_gripper_name": "right_hand__r",
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
# # 关键修改:物理接触点计算
# # 夹爪空载全闭是0.03 (对应0cm)
# # 把手宽3.88cm -> 单侧行程 (6-3.88)/2 = 1.06cm = 0.0106m
# # 设为 0.012 确保接触时计算出的奖励是满分 (ratio >= 1.0)
# "target_close_pos": 0.012,
# "height_offset": 0.00,
# "grasp_radius": 0.05,
# }, # },
# weight=10.0 # 给一个足够大的权重 # weight=10.0 # 只有成功抓到才给的大奖
# ) # )
# lid_lifting_reward = RewTerm( # @configclass
# func=mdp.lid_lift_progress_reward, # class RewardsCfg:
# """Reward terms for the MDP."""
# gripper_handle_orientation_alignment = RewTerm(
# func=mdp.gripper_handle_orientation_alignment,
# params={ # params={
# "lid_cfg": SceneEntityCfg("lid"), # "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标 # "robot_cfg": SceneEntityCfg("Mindbot"),
# "target_height_lift": 0.25, # "handle_name": "Equipment_BB_13_C",
# "height_offset": 0.07, # 与其他奖励保持一致 # "gripper_body_name": "right_hand_body",
# "std": 0.1 # "gripper_forward_axis": (0.0, 0.0, 1.0),
# "gripper_width_axis": (0.0, 1.0, 0.0),
# "lid_handle_axis": (0.0, 0.0, 1.0),
# # 删除了 handle_axis因为新函数中不再使用它
# "max_angle_deg": 30.0, # 建议改为 30.0 或更小85.0 对指向性约束来说太宽松
# },
# weight=4.0 # 建议保持在 4.0 或 5.0,确保姿态约束有足够的引导力
# )
# # stage 2
# # 位置对齐奖励:鼓励夹爪位于 lid 两侧,且 Z 方向在 lid 上方 0.1m
# gripper_handle_position_alignment = RewTerm(
# func=mdp.gripper_handle_position_alignment,
# params={
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "handle_name":"Equipment_BB_13_C",
# "left_gripper_name": "right_hand_l",
# "right_gripper_name": "right_hand__r",
# "height_offset": 0.00, # Z方向lid 上方 0.1m
# "std": 0.3, # 位置对齐的容忍度0.3 ###0.1
# },
# weight=3.0
# )
# # 强制远距离打开奖励
# # 远距离闭合惩罚
# gripper_closed_penalty = RewTerm(
# func=mdp.gripper_open_penalty_when_far,
# params={
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "handle_name": "Equipment_BB_13_C",
# "left_gripper_name": "right_hand_l",
# "right_gripper_name": "right_hand__r",
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
# "dist_threshold": 0.02, # 3cm 以外不允许闭合
# "max_penalty_dist":0.055 #0.055
# },
# weight=1.0 # 提高权重让扣分很痛8.0
# )
# # 【新增】精细对齐 (引导进入 2cm 圈)
# gripper_handle_fine_alignment = RewTerm(
# func=mdp.gripper_handle_position_alignment,
# params={
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "handle_name":"Equipment_BB_13_C",
# "left_gripper_name": "right_hand_l",
# "right_gripper_name": "right_hand__r",
# "height_offset": 0.00, # Z方向lid 上方 0.1m
# "std": 0.03, # 位置对齐的容忍度0.05
# },
# weight=5.0 # 高权重
# )
# # gripper_close_interaction = RewTerm(
# # func=mdp.gripper_close_when_near,
# # params={
# # "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# # "robot_cfg": SceneEntityCfg("Mindbot"),
# # "handle_name": "Equipment_BB_13_C",
# # "left_gripper_name": "right_hand_l",
# # "right_gripper_name": "right_hand__r",
# # "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
# # # 关键匹配把手尺寸
# # "handle_thickness": 0.0388, # 把手厚度X/宽度向抓取方向)
# # "thickness_tol": 0.008, # 误差容忍,约 0.8cm
# # "grasp_radius": 0.04, # 进入此半径才计算闭合奖励
# # "height_offset": 0.00,
# # "target_close_pos": 0.03, # 保留给关节闭合幅度
# # },
# # weight=10.0
# # )
# gripper_close_interaction = RewTerm(
# func=mdp.gripper_close_when_near,
# params={
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "handle_name":"Equipment_BB_13_C",
# "left_gripper_name": "right_hand_l",
# "right_gripper_name": "right_hand__r",
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
# "target_close_pos": 0.03,
# "height_offset": 0.00,#0.07
# "grasp_radius": 0.04,#original 0.03
# }, # },
# # 权重设大一点,告诉它这是最终目标
# weight=10.0 # weight=10.0
# ) # )
# # lid_lifted_reward = RewTerm(
# # func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
# # params={
# # "lid_cfg": SceneEntityCfg("lid"),
# # "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
# # },
# # weight=10.0 # 给一个足够大的权重
# # )
# # lid_lifting_reward = RewTerm(
# # func=mdp.lid_lift_progress_reward,
# # params={
# # "lid_cfg": SceneEntityCfg("lid"),
# # "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
# # "target_height_lift": 0.25,
# # "height_offset": 0.07, # 与其他奖励保持一致
# # "std": 0.1
# # },
# # # 权重设大一点,告诉它这是最终目标
# # weight=10.0
# # )

View File

@@ -0,0 +1,681 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
import torch
from isaaclab.actuators import ImplicitActuatorCfg
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
from . import mdp
##
# Pre-defined configs
##
from mindbot.robot.mindbot import MINDBOT_CFG
# ====== 其他物体配置 ======
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
TABLE_CFG=RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Table",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.95, -0.3, 0.005],
rot=[0.7071, 0.0, 0.0, -0.7071],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
DRYINGBOX_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="C:/Users/PC/workpalce/maic_usd_assets/assets/Collected_Equipment_BB_0B/Equipment/Equipment_BB_13.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,#
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
stabilization_threshold=1e-6,
# fix_root_link=True,
),
),
init_state=ArticulationCfg.InitialStateCfg(
pos=[0.95, -0.45, 0.9],
rot=[-0.7071, 0.0, 0.0, 0.7071]
),
# actuators={}
actuators={
"passive_damper": ImplicitActuatorCfg(
# ".*" 表示匹配该USD文件内的所有关节无论是轮子、屏幕转轴还是其他
joint_names_expr=["RevoluteJoint"],
stiffness=10000.0,
damping=1000.0,
effort_limit_sim=10000.0,
velocity_limit_sim=100.0,
),
}
)
LID_CFG = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Lid",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.2, 0.65, 0.9],
rot=[1.0, 0.0, 0.0, 0.0],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
# scale=(0.2, 0.2, 0.2),
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
max_angular_velocity=1000.0,
max_linear_velocity=1000.0,
max_depenetration_velocity=0.5,#original 5.0
linear_damping=5.0, #original 0.5
angular_damping=5.0, #original 0.5
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
ULTRASOUND_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,#
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
stabilization_threshold=1e-6,
# fix_root_link=True,
),
),
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)),
# actuators={}
actuators={
"passive_damper": ImplicitActuatorCfg(
# ".*" 表示匹配该USD文件内的所有关节无论是轮子、屏幕转轴还是其他
joint_names_expr=[".*"],
stiffness=0.0,
damping=1000.0,
effort_limit_sim=10000.0,
velocity_limit_sim=100.0,
),
}
)
##
# Scene definition
##
from isaaclab.sensors import CameraCfg
import isaaclab.sim as sim_utils
@configclass
class PullSceneCfg(InteractiveSceneCfg):
"""Configuration for a cart-pole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
dryingbox: ArticulationCfg = DRYINGBOX_CFG.replace(prim_path="{ENV_REGEX_NS}/Dryingbox")
# ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
# lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
# # ------------- 修改后的相机配置 (直接使用 USD 内置相机) -------------
# # 1. 头部相机
# # USD 路径: /root/robot_head/cam_head
# cam_head = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
# update_latest_camera_pose=True,
# data_types=["rgb"],
# width=1280,
# height=720,
# # 移除 spawn 和 offset直接使用 USD 中的参数和位置
# )
# # 2. 胸部相机
# # USD 路径: /root/robot_trunk/cam_chest
# cam_chest = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
# update_latest_camera_pose=True,
# data_types=["rgb"],
# width=1280,
# height=720,
# )
# # 3. 左手相机
# # USD 路径: /root/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand
# cam_left_hand = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
# update_latest_camera_pose=True,
# data_types=["rgb"],
# width=1280,
# height=720,
# )
# # 4. 右手相机
# # USD 路径: /root/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand
# cam_right_hand = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
# update_latest_camera_pose=True,
# data_types=["rgb"],
# width=1280,
# height=720,
# )
# ------------- 相机结束 -------------
# # ------------- 新增 6 路相机 -------------
# cam_head = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
# spawn=sim_utils.PinholeCameraCfg(
# focal_length=1.93, # 使用你给的 1.93
# # 由 fx = W * f / aperture 推得 aperture = W * f / fx
# horizontal_aperture=1280 * 1.93 / 911.77, # ≈ 2.71,单位与 focal_length 一致
# clipping_range=(0.1, 1e5),
# ),
# offset=CameraCfg.OffsetCfg(
# pos=(0.1226748226, 0.0, 0.1130563345),
# rot=(-0.5, 0.5, -0.5, 0.5),
# # convention="ros",
# # rot=(0.70710678, 0, 0.70710678, 0),
# convention="ros",
# ),
# data_types=["rgb"],
# width=1280,
# height=720,
# update_latest_camera_pose=True,
# )
# cam_chest = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
# spawn=sim_utils.PinholeCameraCfg(
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
# ),
# offset=CameraCfg.OffsetCfg(
# pos=(-0.14909, 0.3549, -0.23295),
# rot=(0.99452, 0.0, 0.10453, 0.0),#(0.99452, 0.0, 0.10453, 0.0),#
# convention="ros",
# ),
# data_types=["rgb"],
# width=1280,
# height=720,
# update_latest_camera_pose=True,
# )
# cam_left_hand = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
# spawn=sim_utils.PinholeCameraCfg(
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
# ),
# offset=CameraCfg.OffsetCfg(
# pos=(0.0610568560, 1.13e-8, 0.0683504405),#()
# rot=(0.99452, 0, 0.10453, 0),#(0.99452, 0, 0.10453, 0)
# convention="ros",
# ),
# data_types=["rgb"],
# width=1280,
# height=720,
# update_latest_camera_pose=True,
# )
# cam_right_hand = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
# spawn=sim_utils.PinholeCameraCfg(
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
# ),
# offset=CameraCfg.OffsetCfg(
# pos=(0.0610571709, 2.58e-7, 0.0711255539),#0.0610568560, 1.13e-8, 0.0683504405
# rot=(0.445, -0.549, -0.549, -0.445),#0.99452, 0, 0.10453, 0
# convention="ros",
# ),
# data_types=["rgb"],
# width=1280,
# height=720,
# update_latest_camera_pose=True,
# )
# cam_top = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/cam_top",
# spawn=sim_utils.PinholeCameraCfg(
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
# ),
# offset=CameraCfg.OffsetCfg(
# pos=(0.4627602211, 0.0, 2.0977711170),
# rot=(0.83526, 0, 0.54986, 0),
# convention="ros",
# ),
# data_types=["rgb"],
# width=960,
# height=720,
# update_latest_camera_pose=False,
# )
# cam_side = CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/cam_side",
# spawn=sim_utils.PinholeCameraCfg(
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
# ),
# offset=CameraCfg.OffsetCfg(
# pos=(0.76709, -0.16995, 1.13007),
# rot=(0.61427, 0, 0.02598, 0.78866),
# convention="ros",
# ),
# data_types=["rgb"],
# width=960,
# height=720,
# update_latest_camera_pose=False,
# )
# # ------------- 相机结束 -------------
##
# MDP settings
##
def right_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(["right_hand_body"], preserve_order=True)
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
return pos_w
def get_body_pos_rel(env, asset_cfg: SceneEntityCfg) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(asset_cfg.body_names, preserve_order=True)
return asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
# New: generic helper to fetch a body's world-frame orientation
def get_body_quat_w(env, asset_cfg: SceneEntityCfg) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(asset_cfg.body_names, preserve_order=True)
return asset.data.body_quat_w[:, body_ids[0]]
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
# Task-space controller: the policy outputs a 6-D end-effector position + orientation increment
right_arm_ee = DifferentialInverseKinematicsActionCfg(
asset_name="Mindbot",
joint_names=["r_joint[1-6]"], # the six right-arm joints
body_name="right_hand_body", # end-effector body name
controller=DifferentialIKControllerCfg(
command_type="pose", # command position + orientation
use_relative_mode=True, # relative mode: actions are increments
ik_method="dls", # damped least-squares method
),
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
)
grippers_position = mdp.BinaryJointPositionActionCfg(
asset_name="Mindbot",
joint_names=["right_hand_joint_left", "right_hand_joint_right"],
# note: the dictionary format is required here
# open_command_expr={"joint-name regex": value}
open_command_expr={"right_hand_joint_left": 0.0, "right_hand_joint_right": 0.0},
# close_command_expr={"joint-name regex": value}
close_command_expr={"right_hand_joint_left": -0.03, "right_hand_joint_right": 0.03},
)
trunk_position = mdp.JointPositionActionCfg(
asset_name="Mindbot",
joint_names=["PrismaticJoint"],
scale=0.00,
offset=0.25,
clip=None
)
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
right_gripper_pos = ObsTerm(func=right_gripper_pos_w,
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["right_hand_body"])})
dryingbox_handle_pos = ObsTerm(
func=get_body_pos_rel,
params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])}
)
dryingbox_handle_quat = ObsTerm(
func=get_body_quat_w,
params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])}
)
# lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
# ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class EventCfg:
"""Configuration for events."""
# reset all joints to their init_state (no offset)
reset_mindbot_all_joints = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
# add a random offset to the right-arm joints only (ranges are currently zero)
reset_mindbot_right_arm = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["r_joint[1-6]"]),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
reset_dryingbox=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
params={"asset_cfg": SceneEntityCfg("dryingbox"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
gripper_handle_orientation_alignment = RewTerm(
func=mdp.gripper_handle_orientation_alignment,
params={
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"handle_name": "Equipment_BB_13_C",
"gripper_body_name": "right_hand_body",
"gripper_forward_axis": (0.0, 0.0, 1.0),
"gripper_width_axis": (0.0, 1.0, 0.0),
"lid_handle_axis": (0.0, 0.0, 1.0),
# handle_axis removed, since the new function no longer uses it
"max_angle_deg": 30.0, # 30.0 or smaller is recommended (85.0 is far too loose for a pointing constraint)
},
weight=4.0 # keep this around 4.0-5.0 so the orientation constraint provides enough guidance
)
# stage 2
# position-alignment reward: encourage the gripper to straddle the lid, with Z about 0.1 m above it
gripper_handle_position_alignment = RewTerm(
func=mdp.gripper_handle_position_alignment,
params={
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"handle_name":"Equipment_BB_13_C",
"left_gripper_name": "right_hand_l",
"right_gripper_name": "right_hand__r",
"height_offset": 0.00, # Z方向lid 上方 0.1m
"std": 0.3, # 位置对齐的容忍度0.3 ###0.1
},
weight=3.0
)
# force-open reward at long range
# penalty for closing the gripper while still far away
gripper_closed_penalty = RewTerm(
func=mdp.gripper_open_penalty_when_far,
params={
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"handle_name": "Equipment_BB_13_C",
"left_gripper_name": "right_hand_l",
"right_gripper_name": "right_hand__r",
"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
"dist_threshold": 0.02, # 3cm 以外不允许闭合
"max_penalty_dist":0.055 #0.055
},
weight=1.0 # 提高权重让扣分很痛8.0
)
# [New] fine alignment (guides the gripper into the 2 cm zone)
gripper_handle_fine_alignment = RewTerm(
func=mdp.gripper_handle_position_alignment,
params={
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"handle_name":"Equipment_BB_13_C",
"left_gripper_name": "right_hand_l",
"right_gripper_name": "right_hand__r",
"height_offset": 0.00, # Z方向lid 上方 0.1m
"std": 0.03, # 位置对齐的容忍度0.05
},
weight=5.0 # 高权重
)
# gripper_close_interaction = RewTerm(
# func=mdp.gripper_close_when_near,
# params={
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "handle_name": "Equipment_BB_13_C",
# "left_gripper_name": "right_hand_l",
# "right_gripper_name": "right_hand__r",
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
# # 关键匹配把手尺寸
# "handle_thickness": 0.0388, # 把手厚度X/宽度向抓取方向)
# "thickness_tol": 0.008, # 误差容忍,约 0.8cm
# "grasp_radius": 0.04, # 进入此半径才计算闭合奖励
# "height_offset": 0.00,
# "target_close_pos": 0.03, # 保留给关节闭合幅度
# },
# weight=10.0
# )
gripper_close_interaction = RewTerm(
func=mdp.gripper_close_when_near,
params={
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"handle_name":"Equipment_BB_13_C",
"left_gripper_name": "right_hand_l",
"right_gripper_name": "right_hand__r",
"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
"target_close_pos": 0.03,
"height_offset": 0.00,#0.07
"grasp_radius": 0.04,#original 0.03
},
weight=10.0
)
# lid_lifted_reward = RewTerm(
# func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
# },
# weight=10.0 # 给一个足够大的权重
# )
# lid_lifting_reward = RewTerm(
# func=mdp.lid_lift_progress_reward,
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
# "target_height_lift": 0.25,
# "height_offset": 0.07, # 与其他奖励保持一致
# "std": 0.1
# },
# # 权重设大一点,告诉它这是最终目标
# weight=10.0
# )
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
mindbot_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
)
# lid_fly_away = DoneTerm(
# func=mdp.base_height_failure,
# params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
# )
# # 新增:盖子掉落判定
# # 如果盖子掉回桌面(高度接近初始高度),且已经运行了一段时间(避开刚开始重置的瞬间)
# lid_dropped = DoneTerm(
# func=mdp.base_height_failure, # 复用高度判定函数
# params={
# "asset_cfg": SceneEntityCfg("lid"),
# "minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了
# },
# )
##
# Environment configuration
##
@configclass
class CurriculumCfg:
pass
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
# position_std_scheduler = CurrTerm(
# func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类
# params={
# # 路径rewards管理器 -> 你的奖励项名 -> params字典 -> std键
# "address": "rewards.gripper_lid_position_alignment.params.std",
# # 修改逻辑:使用我们刚才写的函数
# "modify_fn": mdp.annealing_std,
# # 传给函数的参数
# "modify_params": {
# "start_step": 00, # 约 600 轮
# "end_step": 5_000, # 约 1200 轮
# "start_std": 0.3,
# "end_std": 0.05,
# }
# }
# )
@configclass
class PullEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: PullSceneCfg = PullSceneCfg(num_envs=5, env_spacing=3.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# curriculum: CurriculumCfg = CurriculumCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
self.decimation = 4 # original 2; raised to 4 (with sim.dt = 1/120 the control rate is 30 Hz)
self.episode_length_s = 10
# viewer settings
self.viewer.eye = (3.5, 0.0, 3.2)
# simulation settings
self.sim.dt = 1 / 120 #original 1 / 60
self.sim.render_interval = self.decimation
# 1. base GPU heap memory
self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256 MB
self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024 # ~16.8M contact points
self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024 # ~1M contact patches
# 5. aggregate-pair capacities (for complex articulations)
self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024
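# Usage sketch (illustrative, not part of this commit): once the Omniverse app has been started
# (e.g. via AppLauncher as in the training script), the config above can also be used directly,
# without the Gym registry, by constructing the manager-based environment. The numbers below are
# placeholders.
#
# import torch
# from isaaclab.envs import ManagerBasedRLEnv
#
# env_cfg = PullEnvCfg()
# env_cfg.scene.num_envs = 16  # override the default of 5
# env = ManagerBasedRLEnv(cfg=env_cfg)
# obs, _ = env.reset()
# for _ in range(100):
#     actions = torch.zeros(env.num_envs, env.action_manager.total_action_dim, device=env.device)
#     obs, rew, terminated, truncated, extras = env.step(actions)
# env.close()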

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Pull-Ultrasound-Lid-Up-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
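# Usage sketch (illustrative, not part of this commit): after this module is imported, the task id
# can be instantiated in the usual Isaac Lab way from a script launched through AppLauncher.
# parse_env_cfg is the standard isaaclab_tasks helper; the device and num_envs values are placeholders.
#
# import gymnasium as gym
# from isaaclab_tasks.utils import parse_env_cfg
#
# env_cfg = parse_env_cfg("Pull-Ultrasound-Lid-Up-v0", device="cuda:0", num_envs=5)
# env = gym.make("Pull-Ultrasound-Lid-Up-v0", cfg=env_cfg)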

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,72 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 48 # slightly more steps per rollout
max_iterations = 5000 # more iterations so the policy has enough time to learn
save_interval = 100
# experiment_name = "mindbot_grasp" # 建议改个名,不要叫 cartpole 了
experiment_name = "mindbot_pullUltrasoundLidUp"
policy = RslRlPpoActorCriticCfg(
init_noise_std=0.7,
actor_obs_normalization=True, # enable observation normalization
critic_obs_normalization=True, # enable observation normalization
# larger network capacity: a three-layer MLP with enough width
actor_hidden_dims=[128, 64, 32],
critic_hidden_dims=[128, 64, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005, # keep a moderate amount of exploration
num_learning_epochs=5,
num_mini_batches=8,
learning_rate=3e-4, # if the larger network trains unstably, try 5.0e-4 instead
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
# @configclass
# class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
# num_steps_per_env = 16
# max_iterations = 150
# save_interval = 50
# experiment_name = "cartpole_direct"
# policy = RslRlPpoActorCriticCfg(
# init_noise_std=1.0,
# actor_obs_normalization=False,
# critic_obs_normalization=False,
# actor_hidden_dims=[32, 32],
# critic_hidden_dims=[32, 32],
# activation="elu",
# )
# algorithm = RslRlPpoAlgorithmCfg(
# value_loss_coef=1.0,
# use_clipped_value_loss=True,
# clip_param=0.2,
# entropy_coef=0.005,
# num_learning_epochs=5,
# num_mini_batches=4,
# learning_rate=1.0e-3,
# schedule="adaptive",
# gamma=0.99,
# lam=0.95,
# desired_kl=0.01,
# max_grad_norm=1.0,
# )

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,13 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the environment."""
from isaaclab.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403
from .gripper import * # noqa: F401, F403
from .curriculums import * # noqa: F401, F403

View File

@@ -0,0 +1,50 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
from isaaclab.envs.mdp import modify_term_cfg
def annealing_std(
env: ManagerBasedRLEnv,
env_ids: torch.Tensor,
current_value: float,
start_step: int,
end_step: int,
start_std: float,
end_std: float
):
"""
根据步数线性退火 std 值。
Args:
current_value: 当前的参数值 (系统自动传入)
start_step: 开始退火的步数
end_step: 结束退火的步数
start_std: 初始 std
end_std: 最终 std
"""
current_step = env.common_step_counter
# 1. 还没到开始时间 -> 保持初始值 (或者不改)
if current_step < start_step:
# 如果当前值还没被设为 start_std就强制设一下否则不动
return start_std
# 2. 已经超过结束时间 -> 保持最终值
elif current_step >= end_step:
return end_std
# 3. 在中间 -> 线性插值
else:
ratio = (current_step - start_step) / (end_step - start_step)
new_std = start_std + (end_std - start_std) * ratio
return new_std
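A sketch of how annealing_std could be wired into a curriculum term; the address path and modify_term_cfg usage mirror the commented-out scheduler in the environment config further down, so treat the exact field path as an assumption to verify:

from isaaclab.managers import CurriculumTermCfg as CurrTerm

# anneal the std of the position-alignment reward from 0.3 to 0.05 over the first 5000 steps
position_std_scheduler = CurrTerm(
    func=modify_term_cfg,
    params={
        "address": "rewards.gripper_lid_position_alignment.params.std",
        "modify_fn": annealing_std,
        "modify_params": {"start_step": 0, "end_step": 5_000, "start_std": 0.3, "end_std": 0.05},
    },
)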

View File

@@ -0,0 +1,54 @@
# Assumed to live in your mdp.py module
from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
from isaaclab.utils import configclass
import torch
class CoupledJointPositionAction(JointPositionAction):
"""Runtime action term instantiated by the ActionManager. Inherits from JointPositionAction."""
def __init__(self, cfg: 'CoupledJointPositionActionCfg', env):
super().__init__(cfg, env)
@property
def action_dim(self) -> int:
"""Force the ActionManager to expect a 1-D input."""
return 1
def process_actions(self, actions: torch.Tensor):
scale = self.cfg.scale
offset = self.cfg.offset
# store the raw actions
self._raw_actions[:] = torch.clamp(actions, -1, 1)
# apply the affine transformation
target_position_interval = self._raw_actions * scale + offset
right_cmd = target_position_interval
left_cmd = -target_position_interval
# import pdb; pdb.set_trace()
self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)
# print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}")
# clip actions
if self.cfg.clip is not None:
self._processed_actions = torch.clamp(
self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
)
@configclass
class CoupledJointPositionActionCfg(JointPositionActionCfg):
"""
Configuration class. The key point is that class_type points to the implementation class above.
"""
# !!! Key point !!!
# Tells the ActionManager: "when you see this config, instantiate CoupledJointPositionAction"
class_type = CoupledJointPositionAction
def __post_init__(self) -> None:
# this check is safe here because the ActionManager calls it during initialization
if len(self.joint_names) != 2:
raise ValueError("Requires exactly two joint names.")
super().__post_init__()
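A usage sketch for the config above; the joint names come from the gripper joints used elsewhere in this commit, while the scale and offset values are illustrative assumptions:

# Hypothetical ActionsCfg entry: one scalar action drives both fingers in mirror.
gripper = CoupledJointPositionActionCfg(
    asset_name="Mindbot",
    joint_names=["left_hand_joint_left", "left_hand_joint_right"],  # exactly two joints
    scale=0.015,   # assumed: maps an action in [-1, 1] into a 0.03 m closing range
    offset=0.015,
)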

View File

@@ -0,0 +1,881 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
"""Reward the agent for lifting the lid above the minimal height."""
lid: RigidObject = env.scene[lid_cfg.name]
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
height = lid.data.root_pos_w[:, 2]
return torch.where(height > minimal_height, 1.0, 0.0)
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for reaching the lid using tanh-kernel."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# coerce std to a plain float so it cannot broadcast into a (3,) vector
if not isinstance(std, float):
if torch.is_tensor(std):
std = std.item()
else:
std = float(std)
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
# body_idx = robot.find_bodies(gripper_body_name)[0]
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
return 1 - torch.tanh(distance / std)
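# Illustration only (not referenced by the task): shape of the 1 - tanh(d / std) kernel above.
# With std = 0.1 the reward is ~1.00 at contact, ~0.54 at 5 cm, ~0.24 at 10 cm and ~0.005 at 30 cm,
# so the gradient is steep near the lid and nearly flat far away.
def _tanh_kernel_demo(distance: float, std: float = 0.1) -> float:
    """Standalone sketch of the tanh-kernel shaping used by gripper_lid_distance."""
    import math
    return 1.0 - math.tanh(distance / std)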
def lid_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for grasping the lid (close and lifted)."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
body_idx = robot.find_bodies(gripper_body_name)[0]
gripper_pos_w = robot.data.body_pos_w[:, body_idx, :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
# Check if close and lifted
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
return torch.where(is_close & is_lifted, 1.0, 0.0)
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Combined reward for reaching the lid AND lifting it."""
# Get reaching reward
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
# Get lifting reward
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
# Combine rewards multiplicatively
return reach_reward * lift_reward
# def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand__l",
# right_gripper_name: str = "left_hand_r",
# height_offset: float = 0.1,
# std: float = 0.1) -> torch.Tensor:
# """奖励函数夹爪相对于lid的位置对齐。
# Args:
# env: 环境实例
# lid_cfg: lid的配置
# robot_cfg: 机器人的配置
# left_gripper_name: 左侧夹爪body名称
# right_gripper_name: 右侧夹爪body名称
# side_distance: Y方向的期望距离
# height_offset: Z方向的期望高度偏移
# std: tanh核函数的标准差
# Returns:
# 位置对齐奖励 (num_envs,)
# """
# lid: RigidObject = env.scene[lid_cfg.name]
# robot: Articulation = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3]
# # 获取两个夹爪的位置
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
# # 计算夹爪中心位置
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
# # 计算相对位置(夹爪中心相对于 lid 中心)
# rel_pos = gripper_center - lid_pos_w # (num_envs, 3)
# # X 方向位置:应该对齐(接近 0
# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6
# x_reward = 1.0 - torch.tanh(x_dist / std)
# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance
# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6
# # y_error = torch.abs(y_dist - side_distance)
# # y_reward = 1.0 - torch.tanh(y_error / std)
# y_reward = 1.0 - torch.tanh(y_dist / std)
# # Z 方向位置:应该在 lid 上方 height_offset 处
# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6
# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6
# z_reward = 1.0 - torch.tanh(z_error / std)
# # 组合位置奖励
# position_reward = x_reward * y_reward * z_reward
# return position_reward
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
height_offset: float = 0.02,
std: float = 0.1) -> torch.Tensor:
"""奖励函数夹爪相对于lid的位置对齐简化版
计算夹爪中心到目标点lid上方height_offset处的距离奖励。
Args:
env: 环境实例
lid_cfg: lid的配置
robot_cfg: 机器人的配置
left_gripper_name: 左侧夹爪body名称
right_gripper_name: 右侧夹爪body名称
height_offset: Z方向的期望高度偏移目标点在lid上方
std: tanh核函数的标准差
Returns:
位置对齐奖励 (num_envs,)
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
# 目标点lid位置 + height_offsetZ方向
target_pos = lid_pos_w.clone()
# print(target_pos[0])
target_pos[:, 2] += height_offset # 在lid上方height_offset处
# print(target_pos[0])
# 获取两个夹爪的位置
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
# 计算夹爪中心位置
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
# print(gripper_center[0])
# 计算夹爪中心到目标点的3D距离
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
# 距离奖励:距离越小,奖励越大
position_reward = 1.0 - torch.tanh(distance / std)
return position_reward
# ... (keep the original imports)
# ==============================================================================
# New: direct distance penalty (linear guidance)
# ==============================================================================
def gripper_lid_distance_penalty(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
height_offset: float = 0.07,
) -> torch.Tensor:
"""
Euclidean distance from the gripper center to the target point.
The returned value is positive; give it a negative weight in the config (e.g. weight=-1.0)
so that a larger distance means a larger penalty (lower total reward).
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# 1. target point (lid center + height offset)
lid_pos_w = lid.data.root_pos_w[:, :3].clone()
lid_pos_w[:, 2] += height_offset
# 2. gripper center
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
right_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
gripper_center = (left_pos + right_pos) / 2.0
# 3. distance
distance = torch.norm(gripper_center - lid_pos_w, dim=1)
return distance
# ==============================================================================
# Refined: position alignment (same function kept, with the logic made explicit;
# note that this later definition overrides the one above)
# ==============================================================================
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
height_offset: float = 0.02,
std: float = 0.1) -> torch.Tensor:
"""Reward: tanh-kernel distance reward (the closer the gripper gets, the faster the score grows)."""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
target_pos = lid_pos_w.clone()
target_pos[:, 2] += height_offset
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
# unchanged here; what matters is how std and weight are set in the config
position_reward = 1.0 - torch.tanh(distance / std)
return position_reward
# def gripper_lid_orientation_alignment(
# env,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# gripper_body_name: str = "left_hand_body",
# gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z
# gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行)
# lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y
# ) -> torch.Tensor:
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 获取姿态
# lid_quat_w = lid.data.root_quat_w
# body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
# gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
# # 3. 垂直对齐 (Vertical Alignment)
# # 目标:夹爪 +Y 指向 World -Z
# grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
# grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
# # print(grip_fwd_world[0])
# world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
# dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
# dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) #add
# # 映射 [-1, 1] -> [0, 1],且全程有梯度
# # 即使是朝天 (+1),只要稍微往下转一点,分数就会增加
# # rew_vertical = (dot_vertical + 1.0) / 2.0
# val_vertical = torch.clamp((dot_vertical + 1.0) / 2.0, 0.0, 1.0)
# rew_vertical = torch.pow(val_vertical, 2) + 1e-4
# # nan -> 0
# rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
# # 4. 偏航对齐 (Yaw Alignment)
# # 目标:夹爪 +Z 平行于 Lid +Y
# grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
# grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
# lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
# lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
# dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
# rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
# # nan -> 0
# rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
# # 5. 组合
# total_reward = rew_vertical * rew_yaw
# total_reward=torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
# # debug
# if not torch.isfinite(total_reward).all():
# print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
# print(f"rew_vertical: {rew_vertical}")
# print(f"rew_yaw: {rew_yaw}")
# return total_reward
def gripper_lid_orientation_alignment(
env,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body",
gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # corrected to Z
gripper_width_axis: tuple = (0.0, 1.0, 0.0), # corrected to Y (used to stay parallel to the handle)
lid_handle_axis: tuple = (0.0, 1.0, 0.0), # corrected to Y
max_angle_deg: float = 60.0, # maximum allowed angular deviation (degrees)
) -> torch.Tensor:
# 1. get the assets
lid = env.scene[lid_cfg.name]
robot = env.scene[robot_cfg.name]
# 2. get the poses
lid_quat_w = lid.data.root_quat_w
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
# 3. vertical alignment
# goal: the gripper forward axis points along world -Z (downward)
grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
# dot product (after normalization, dot = cos(angle))
dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0)
# angle in radians
angle_rad = torch.acos(torch.clamp(dot_vertical, -1.0, 1.0))
max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device)
# reward only while the angle is <= max_angle_deg;
# it decays smoothly from 1.0 at angle = 0 to 0.0 at angle = max_angle
angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
rew_vertical = 1.0 - angle_normalized # smaller angle -> larger reward
# zero reward beyond max_angle_deg
rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical))
# nan -> 0
rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
# 4. yaw alignment
# goal: the gripper width axis stays parallel to the lid handle axis (+Y)
grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
# nan -> 0
rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
# 5. combine
total_reward = rew_vertical * rew_yaw
total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
# debug
if not torch.isfinite(total_reward).all():
print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
print(f"rew_vertical: {rew_vertical}")
print(f"rew_yaw: {rew_yaw}")
return total_reward
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body", # 用于计算距离的 body
# # 注意:确保 joint_names 只包含那两个夹爪关节
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05, # 5cm 以内视为“近”,需要闭合
# target_open_pos: float = 0.0, # 【修正】张开是 0.0
# target_close_pos: float = 0.03 # 【修正】闭合是 0.03
# ) -> torch.Tensor:
# """
# 逻辑很简单:
# 1. 如果 距离 < close_threshold目标关节位置 = 0.03 (闭合)
# 2. 如果 距离 >= close_threshold目标关节位置 = 0.0 (张开)
# 3. 返回 -abs(当前关节 - 目标关节),即惩罚误差。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算距离 (末端 vs Lid中心)
# lid_pos_w = lid.data.root_pos_w[:, :3]
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # distance: (num_envs,)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 3. 动态确定目标关节值
# # 如果距离小于阈值,目标设为 close_pos否则设为 open_pos
# # target_val: (num_envs, 1)
# # print(distance)
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 4. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# # joint_pos: (num_envs, 2)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# # 取绝对值防止左指是负数的情况虽然你的配置里范围看起来都是正的加上abs更保险
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 5. 计算误差
# # error: (num_envs,) -> 两个手指误差的平均值
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # # 6. 返回负奖励 (惩罚)
# # # 误差越大,惩罚越大。完全符合要求时奖励为 0。
# # return -error
# # =====================================================
# # 🚀 核心修复:安全输出
# # =====================================================
# # 1. 使用 tanh 限制数值范围在 [-1, 0]
# reward = -torch.tanh(error / 0.01)
# # 2. 过滤 NaN这一步能救命
# # 如果物理引擎算出 NaN这里会把它变成 0.0,防止训练崩溃
# reward = torch.nan_to_num(reward, nan=0.0, posinf=0.0, neginf=-1.0)
# return reward
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body",
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05,
# target_open_pos: float = 0.0,
# target_close_pos: float = 0.03,
# height_offset: float = 0.06, # 新增:与位置奖励保持一致
# ) -> torch.Tensor:
# """
# 逻辑:
# 1. 计算夹爪到【目标抓取点】(Lid上方 height_offset 处) 的距离。
# 2. 如果距离 < close_threshold目标关节位置 = 闭合。
# 3. 否则:目标关节位置 = 张开。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算目标抓取点位置 (Lid位置 + Z轴偏移)
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 修正为抓取点位置
# # 获取夹爪位置
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # 3. 计算距离 (夹爪 vs 目标抓取点)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 4. 动态确定目标关节值
# # 靠近抓取点 -> 闭合;远离 -> 张开
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 5. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 6. 计算误差并返回奖励
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # 使用 tanh 限制数值范围
# reward = 1.0 - torch.tanh(error / 0.01) # 误差越小奖励越大
# return torch.nan_to_num(reward, nan=0.0)
def _is_grasped(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg,
robot_cfg: SceneEntityCfg,
left_gripper_name: str,
right_gripper_name: str,
joint_names: list,
height_offset: float,
grasp_radius: float,
target_close_pos: float
) -> torch.Tensor:
"""
内部辅助函数:判定是否成功抓取。
条件:
1. 夹爪中心在把手目标点附近 (Sphere Check)
2. 夹爪处于闭合发力状态 (Joint Effort Check)
3. (关键) 夹爪Z轴高度合适不能压在盖子上面 (Z-Axis Check)
"""
# 1. 获取对象
lid = env.scene[lid_cfg.name]
robot = env.scene[robot_cfg.name]
# 2. 目标点位置 (把手中心)
lid_pos_w = lid.data.root_pos_w[:, :3].clone()
lid_pos_w[:, 2] += height_offset
# 3. 夹爪位置
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
gripper_center = (pos_L + pos_R) / 2.0
# 4. 条件一:距离判定 (在把手球范围内)
dist_to_handle = torch.norm(gripper_center - lid_pos_w, dim=-1)
is_near = dist_to_handle < grasp_radius
# 5. 条件二Z轴高度判定 (防止压在盖子上)
# 夹爪中心必须在把手目标点附近,允许上下微小浮动,但不能太高
# 假设 height_offset 是把手几何中心,那么夹爪不应该比它高太多
z_diff = gripper_center[:, 2] - lid_pos_w[:, 2]
is_z_valid = torch.abs(z_diff) < 0.03 # 3cm 容差
# 6. 条件三:闭合判定 (正在尝试闭合)
joint_ids, _ = robot.find_joints(joint_names)
# 获取关节位置 (绝对值)
joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# === 修改闭合判定 ===
# 1. 关节位置判定 (Effort Check)
# 只要用力了就行,去掉上限判定,防止夹太紧被误判
# 假设 0.05 是你的新 target_close_pos
is_effort_closing = torch.all(joint_pos > 0.005, dim=1)
# 2. 几何排斥判定 (Geometry Check)
# 如果真的夹住了东西,左右指尖的距离应该被把手撑开,不会太小
# 获取指尖位置
dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
# 假设把手厚度是 1cm (0.01m)
# 如果指尖距离 < 0.005,说明两个手指已经碰到一起了(空夹),没夹住东西
# 如果指尖距离 >= 0.005,说明中间有东西挡着
is_not_empty = dist_fingertips > 0.005
# 综合判定:
# 1. 在把手附近 (is_near)
# 2. 高度合适 (is_z_valid)
# 3. 在用力闭合 (is_effort_closing)
# 4. 指尖没贴死 (is_not_empty) -> 这就证明夹住东西了!
return is_near & is_z_valid & is_effort_closing & is_not_empty
def gripper_close_when_near(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
target_close_pos: float = 0.03,
height_offset: float = 0.02,
grasp_radius: float = 0.05
) -> torch.Tensor:
# 1. check whether the basic grasp conditions are met
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
# 2. compute the clamping intensity
robot = env.scene[robot_cfg.name]
joint_ids, _ = robot.find_joints(joint_names)
# current joint positions, absolute values (num_envs, num_joints)
current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# mean closing depth of the two finger joints
mean_joint_pos = torch.mean(current_joint_pos, dim=1)
# reward factor: current position / target closing position;
# the closer the joints get to 0.03, the closer the reward gets to 1.0,
# pushing the agent towards a fully closed grip
clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
# 3. the weighted clamping reward is only paid out when the grasp check passes;
# if the gripper is misaligned or empty, the reward stays 0
return torch.where(is_grasped, clamping_factor, 0.0)
# def gripper_close_when_near(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# target_close_pos: float = 0.03,
# height_offset: float = 0.03,
# grasp_radius: float = 0.02 # 球面半径 2cm
# ) -> torch.Tensor:
# # 1. 获取位置
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 把手中心
# # 获取左右指尖位置
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# # 计算夹爪中心
# pos_center = (pos_L + pos_R) / 2.0
# # 2. 距离判定 (Is Inside Sphere?)
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
# is_in_sphere = (dist_center < grasp_radius).float()
# # 3. "在中间"判定 (Is Between?)
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
# vec_LR = pos_R - pos_L # 左指指向右指
# vec_LO = lid_pos_w - pos_L # 左指指向物体
# # 计算投影比例 t
# # P_proj = P_L + t * (P_R - P_L)
# # t = (vec_LO . vec_LR) / |vec_LR|^2
# # 如果 0 < t < 1说明投影在两个指尖之间
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
# t = dot / len_sq
# is_between = (t > 0.0) & (t < 1.0)
# is_between = is_between.float()
# # 4. 闭合判定
# joint_ids, _ = robot.find_joints(joint_names)
# # 注意:如果是 Binary Action可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
# # 只有当非常接近闭合目标时才给分
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
# # 5. 最终奖励
# # 只有三者同时满足才给 1.0 分
# reward = is_in_sphere * is_between * is_closing
# return torch.nan_to_num(reward, nan=0.0)
# def lid_is_lifted(
# env:ManagerBasedRLEnv,
# minimal_height:float,
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
# ) -> torch.Tensor:
# lid = env.scene[lid_cfg.name]
# lid_height = lid.data.root_pos_w[:, 2]
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
# reward=torch.nan_to_num(reward, nan=0.0)
# return reward
def lid_is_lifted(
env: ManagerBasedRLEnv,
minimal_height: float = 0.05, # relative lift threshold
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
height_offset: float = 0.03,
grasp_radius: float = 0.1,
target_close_pos: float = 0.03,
) -> torch.Tensor:
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
lid = env.scene[lid_cfg.name]
# 1. current height
current_height = lid.data.root_pos_w[:, 2]
# initial height, taken automatically from the default root state
initial_height = lid.data.default_root_state[:, 2]
# 2. lift amount
lift_height = torch.clamp(current_height - initial_height, min=0.0)
# 3. velocity check (guards against the lid being knocked away)
# velocity > 1.0 m/s is treated as "knocked flying" and earns no reward
lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
is_stable = lid_vel < 1.0
# 4. reward
# shaping term: the higher the lift, the higher the score
shaping_reward = lift_height * 2.0
# success bonus: lift exceeds the threshold
success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
# combine
# requires is_grasped AND is_stable
reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
return torch.nan_to_num(reward, nan=0.0)
def lid_lift_success_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand_body",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# key parameters
settled_height: float = 0.75, # height at which the lid rests on the ultrasound machine (tune to the actual sim)
target_lift_height: float = 0.05, # target lift height (5 cm)
grasp_dist_threshold: float = 0.05, # distance threshold for the grasp check
closed_pos: float = 0.03 # gripper closing threshold
) -> torch.Tensor:
"""
Lift reward: only a stable grasp that lifts the lid by the target amount, relative to its rest height, earns a high score.
"""
# 1. fetch data
lid = env.scene[lid_cfg.name]
robot = env.scene[robot_cfg.name]
# live lid height
lid_z = lid.data.root_pos_w[:, 2]
lid_pos = lid.data.root_pos_w[:, :3]
# gripper position
body_ids, _ = robot.find_bodies([left_gripper_name])
gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
# 2. condition one: is the lid grasped?
# distance check
distance = torch.norm(gripper_pos - lid_pos, dim=-1)
is_close = distance < grasp_dist_threshold
# closing check
joint_ids, _ = robot.find_joints(joint_names)
joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
# assume > 80% of the closing range counts as a firm grip
is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)
is_grasped = is_close & is_closed
# 3. condition two: relative lift
# current height minus the initial rest height
current_lift = lid_z - settled_height
# 4. reward
lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
# shaping term
lift_reward = lift_progress
# success bonus
# an extra bonus once the lift exceeds the target height
success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
# combine: the lift reward is only paid out while is_grasped is True
total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))
# 5. safe output
return torch.nan_to_num(total_reward, nan=0.0)
def lid_lift_progress_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
initial_height: float = 0.8,
target_height_lift: float = 0.15,
height_offset: float = 0.02, # must match the grasp reward
grasp_radius: float = 0.1, # slightly relaxed radius during lifting so small jitter does not drop the reward
target_close_pos: float = 0.03,
std: float = 0.05 # standard deviation
) -> torch.Tensor:
# 1. check whether the lid is grasped
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
# 2. compute the lift height
lid = env.scene[lid_cfg.name]
current_height = lid.data.root_pos_w[:, 2]
lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
# print(current_height)
# print(lift_height)
# 3. reward
# the height reward is only paid out while is_grasped is True,
# which rules out scoring by knocking the lid into the air
progress = torch.tanh(lift_height / std)
reward = torch.where(is_grasped, progress, 0.0)
return reward
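For orientation, the manager-based workflow turns every reward term above into weight × value per step and sums them. A minimal sketch of that aggregation, assuming the per-step dt scaling used by Isaac Lab's RewardManager (verify against your version):

def total_reward_sketch(term_values: dict[str, float], weights: dict[str, float], step_dt: float) -> float:
    # assumption: total = sum_i weight_i * value_i * dt, matching the manager-based pipeline
    return sum(weights[name] * value * step_dt for name, value in term_values.items())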

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def lid_dropped(env: ManagerBasedRLEnv,
minimum_height: float = -0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
return lid.data.root_pos_w[:, 2] < minimum_height
def lid_successfully_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.03,
height_threshold: float = 0.2,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
body_idx = robot.find_bodies(gripper_body_name)[0]
gripper_pos_w = robot.data.body_pos_w[:, body_idx, :]
distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
return is_close & is_lifted
def gripper_at_lid_side(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l", # 改为两个下划线
right_gripper_name: str = "left_hand_r",
side_distance: float = 0.05,
side_tolerance: float = 0.01,
alignment_tolerance: float = 0.02,
height_offset: float = 0.1,
height_tolerance: float = 0.02) -> torch.Tensor:
"""Terminate when gripper center is positioned on the side of the lid at specified height.
坐标系说明:
- X 方向:两个夹爪朝中心合并的方向
- Y 方向:夹爪间空隙的方向,和 lid 的把手方向一致
- Z 方向:高度方向
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
# 获取两个夹爪的位置
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
# 计算夹爪中心位置
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
rel_pos = gripper_center - lid_pos_w
# Y 方向:应该在 lid 的 Y 方向两侧(距离 side_distance
y_dist = torch.abs(rel_pos[:, 1])
y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
# X 方向:应该对齐(接近 0
x_dist = torch.abs(rel_pos[:, 0])
x_ok = x_dist < alignment_tolerance
# Z 方向:应该在 lid 上方 height_offset 处
z_error = torch.abs(rel_pos[:, 2] - height_offset)
z_ok = z_error < height_tolerance
# 所有条件都要满足
return x_ok & y_ok & z_ok
def base_height_failure(env: ManagerBasedRLEnv,
minimum_height: float | None = None,
maximum_height: float | None = None,
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Terminate when the robot's base height is outside the specified range."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
root_pos_z = asset.data.root_pos_w[:, 2]
# check if height is outside the range
out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool)
if minimum_height is not None:
out_of_bounds |= root_pos_z < minimum_height
if maximum_height is not None:
out_of_bounds |= root_pos_z > maximum_height
return out_of_bounds
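A usage sketch for base_height_failure with both bounds set; the term name and thresholds here are illustrative, and the actual termination terms live in the environment config below:

from isaaclab.managers import TerminationTermCfg as DoneTerm

# Hypothetical term: end the episode if the base sinks below 0.2 m or is launched above 2.0 m.
base_out_of_range = DoneTerm(
    func=base_height_failure,
    params={"asset_cfg": SceneEntityCfg("Mindbot"), "minimum_height": 0.2, "maximum_height": 2.0},
)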

View File

@@ -0,0 +1,578 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
import torch
from isaaclab.actuators import ImplicitActuatorCfg
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
from isaaclab.sensors import CameraCfg
from . import mdp
##
# Pre-defined configs
##
from mindbot.robot.mindbot import MINDBOT_CFG
# ====== Other object configurations ======
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
TABLE_CFG=RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Table",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.95, -0.3, 0.005],
rot=[0.7071, 0.0, 0.0, 0.7071],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
LID_CFG = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Lid",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.2, 0.65, 0.9],
rot=[1.0, 0.0, 0.0, 0.0],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
# scale=(0.2, 0.2, 0.2),
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
max_angular_velocity=1000.0,
max_linear_velocity=1000.0,
max_depenetration_velocity=0.5,#original 5.0
linear_damping=5.0, #original 0.5
angular_damping=5.0, #original 0.5
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
ROOM_CFG = AssetBaseCfg(
prim_path="{ENV_REGEX_NS}/Room",
spawn=UsdFileCfg(
usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
),
)
# ROOM_CFG = RigidObjectCfg(
# prim_path="{ENV_REGEX_NS}/Room",
# init_state=RigidObjectCfg.InitialStateCfg(
# pos=[0.0, 0.0, 0.0],
# rot=[1.0, 0.0, 0.0, 0.0],
# lin_vel=[0.0, 0.0, 0.0],
# ang_vel=[0.0, 0.0, 0.0],
# ),
# spawn=UsdFileCfg(
# usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
# # scale=(0.2, 0.2, 0.2),
# rigid_props=RigidBodyPropertiesCfg(
# rigid_body_enabled= True,
# solver_position_iteration_count=32,
# solver_velocity_iteration_count=16,
# max_angular_velocity=1000.0,
# max_linear_velocity=1000.0,
# max_depenetration_velocity=0.5,#original 5.0
# linear_damping=5.0, #original 0.5
# angular_damping=5.0, #original 0.5
# disable_gravity=False,
# ),
# collision_props=CollisionPropertiesCfg(
# collision_enabled=True,
# contact_offset=0.0005,#original 0.02
# rest_offset=0
# ),
# ),
# )
ULTRASOUND_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,#
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
stabilization_threshold=1e-6,
# fix_root_link=True,
),
),
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)),
# actuators={}
actuators={
"passive_damper": ImplicitActuatorCfg(
# ".*" 表示匹配该USD文件内的所有关节无论是轮子、屏幕转轴还是其他
joint_names_expr=[".*"],
stiffness=0.0,
damping=1000.0,
effort_limit_sim=10000.0,
velocity_limit_sim=100.0,
),
}
)
##
# Scene definition
##
@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
"""Configuration for a cart-pole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
Room: AssetBaseCfg = ROOM_CFG.replace(prim_path="{ENV_REGEX_NS}/Room")
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
# head_camera=CameraCfg(
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
# update_period=1/120,
# height=480,
# width=640,
# data_type=["rgb"],
# spawn=None,
# )
left_hand_camera=CameraCfg(
prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
update_period=1/120,
height=480,
width=640,
data_types=["rgb"],
spawn=None,
)
right_hand_camera=CameraCfg(
prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
update_period=1/120,
height=480,
width=640,
data_types=["rgb"],
spawn=None,
)
head_camera=CameraCfg(
prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
update_period=1/120,
height=480,
width=640,
data_types=["rgb"],
spawn=None,
)
chest_camera=CameraCfg(
prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
update_period=1/120,
height=480,
width=640,
data_types=["rgb"],
spawn=None,
)
##
# MDP settings
##
def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
return pos_w
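# Why env_origins is subtracted above (synthetic-tensor illustration): cloned environments are
# offset from each other by env_spacing in the world frame, so removing the per-env origin yields
# an env-local position that looks identical for identical local poses across all envs.
def _env_local_pos_demo() -> torch.Tensor:
    body_pos_w = torch.tensor([[5.2, 0.0, 0.9], [10.2, 0.0, 0.9]])   # world-frame gripper positions
    env_origins = torch.tensor([[5.0, 0.0, 0.0], [10.0, 0.0, 0.0]])  # per-env origins
    return body_pos_w - env_origins  # tensor([[0.2, 0.0, 0.9], [0.2, 0.0, 0.9]])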
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
# Task-space controller: the policy outputs a 6-D end-effector position + orientation increment
left_arm_ee = DifferentialInverseKinematicsActionCfg(
asset_name="Mindbot",
joint_names=["l_joint[1-6]"], # 左臂的6个关节
body_name="left_hand_body", # 末端执行器body名称
controller=DifferentialIKControllerCfg(
command_type="pose", # 控制位置+姿态
use_relative_mode=True, # 相对模式:动作是增量
ik_method="dls", # Damped Least Squares方法
),
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
)
# right_arm_fixed = mdp.JointPositionActionCfg(
# asset_name="Mindbot",
# joint_names=["r_joint[1-6]"], # 对应 l_joint1 到 l_joint6
# # 1. 缩放设为 0这让 RL 策略无法控制它,它不会动
# scale=0.0,
# # 2. 偏移设为你的目标角度(弧度):这告诉电机“永远停在这里”
# # 对应 (135, -70, -90, 90, 90, -70)
# offset={
# "r_joint1": 2.3562,
# "r_joint2": -1.2217,
# "r_joint3": -1.5708,
# "r_joint4": 1.5708,
# "r_joint5": 1.5708,
# "r_joint6": -1.2217,
# },
# )
grippers_position = mdp.BinaryJointPositionActionCfg(
asset_name="Mindbot",
joint_names=["left_hand_joint_left", "left_hand_joint_right"],
# fixed: use the dictionary format
# open_command_expr={"joint-name regex": value}
open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
# close_command_expr={"joint-name regex": value}
close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
)
trunk_position = mdp.JointPositionActionCfg(
asset_name="Mindbot",
joint_names=["PrismaticJoint"],
scale=0.00,
offset=0.3,
clip=None
)
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class EventCfg:
"""Configuration for events."""
# reset all joints to init_state (no offset) -- required
reset_mindbot_all_joints = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
# 2. base mounting error (recommended to keep within ~2 cm)
reset_mindbot_base = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"pose_range": {
"x": (-0.05, 0.05),
"y": (-0.05, 0.05),
"z": (0.74, 0.75),
"yaw": (-0.1, 0.1),
},
"velocity_range": {"x": (0.0, 0.0), "y": (0.0, 0.0)},
},
)
# 1. small arm joint offsets (0.01 rad is about 0.6 deg, a good range)
reset_mindbot_left_arm = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
"position_range": (-0.01, 0.01),
"velocity_range": (0.0, 0.0),
},
)
reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
reset_ultrasound = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("ultrasound"),
"pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)},
"velocity_range": {"x": (0.0, 0.0)}
}
)
reset_lid = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("lid"),
"pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)},
"velocity_range": {"x": (0.0, 0.0)}
}
)
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
gripper_lid_orientation_alignment = RewTerm(
func=mdp.gripper_lid_orientation_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"gripper_body_name": "left_hand_body",
"gripper_forward_axis": (0.0, 0.0, 1.0),
"gripper_width_axis": (0.0, 1.0, 0.0),
"lid_handle_axis": (0.0, 1.0, 0.0),
"max_angle_deg": 85.0, # 允许60度内的偏差
},
weight=5.0 #original 5.0
)
# stage 2
# 位置对齐奖励:鼓励夹爪位于 lid 两侧,且 Z 方向在 lid 上方 0.1m
gripper_lid_position_alignment = RewTerm(
func=mdp.gripper_lid_position_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"height_offset": 0.07, # Z方向lid 上方 0.1m
"std": 0.3, # 位置对齐的容忍度
},
weight=3.0 #original 3.0
)
approach_lid_penalty = RewTerm(
func=mdp.gripper_lid_distance_penalty,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"height_offset": 0.07,
},
# a negative weight turns this term into a penalty;
# e.g. at a distance of 0.5 m the contribution is 0.5 * (-1.0) = -0.5 with the weight below.
weight=-1.0
)
# [new] fine alignment (guides the gripper into the last few centimetres)
gripper_lid_fine_alignment = RewTerm(
func=mdp.gripper_lid_position_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"height_offset": 0.07, # [note] this height must be precise and match the interaction-check height
"std": 0.05, # very steep gradient, only effective within roughly 5-10 cm
},
weight=10.0 # high weight
)
gripper_close_interaction = RewTerm(
func=mdp.gripper_close_when_near,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"joint_names": ["left_hand_joint_left", "left_hand_joint_right"],
"target_close_pos": 0.03,
"height_offset": 0.04,
"grasp_radius": 0.03,
},
weight=10.0
)
lid_lifted_reward = RewTerm(
func=mdp.lid_is_lifted, # reuse the function already defined in rewards.py
params={
"lid_cfg": SceneEntityCfg("lid"),
"minimal_height": 1.0, # tune to your scene, roughly 5 cm above the initial height
},
weight=10.0 # large enough weight
)
lid_lifting_reward = RewTerm(
func=mdp.lid_lift_progress_reward,
params={
"lid_cfg": SceneEntityCfg("lid"),
"initial_height": 0.8, # verify this value by printing the lid's z coordinate
"target_height_lift": 0.2,
"height_offset": 0.07, # keep consistent with the other rewards
"std": 0.1
},
# set a large weight so the policy treats this as the final objective
weight=10.0
)
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
mindbot_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
)
lid_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
)
# new: lid-drop check
# terminate if the lid falls back near its resting height (ideally only after the first few steps following a reset)
lid_dropped = DoneTerm(
func=mdp.base_height_failure, # reuse the height check
params={
"asset_cfg": SceneEntityCfg("lid"),
"minimum_height": 0.79, # assuming the rest height is ~0.9, dropping below this means the lid fell or was pushed down
},
)
##
# Environment configuration
##
@configclass
class CurriculumCfg:
pass
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
# position_std_scheduler = CurrTerm(
# func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类
# params={
# # 路径rewards管理器 -> 你的奖励项名 -> params字典 -> std键
# "address": "rewards.gripper_lid_position_alignment.params.std",
# # 修改逻辑:使用我们刚才写的函数
# "modify_fn": mdp.annealing_std,
# # 传给函数的参数
# "modify_params": {
# "start_step": 00, # 约 600 轮
# "end_step": 5_000, # 约 1200 轮
# "start_std": 0.3,
# "end_std": 0.05,
# }
# }
# )
@configclass
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=5.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# curriculum: CurriculumCfg = CurriculumCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
self.decimation = 4 #original 2 # raised from 2 to 4 to lower the control frequency
self.episode_length_s = 10
# viewer settings
self.viewer.eye = (0, 2.5, 2.6)#(3.5, 0.0, 3.2)
# simulation settings
self.sim.dt = 1 / 120 #original 1 / 60
self.sim.render_interval = self.decimation
# 1. base heap memory
self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256 MB
self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024 # ~16.7M contact points
self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024 # ~1M rigid contact patches
# 5. aggregate-pair capacities (for complex articulations)
self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024
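The training and play scripts address this config through a gym task id. A sketch of the registration that typically lives in mindbot.tasks (the task id below is an assumption, not necessarily the one used by this repo):

import gymnasium as gym

gym.register(
    id="Mindbot-Lid-Grasp-v0",                       # hypothetical task id
    entry_point="isaaclab.envs:ManagerBasedRLEnv",
    disable_env_checker=True,
    kwargs={"env_cfg_entry_point": MindbotEnvCfg},
)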

282
test.py Normal file
View File

@@ -0,0 +1,282 @@
import argparse
import sys
import os
import time
import imageio
import numpy as np
import h5py
import torch
# Import AppLauncher first and instantiate SimulationApp as early as possible, so pxr is loaded before anything needs it
from isaaclab.app import AppLauncher
import cli_args # isort: skip
# CLI
parser = argparse.ArgumentParser(description="Play an RL agent with multi-cam recording.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument("--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations.")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument("--use_pretrained_checkpoint", action="store_true", help="Use the pre-trained checkpoint from Nucleus.")
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
parser.add_argument("--max_steps", type=int, default=None, help="最大步数,达到后提前退出")
cli_args.add_rsl_rl_args(parser)
AppLauncher.add_app_launcher_args(parser)
args_cli, hydra_args = parser.parse_known_args()
if args_cli.video:
args_cli.enable_cameras = True
# forward the remaining arguments to hydra
sys.argv = [sys.argv[0]] + hydra_args
# ==== instantiate SimulationApp first ====
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
# ==== only then import the modules that depend on isaac/pxr ====
import gymnasium as gym
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from isaaclab.envs import DirectMARLEnv, DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg, multi_agent_to_single_agent
from isaaclab.utils.assets import retrieve_file_path
from isaaclab.utils.dict import print_dict
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks # noqa: F401
CAM_NAMES = ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand", "cam_top", "cam_side"]
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
task_name = args_cli.task.split(":")[-1]
train_task_name = task_name.replace("-Play", "")
agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
env_cfg.seed = agent_cfg.seed
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
log_root_path = os.path.abspath(os.path.join("logs", "rsl_rl", agent_cfg.experiment_name))
print(f"[INFO] Loading experiment from directory: {log_root_path}")
if args_cli.use_pretrained_checkpoint:
resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
if not resume_path:
print("[INFO] No pre-trained checkpoint for this task.")
return
elif args_cli.checkpoint:
resume_path = retrieve_file_path(args_cli.checkpoint)
else:
resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
log_dir = os.path.dirname(resume_path)
env_cfg.log_dir = log_dir
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
if isinstance(env.unwrapped, DirectMARLEnv):
env = multi_agent_to_single_agent(env)
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
if agent_cfg.class_name == "OnPolicyRunner":
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
elif agent_cfg.class_name == "DistillationRunner":
runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
else:
raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
runner.load(resume_path)
policy = runner.get_inference_policy(device=env.unwrapped.device)
try:
policy_nn = runner.alg.policy
except AttributeError:
policy_nn = runner.alg.actor_critic
# export the model
export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
normalizer = getattr(policy_nn, "actor_obs_normalizer", getattr(policy_nn, "student_obs_normalizer", None))
export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
# dt = env.unwrapped.step_dt
# # 录制缓冲
# cam_buffers = {n: [] for n in CAM_NAMES}
# joint_log, joint_vel_log, action_log, ts_log = [], [], [], []
# t0 = time.time()
# obs = env.get_observations()
# timestep = 0
# step_count = 0
# try:
# while simulation_app.is_running():
# start_time = time.time()
# with torch.inference_mode():
# actions = policy(obs)
# obs, _, dones, _ = env.step(actions)
# policy_nn.reset(dones)
# # 相机帧(取 env0如需全部 env 去掉 [0]
# for name in CAM_NAMES:
# if name not in env.unwrapped.scene.sensors:
# continue
# cam = env.unwrapped.scene.sensors[name]
# rgba = cam.data.output.get("rgba", cam.data.output.get("rgb"))
# if rgba is None:
# continue
# frame = rgba[0].cpu().numpy()
# if frame.shape[-1] == 4:
# frame = frame[..., :3]
# cam_buffers[name].append(frame)
# # 关节 / 速度 / 动作
# robot = env.unwrapped.scene["Mindbot"]
# joint_log.append(robot.data.joint_pos.cpu().numpy())
# joint_vel_log.append(robot.data.joint_vel.cpu().numpy())
# action_log.append(actions.cpu().numpy())
# ts_log.append(time.time() - t0)
# step_count += 1
# if args_cli.max_steps and step_count >= args_cli.max_steps:
# break
# if args_cli.video:
# timestep += 1
# if timestep == args_cli.video_length:
# break
# sleep_time = dt - (time.time() - start_time)
# if args_cli.real_time and sleep_time > 0:
# time.sleep(sleep_time)
# finally:
# # 保存 HDF5
# h5_path = os.path.join(log_dir, "rollout_multi_cam.h5")
# with h5py.File(h5_path, "w") as f:
# f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip")
# f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip")
# f.create_dataset("actions", data=np.stack(action_log), compression="gzip")
# f.create_dataset("timestamps", data=np.array(ts_log))
# for name, frames in cam_buffers.items():
# if not frames:
# continue
# dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip")
# if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]:
# fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05
# else:
# fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295
# dset.attrs["fx"] = fx
# dset.attrs["fy"] = fy
# dset.attrs["cx"] = cx
# dset.attrs["cy"] = cy
# dset.attrs["focal_length"] = 1.93
# print(f"[INFO] Saved HDF5 to {h5_path}")
# # 可选:单路 MP4
# head_frames = cam_buffers["cam_head"]
# if head_frames:
# fps = int(round(1.0 / dt))
# video_path = os.path.join(log_dir, "cam_head.mp4")
# imageio.mimsave(video_path, head_frames, fps=fps)
# print(f"[INFO] Saved video to {video_path}")
# env.close()
dt = env.unwrapped.step_dt
# recording buffers
cam_buffers = {n: [] for n in CAM_NAMES}
joint_log, joint_vel_log, action_log, ts_log = [], [], [], []
t0 = time.time()
obs = env.get_observations()
timestep = 0
while simulation_app.is_running():
start_time = time.time()
with torch.inference_mode():
actions = policy(obs)
obs, _, dones, _ = env.step(actions)
policy_nn.reset(dones)
# camera frames (env 0 only; drop the [0] to record all envs)
for name in CAM_NAMES:
if name not in env.unwrapped.scene.sensors:
continue
cam = env.unwrapped.scene.sensors[name]
rgba = cam.data.output.get("rgba", cam.data.output.get("rgb"))
if rgba is None:
continue
frame = rgba[0].cpu().numpy()
if frame.shape[-1] == 4:
frame = frame[..., :3]
cam_buffers[name].append(frame)
# joint positions / velocities / actions
robot = env.unwrapped.scene["Mindbot"]
joint_log.append(robot.data.joint_pos.cpu().numpy())
joint_vel_log.append(robot.data.joint_vel.cpu().numpy())
action_log.append(actions.cpu().numpy())
ts_log.append(time.time() - t0)
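# Note: every frame is copied to host memory as a numpy array, so RAM usage grows linearly
# with the number of recorded steps and cameras.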
if args_cli.video:
timestep += 1
if timestep == args_cli.video_length:
break
sleep_time = dt - (time.time() - start_time)
if args_cli.real_time and sleep_time > 0:
time.sleep(sleep_time)
# Save to HDF5
h5_path = os.path.join(log_dir, "rollout_multi_cam.h5")
with h5py.File(h5_path, "w") as f:
f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip")
f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip")
f.create_dataset("actions", data=np.stack(action_log), compression="gzip")
f.create_dataset("timestamps", data=np.array(ts_log))
for name, frames in cam_buffers.items():
if not frames:
continue
dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip")
# Intrinsics: set these to match your camera configuration
if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]:
fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05
else:
fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295
dset.attrs["fx"] = fx
dset.attrs["fy"] = fy
dset.attrs["cx"] = cx
dset.attrs["cy"] = cy
dset.attrs["focal_length"] = 1.93
print(f"[INFO] Saved HDF5 to {h5_path}")
# Optional: single-camera MP4 (head camera)
head_frames = cam_buffers["cam_head"]
if head_frames:
fps = int(round(1.0 / dt))
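# One frame was buffered per environment step, so 1/dt fps reproduces simulated time on playback.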
video_path = os.path.join(log_dir, "cam_head.mp4")
imageio.mimsave(video_path, head_frames, fps=fps)
print(f"[INFO] Saved video to {video_path}")
env.close()
if __name__ == "__main__":
main()
simulation_app.close()
# import h5py
# h5_path = r"C:\Users\PC\workpalce\mindbot\logs\rsl_rl\mindbot_grasp\2026-01-15_11-51-00\rollout_multi_cam.h5"
# with h5py.File(h5_path, "r") as f:
# def walk(name, obj):
# if isinstance(obj, h5py.Dataset):
# print(f"{name}: shape={obj.shape}, dtype={obj.dtype}")
# else:
# print(f"{name}/")
# f.visititems(walk)

73
test_sence.py Normal file
View File

@@ -0,0 +1,73 @@
from isaaclab.app import AppLauncher
import argparse
# -----------------------------------------------------------------------------
# 1. Launch Isaac Sim
# -----------------------------------------------------------------------------
parser = argparse.ArgumentParser()
AppLauncher.add_app_launcher_args(parser)
args = parser.parse_args()
app_launcher = AppLauncher(args)
simulation_app = app_launcher.app
# -----------------------------------------------------------------------------
# 2. Create the SimulationContext first (PhysX is initialized here)
# -----------------------------------------------------------------------------
import isaaclab.sim as sim_utils
sim_cfg = sim_utils.SimulationCfg(
dt=1 / 120,
device=args.device,
)
sim = sim_utils.SimulationContext(sim_cfg)
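# dt = 1/120 gives a physics step of roughly 8.3 ms; the device string comes from the AppLauncher CLI args.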
# (Optional) set the viewport camera
sim.set_camera_view(
eye=[3.0, 0.0, 2.0],
target=[0.0, 0.0, 0.5],
)
# -----------------------------------------------------------------------------
# 3. Then play the timeline (this step is critical)
# -----------------------------------------------------------------------------
import omni.timeline
timeline = omni.timeline.get_timeline_interface()
timeline.play()
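# The script relies on this ordering: create the SimulationContext, start the timeline, and
# only then construct the environment below.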
# -----------------------------------------------------------------------------
# 4. Only now can the Env be created safely
# -----------------------------------------------------------------------------
import torch
from isaaclab.envs import ManagerBasedRLEnv
from mindbot.tasks.manager_based.pull.pull_env_cfg import PullEnvCfg
def main():
env_cfg = PullEnvCfg()
env_cfg.scene.num_envs = 1
env = ManagerBasedRLEnv(cfg=env_cfg)
obs, _ = env.reset()
print("[OK] Env reset")
for step in range(100000):
actions = torch.zeros(
env.num_envs,
env.action_manager.total_action_dim,
device=env.device,
)
obs, rew, done, trunc, info = env.step(actions)
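# ManagerBasedRLEnv.step returns the Gymnasium-style 5-tuple: obs, reward, terminated, truncated, extras.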
if step % 100 == 0:
root_pos = env.scene["Mindbot"].data.root_pos_w[0]
print(f"step {step}, root z = {root_pos[2].item():.3f}")
print("[DONE]")
if __name__ == "__main__":
main()
simulation_app.close()