Code commit from the local 4090 machine
.gitignore (vendored): 5 changed lines
@@ -1,3 +1,8 @@
logs/
outputs/
.venv/

# C++
**/cmake-build*/
**/build*/
scripts/rsl_rl/play.py
@@ -9,6 +9,12 @@

import argparse
import sys
import os
import torch
import torchvision
import gymnasium as gym
import time
from datetime import datetime

from isaaclab.app import AppLauncher

@@ -18,7 +24,7 @@ import cli_args # isort: skip
# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
-parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
+parser.add_argument("--video_length", type=int, default=2000, help="Length of the recorded video (in steps).")
parser.add_argument(
    "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
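
# Note: --video_length counts simulation steps, so the wall-clock length of a
# recording is roughly video_length * step_dt. A standalone sketch (the step_dt
# value is an assumption; the script reads the real one from env.unwrapped.step_dt):
#   video_length, step_dt = 2000, 1 / 50
#   print(video_length * step_dt)  # -> 40.0 seconds of simulated time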
@@ -53,11 +59,6 @@ simulation_app = app_launcher.app

"""Rest everything follows."""

import gymnasium as gym
import os
import time
import torch

from rsl_rl.runners import DistillationRunner, OnPolicyRunner

from isaaclab.envs import (
@@ -79,6 +80,215 @@ from isaaclab_tasks.utils.hydra import hydra_task_config

import mindbot.tasks  # noqa: F401

# ==============================================================================
# [Update] Multi-camera, multi-environment recorder class
# ==============================================================================
# 1. Make sure the following imports are present at the top of the file
import imageio
import numpy as np

"""use gpu compute to record video"""

class MultiCameraRecorder:
    def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
        self.env = env
        self.camera_names = camera_names
        self.env_indices = env_indices
        self.output_dir = output_dir
        self.fps = fps
        self.frames = {cam_name: {env_idx: [] for env_idx in env_indices} for cam_name in camera_names}
        os.makedirs(self.output_dir, exist_ok=True)

        self.cameras = {}
        for name in camera_names:
            if name in self.env.unwrapped.scene.keys():
                self.cameras[name] = self.env.unwrapped.scene[name]
                print(f"[INFO] Camera {name} linked.")

    def record_step(self):
        """Clone the camera data while keeping it on the GPU."""
        for cam_name, camera_obj in self.cameras.items():
            # read the latest RGB output (sync once before grabbing the data so the backend does not drop it)
            rgb_data = camera_obj.data.output["rgb"]

            for env_idx in self.env_indices:
                # .clone() keeps the frame on the GPU; watch GPU memory usage
                self.frames[cam_name][env_idx].append(rgb_data[env_idx].clone())

    def save_videos(self, filename_suffix=""):
        print("[INFO] Saving videos from GPU to disk...")
        for cam_name, env_dict in self.frames.items():
            for env_idx, frame_list in env_dict.items():
                if not frame_list:
                    continue

                # stack into a (T, H, W, C) video tensor
                video_tensor = torch.stack(frame_list)
                if video_tensor.shape[-1] == 4:  # RGBA -> RGB
                    video_tensor = video_tensor[..., :3]

                # move to the CPU and save
                video_cpu = video_tensor.cpu()
                output_path = os.path.join(self.output_dir, f"{cam_name}_env{env_idx}_{filename_suffix}.mp4")

                # torchvision writes (T, H, W, C) frames
                torchvision.io.write_video(output_path, video_cpu, fps=self.fps)

                # [Important] free GPU memory immediately after saving
                del video_tensor
                del video_cpu
                frame_list.clear()

        torch.cuda.empty_cache()
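
# A standalone sketch (made-up sizes) of the frame layout torchvision.io.write_video
# expects, which is what save_videos() above passes it: a uint8 tensor of shape
# (T, H, W, 3). The Isaac Lab "rgb" camera output is typically already uint8.
#   frames = [torch.randint(0, 256, (240, 320, 3), dtype=torch.uint8) for _ in range(30)]
#   video = torch.stack(frames)  # (T, H, W, C), uint8
#   torchvision.io.write_video("demo.mp4", video, fps=30)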
"""use cpu compute to record video"""
|
||||
# # 2. 修改 MultiCameraRecorder 类
|
||||
# class MultiCameraRecorder:
|
||||
# def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
|
||||
# self.env = env
|
||||
# self.camera_names = camera_names
|
||||
# self.env_indices = env_indices
|
||||
# self.output_dir = output_dir
|
||||
# self.fps = fps
|
||||
# self.frames = {cam_name: {env_idx: [] for env_idx in env_indices} for cam_name in camera_names}
|
||||
# os.makedirs(self.output_dir, exist_ok=True)
|
||||
# self.cameras = {}
|
||||
# for name in camera_names:
|
||||
# try:
|
||||
# self.cameras[name] = self.env.unwrapped.scene[name]
|
||||
# print(f"[INFO][MultiCameraRecorder] Found camera: {name}")
|
||||
# except KeyError:
|
||||
# print(f"[WARN][MultiCameraRecorder] Camera '{name}' not found!")
|
||||
|
||||
# def record_step(self):
|
||||
# """在每个仿真步调用"""
|
||||
# for cam_name, camera_obj in self.cameras.items():
|
||||
# # [关键修改] 获取数据前先确保数据已同步
|
||||
# # 这可以防止读取到正在渲染中的内存导致 access violation
|
||||
# rgb_data = camera_obj.data.output["rgb"]
|
||||
|
||||
# for env_idx in self.env_indices:
|
||||
# if env_idx >= rgb_data.shape[0]: continue
|
||||
|
||||
# # 转换为 CPU 上的 numpy,这种方式通常比 torchvision 的 tensor 堆叠更稳
|
||||
# frame = rgb_data[env_idx].clone().detach().cpu().numpy()
|
||||
# self.frames[cam_name][env_idx].append(frame)
|
||||
|
||||
# def save_videos(self, filename_suffix=""):
|
||||
# """循环结束后调用"""
|
||||
# print(f"[INFO][MultiCameraRecorder] Saving videos...")
|
||||
# for cam_name, env_dict in self.frames.items():
|
||||
# for env_idx, frame_list in env_dict.items():
|
||||
# if not frame_list: continue
|
||||
|
||||
# print(f" -> Saving {cam_name} (Env {env_idx})...")
|
||||
|
||||
# # 处理格式并使用 imageio 保存
|
||||
# processed_frames = []
|
||||
# for img in frame_list:
|
||||
# # [0, 1] -> [0, 255]
|
||||
# if img.dtype != np.uint8:
|
||||
# if img.max() <= 1.01: img = (img * 255).astype(np.uint8)
|
||||
# else: img = img.astype(np.uint8)
|
||||
# # 去掉 Alpha 通道
|
||||
# if img.shape[-1] == 4: img = img[:, :, :3]
|
||||
# processed_frames.append(img)
|
||||
|
||||
# fname = f"{cam_name}_env{env_idx}_{filename_suffix}.mp4"
|
||||
# output_path = os.path.join(self.output_dir, fname)
|
||||
|
||||
# try:
|
||||
# # 使用 imageio 写入视频
|
||||
# imageio.mimsave(output_path, processed_frames, fps=self.fps)
|
||||
# print(f" Saved: {output_path}")
|
||||
# except Exception as e:
|
||||
# print(f" [ERROR] Failed to save {fname}: {e}")
|
||||
|
||||
|
||||
|
||||
# class MultiCameraRecorder:
|
||||
# """
|
||||
# 用于从 Isaac Lab 环境中录制多个相机的图像数据,并支持多个环境实例。
|
||||
# """
|
||||
# def __init__(self, env, camera_names: list[str], env_indices: list[int], output_dir: str, fps: int = 30):
|
||||
# self.env = env
|
||||
# self.camera_names = camera_names
|
||||
# self.env_indices = env_indices # 要录制的环境索引列表,例如 [0, 1]
|
||||
# self.output_dir = output_dir
|
||||
# self.fps = fps
|
||||
|
||||
# # 数据结构: self.frames[camera_name][env_idx] = [list of tensors]
|
||||
# self.frames = {
|
||||
# cam_name: {env_idx: [] for env_idx in env_indices}
|
||||
# for cam_name in camera_names
|
||||
# }
|
||||
|
||||
# os.makedirs(self.output_dir, exist_ok=True)
|
||||
|
||||
# # 获取场景中的相机对象
|
||||
# self.cameras = {}
|
||||
# for name in camera_names:
|
||||
# try:
|
||||
# # 尝试获取相机句柄
|
||||
# self.cameras[name] = self.env.unwrapped.scene[name]
|
||||
# print(f"[INFO][MultiCameraRecorder] Found camera: {name}")
|
||||
# except KeyError:
|
||||
# print(f"[WARN][MultiCameraRecorder] Camera '{name}' not found in scene! Skipping.")
|
||||
|
||||
# def record_step(self):
|
||||
# """在每个仿真步调用"""
|
||||
# for cam_name, camera_obj in self.cameras.items():
|
||||
# # 获取 RGB 数据 (Shape: [num_envs, H, W, C])
|
||||
# # 注意:如果数据类型是 RGBA,后面处理
|
||||
# rgb_data = camera_obj.data.output["rgb"]
|
||||
|
||||
# for env_idx in self.env_indices:
|
||||
# # 边界检查
|
||||
# if env_idx >= rgb_data.shape[0]:
|
||||
# continue
|
||||
|
||||
# # [关键] 立即 clone 并转到 CPU。
|
||||
# # 如果不转 CPU,4个相机 * 长时间录制会迅速耗尽 GPU 显存。
|
||||
# frame = rgb_data[env_idx].clone().detach().cpu()
|
||||
|
||||
# self.frames[cam_name][env_idx].append(frame)
|
||||
|
||||
# def save_videos(self, filename_suffix=""):
|
||||
# """循环结束后调用,保存所有视频"""
|
||||
# print(f"[INFO][MultiCameraRecorder] Saving videos for {len(self.cameras)} cameras...")
|
||||
|
||||
# for cam_name, env_dict in self.frames.items():
|
||||
# for env_idx, frame_list in env_dict.items():
|
||||
# if not frame_list:
|
||||
# continue
|
||||
|
||||
# print(f" -> Processing {cam_name} (Env {env_idx}) with {len(frame_list)} frames...")
|
||||
|
||||
# # 堆叠 (T, H, W, C)
|
||||
# video_tensor = torch.stack(frame_list)
|
||||
|
||||
# # 去除 Alpha 通道
|
||||
# if video_tensor.shape[-1] == 4:
|
||||
# video_tensor = video_tensor[..., :3]
|
||||
|
||||
# # (T, H, W, C) -> (T, C, H, W)
|
||||
# video_tensor = video_tensor.permute(0, 3, 1, 2)
|
||||
|
||||
# # 确保是 uint8
|
||||
# if video_tensor.dtype != torch.uint8:
|
||||
# if video_tensor.max() <= 1.0:
|
||||
# video_tensor = (video_tensor * 255).to(torch.uint8)
|
||||
# else:
|
||||
# video_tensor = video_tensor.to(torch.uint8)
|
||||
|
||||
# # 文件名: left_hand_camera_env0_2024-xx-xx.mp4
|
||||
# fname = f"{cam_name}_env{env_idx}_{filename_suffix}.mp4"
|
||||
# output_path = os.path.join(self.output_dir, fname)
|
||||
|
||||
# torchvision.io.write_video(output_path, video_tensor, fps=self.fps)
|
||||
# print(f" Saved: {output_path}")
|
||||
|
||||
# ==============================================================================
|
||||
|
||||
|
||||
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
@@ -92,7 +302,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
    env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs

    # set the environment seed
    # note: certain randomizations occur in the environment initialization so we set the seed here
    env_cfg.seed = agent_cfg.seed
    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device

@@ -122,7 +331,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
    if isinstance(env.unwrapped, DirectMARLEnv):
        env = multi_agent_to_single_agent(env)

-    # wrap for video recording
+    # wrap for video recording (standard Gym recording - Viewport)
    if args_cli.video:
        video_kwargs = {
            "video_folder": os.path.join(log_dir, "videos", "play"),
@@ -151,12 +360,9 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
    policy = runner.get_inference_policy(device=env.unwrapped.device)

    # extract the neural network module
    # we do this in a try-except to maintain backwards compatibility.
    try:
        # version 2.3 onwards
        policy_nn = runner.alg.policy
    except AttributeError:
        # version 2.2 and below
        policy_nn = runner.alg.actor_critic

    # extract the normalizer
@@ -174,9 +380,42 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen

    dt = env.unwrapped.step_dt

    # ==========================================================================
    # Initialize the multi-camera recorder
    # ==========================================================================
    recorder = None
    if args_cli.video:
        # 1. Cameras to record (names must match those defined in mindbot_env_cfg.py)
        target_cameras = [
            "left_hand_camera",
            "right_hand_camera",
            "head_camera",
            "chest_camera",
        ]

        # 2. Environment indices to record (env_nums)
        # By default only env 0 is recorded; use [0, 1] to record the first two.
        # Note: do not record too many environments, otherwise writing the videos
        # becomes very slow and memory-hungry (see the memory sketch below).
        envs_to_record = [0, 1, 2, 3]
        # To record every environment (not recommended unless num_envs is small):
        # envs_to_record = list(range(env.unwrapped.num_envs))

        save_dir = os.path.join(log_dir, "robot_camera_recordings")

        recorder = MultiCameraRecorder(
            env=env,
            camera_names=target_cameras,
            env_indices=envs_to_record,
            output_dir=save_dir,
            fps=int(1 / dt),
        )
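
        # ----------------------------------------------------------------------
        # A rough GPU-memory sketch for the recorder above (standalone, assuming
        # 1280x720 RGBA uint8 frames; the real size comes from the camera config):
        #   steps, n_cams, n_envs, h, w, c = 2000, 4, 4, 720, 1280, 4
        #   gib = steps * n_cams * n_envs * h * w * c / 1024**3
        #   # -> ~110 GiB for four envs, but only ~2.7 GiB for one env at 200
        #   # steps, which is why envs_to_record and --video_length should stay
        #   # small while frames are kept on the GPU.
        # ----------------------------------------------------------------------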
    # reset environment
    obs = env.get_observations()
    timestep = 0

    print("[INFO] Starting simulation loop...")

    # simulate environment
    while simulation_app.is_running():
        start_time = time.time()
@@ -186,12 +425,26 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
            actions = policy(obs)
            # env stepping
            obs, _, dones, _ = env.step(actions)

            # ==================================================================
            # Record camera data every frame
            # ==================================================================
            if recorder:
                recorder.record_step()

            # reset recurrent states for episodes that have terminated
            policy_nn.reset(dones)

            if args_cli.video:
                timestep += 1
                # Exit the play loop after recording one video
                if timestep == args_cli.video_length:
                    # ==============================================================
                    # Save all videos when the loop ends
                    # ==============================================================
                    if recorder:
                        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
                        recorder.save_videos(filename_suffix=timestamp)
                    break

        # time delay for real-time evaluation
@@ -208,3 +461,564 @@ if __name__ == "__main__":
    main()
    # close sim app
    simulation_app.close()

# ==============================================================================
# (The committed file keeps below this point, entirely as comments: a previous
# version of this script built around a single-camera CameraRecorder class, with
# imageio- and torchvision-based save paths and a main() that records
# "left_hand_camera", followed by a doubly commented-out copy of the original
# upstream play script; the latter matches scripts/rsl_rl/play.py.bak.)
# ==============================================================================
scripts/rsl_rl/play.py.bak (new file, 210 lines)
@@ -0,0 +1,210 @@
|
||||
# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
"""Script to play a checkpoint if an RL agent from RSL-RL."""
|
||||
|
||||
"""Launch Isaac Sim Simulator first."""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from isaaclab.app import AppLauncher
|
||||
|
||||
# local imports
|
||||
import cli_args # isort: skip
|
||||
|
||||
# add argparse arguments
|
||||
parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
|
||||
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
|
||||
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
|
||||
parser.add_argument(
|
||||
"--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
|
||||
)
|
||||
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
|
||||
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
|
||||
parser.add_argument(
|
||||
"--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
|
||||
)
|
||||
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
|
||||
parser.add_argument(
|
||||
"--use_pretrained_checkpoint",
|
||||
action="store_true",
|
||||
help="Use the pre-trained checkpoint from Nucleus.",
|
||||
)
|
||||
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
|
||||
# append RSL-RL cli arguments
|
||||
cli_args.add_rsl_rl_args(parser)
|
||||
# append AppLauncher cli args
|
||||
AppLauncher.add_app_launcher_args(parser)
|
||||
# parse the arguments
|
||||
args_cli, hydra_args = parser.parse_known_args()
|
||||
# always enable cameras to record video
|
||||
if args_cli.video:
|
||||
args_cli.enable_cameras = True
|
||||
|
||||
# clear out sys.argv for Hydra
|
||||
sys.argv = [sys.argv[0]] + hydra_args
|
||||
|
||||
# launch omniverse app
|
||||
app_launcher = AppLauncher(args_cli)
|
||||
simulation_app = app_launcher.app
|
||||
|
||||
"""Rest everything follows."""
|
||||
|
||||
import gymnasium as gym
|
||||
import os
|
||||
import time
|
||||
import torch
|
||||
|
||||
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
|
||||
|
||||
from isaaclab.envs import (
|
||||
DirectMARLEnv,
|
||||
DirectMARLEnvCfg,
|
||||
DirectRLEnvCfg,
|
||||
ManagerBasedRLEnvCfg,
|
||||
multi_agent_to_single_agent,
|
||||
)
|
||||
from isaaclab.utils.assets import retrieve_file_path
|
||||
from isaaclab.utils.dict import print_dict
|
||||
|
||||
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
|
||||
from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
|
||||
|
||||
import isaaclab_tasks # noqa: F401
|
||||
from isaaclab_tasks.utils import get_checkpoint_path
|
||||
from isaaclab_tasks.utils.hydra import hydra_task_config
|
||||
|
||||
import mindbot.tasks # noqa: F401
|
||||
|
||||
|
||||
@hydra_task_config(args_cli.task, args_cli.agent)
|
||||
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
|
||||
"""Play with RSL-RL agent."""
|
||||
# grab task name for checkpoint path
|
||||
task_name = args_cli.task.split(":")[-1]
|
||||
train_task_name = task_name.replace("-Play", "")
|
||||
|
||||
# override configurations with non-hydra CLI arguments
|
||||
agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
|
||||
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
|
||||
|
||||
# set the environment seed
|
||||
# note: certain randomizations occur in the environment initialization so we set the seed here
|
||||
env_cfg.seed = agent_cfg.seed
|
||||
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
|
||||
|
||||
# specify directory for logging experiments
|
||||
log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
|
||||
log_root_path = os.path.abspath(log_root_path)
|
||||
print(f"[INFO] Loading experiment from directory: {log_root_path}")
|
||||
if args_cli.use_pretrained_checkpoint:
|
||||
resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
|
||||
if not resume_path:
|
||||
print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
|
||||
return
|
||||
elif args_cli.checkpoint:
|
||||
resume_path = retrieve_file_path(args_cli.checkpoint)
|
||||
else:
|
||||
resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
|
||||
|
||||
log_dir = os.path.dirname(resume_path)
|
||||
|
||||
# set the log directory for the environment (works for all environment types)
|
||||
env_cfg.log_dir = log_dir
|
||||
|
||||
# create isaac environment
|
||||
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
|
||||
|
||||
# convert to single-agent instance if required by the RL algorithm
|
||||
if isinstance(env.unwrapped, DirectMARLEnv):
|
||||
env = multi_agent_to_single_agent(env)
|
||||
|
||||
# wrap for video recording
|
||||
if args_cli.video:
|
||||
video_kwargs = {
|
||||
"video_folder": os.path.join(log_dir, "videos", "play"),
|
||||
"step_trigger": lambda step: step == 0,
|
||||
"video_length": args_cli.video_length,
|
||||
"disable_logger": True,
|
||||
}
|
||||
print("[INFO] Recording videos during training.")
|
||||
print_dict(video_kwargs, nesting=4)
|
||||
env = gym.wrappers.RecordVideo(env, **video_kwargs)
|
||||
|
||||
# wrap around environment for rsl-rl
|
||||
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
|
||||
|
||||
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
|
||||
# load previously trained model
|
||||
if agent_cfg.class_name == "OnPolicyRunner":
|
||||
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
|
||||
elif agent_cfg.class_name == "DistillationRunner":
|
||||
runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
|
||||
else:
|
||||
raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
|
||||
runner.load(resume_path)
|
||||
|
||||
# obtain the trained policy for inference
|
||||
policy = runner.get_inference_policy(device=env.unwrapped.device)
|
||||
|
||||
# extract the neural network module
|
||||
# we do this in a try-except to maintain backwards compatibility.
|
||||
try:
|
||||
# version 2.3 onwards
|
||||
policy_nn = runner.alg.policy
|
||||
except AttributeError:
|
||||
# version 2.2 and below
|
||||
policy_nn = runner.alg.actor_critic
|
||||
|
||||
# extract the normalizer
|
||||
if hasattr(policy_nn, "actor_obs_normalizer"):
|
||||
normalizer = policy_nn.actor_obs_normalizer
|
||||
elif hasattr(policy_nn, "student_obs_normalizer"):
|
||||
normalizer = policy_nn.student_obs_normalizer
|
||||
else:
|
||||
normalizer = None
|
||||
|
||||
# export policy to onnx/jit
|
||||
export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
|
||||
export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
|
||||
export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
|
||||
|
||||
dt = env.unwrapped.step_dt
|
||||
|
||||
# reset environment
|
||||
obs = env.get_observations()
|
||||
timestep = 0
|
||||
# simulate environment
|
||||
while simulation_app.is_running():
|
||||
start_time = time.time()
|
||||
# run everything in inference mode
|
||||
with torch.inference_mode():
|
||||
# agent stepping
|
||||
actions = policy(obs)
|
||||
# env stepping
|
||||
obs, _, dones, _ = env.step(actions)
|
||||
# reset recurrent states for episodes that have terminated
|
||||
policy_nn.reset(dones)
|
||||
if args_cli.video:
|
||||
timestep += 1
|
||||
# Exit the play loop after recording one video
|
||||
if timestep == args_cli.video_length:
|
||||
break
|
||||
|
||||
# time delay for real-time evaluation
|
||||
sleep_time = dt - (time.time() - start_time)
|
||||
if args_cli.real_time and sleep_time > 0:
|
||||
time.sleep(sleep_time)
|
||||
|
||||
# close the simulator
|
||||
env.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# run the main function
|
||||
main()
|
||||
# close sim app
|
||||
simulation_app.close()
|
||||
scripts/rsl_rl/play.py.bak1 (new file, 270 lines)
@@ -0,0 +1,270 @@
import argparse
import sys
import os
import time

import imageio
import numpy as np
import h5py
import torch

# Import AppLauncher first and instantiate SimulationApp as early as possible, so pxr is loaded before anything needs it
from isaaclab.app import AppLauncher
import cli_args  # isort: skip

# CLI
parser = argparse.ArgumentParser(description="Play an RL agent with multi-cam recording.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument("--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations.")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument("--use_pretrained_checkpoint", action="store_true", help="Use the pre-trained checkpoint from Nucleus.")
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
parser.add_argument("--max_steps", type=int, default=None, help="Maximum number of steps; exit early once reached.")
cli_args.add_rsl_rl_args(parser)
AppLauncher.add_app_launcher_args(parser)
args_cli, hydra_args = parser.parse_known_args()
if args_cli.video:
    args_cli.enable_cameras = True
# pass the remaining arguments through to Hydra
sys.argv = [sys.argv[0]] + hydra_args

# ==== Instantiate SimulationApp first ====
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app

# ==== Only then import modules that depend on isaac/pxr ====
import gymnasium as gym
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from isaaclab.envs import DirectMARLEnv, DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg, multi_agent_to_single_agent
from isaaclab.utils.assets import retrieve_file_path
from isaaclab.utils.dict import print_dict
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint

import isaaclab_tasks  # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks  # noqa: F401

CAM_NAMES = ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand", "cam_top", "cam_side"]


@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
    task_name = args_cli.task.split(":")[-1]
    train_task_name = task_name.replace("-Play", "")

    agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
    env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
    env_cfg.seed = agent_cfg.seed
    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device

    log_root_path = os.path.abspath(os.path.join("logs", "rsl_rl", agent_cfg.experiment_name))
    print(f"[INFO] Loading experiment from directory: {log_root_path}")
    if args_cli.use_pretrained_checkpoint:
        resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
        if not resume_path:
            print("[INFO] No pre-trained checkpoint for this task.")
            return
    elif args_cli.checkpoint:
        resume_path = retrieve_file_path(args_cli.checkpoint)
    else:
        resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)

    log_dir = os.path.dirname(resume_path)
    env_cfg.log_dir = log_dir

    env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
    if isinstance(env.unwrapped, DirectMARLEnv):
        env = multi_agent_to_single_agent(env)
    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)

    print(f"[INFO]: Loading model checkpoint from: {resume_path}")
    if agent_cfg.class_name == "OnPolicyRunner":
        runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
    elif agent_cfg.class_name == "DistillationRunner":
        runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
    else:
        raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
    runner.load(resume_path)

    policy = runner.get_inference_policy(device=env.unwrapped.device)
    try:
        policy_nn = runner.alg.policy
    except AttributeError:
        policy_nn = runner.alg.actor_critic

    # export the model
    export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
    normalizer = getattr(policy_nn, "actor_obs_normalizer", getattr(policy_nn, "student_obs_normalizer", None))
    export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
    export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")

    # (A commented-out earlier draft of the same recording loop, HDF5 export and
    # optional MP4 save is kept here in the committed file; it matches the code
    # below except that it wraps the loop in try/finally and also honours
    # --max_steps.)

dt = env.unwrapped.step_dt
|
||||
|
||||
# 录制缓冲
|
||||
cam_buffers = {n: [] for n in CAM_NAMES}
|
||||
joint_log, joint_vel_log, action_log, ts_log = [], [], [], []
|
||||
t0 = time.time()
|
||||
|
||||
obs = env.get_observations()
|
||||
timestep = 0
|
||||
while simulation_app.is_running():
|
||||
start_time = time.time()
|
||||
with torch.inference_mode():
|
||||
actions = policy(obs)
|
||||
obs, _, dones, _ = env.step(actions)
|
||||
policy_nn.reset(dones)
|
||||
|
||||
# 相机帧(取 env0,如需全部 env 去掉 [0])
|
||||
for name in CAM_NAMES:
|
||||
if name not in env.unwrapped.scene.sensors:
|
||||
continue
|
||||
cam = env.unwrapped.scene.sensors[name]
|
||||
rgba = cam.data.output.get("rgba", cam.data.output.get("rgb"))
|
||||
if rgba is None:
|
||||
continue
|
||||
frame = rgba[0].cpu().numpy()
|
||||
if frame.shape[-1] == 4:
|
||||
frame = frame[..., :3]
|
||||
cam_buffers[name].append(frame)
|
||||
|
||||
# 关节 / 速度 / 动作
|
||||
robot = env.unwrapped.scene["Mindbot"]
|
||||
joint_log.append(robot.data.joint_pos.cpu().numpy())
|
||||
joint_vel_log.append(robot.data.joint_vel.cpu().numpy())
|
||||
action_log.append(actions.cpu().numpy())
|
||||
ts_log.append(time.time() - t0)
|
||||
|
||||
if args_cli.video:
|
||||
timestep += 1
|
||||
if timestep == args_cli.video_length:
|
||||
break
|
||||
|
||||
sleep_time = dt - (time.time() - start_time)
|
||||
if args_cli.real_time and sleep_time > 0:
|
||||
time.sleep(sleep_time)
|
||||
|
||||
# save HDF5
h5_path = os.path.join(log_dir, "rollout_multi_cam.h5")
with h5py.File(h5_path, "w") as f:
    f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip")
    f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip")
    f.create_dataset("actions", data=np.stack(action_log), compression="gzip")
    f.create_dataset("timestamps", data=np.array(ts_log))
    for name, frames in cam_buffers.items():
        if not frames:
            continue
        dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip")
        # intrinsics: set these to match your camera parameters
        if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]:
            fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05
        else:
            fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295
        dset.attrs["fx"] = fx
        dset.attrs["fy"] = fy
        dset.attrs["cx"] = cx
        dset.attrs["cy"] = cy
        dset.attrs["focal_length"] = 1.93
print(f"[INFO] Saved HDF5 to {h5_path}")
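
# Optional sanity check (minimal sketch): re-open the file just written and rebuild a
# 3x3 pinhole intrinsics matrix K from the stored attributes. Dataset names and
# attribute keys mirror the create_dataset calls above; K is just the standard
# [[fx, 0, cx], [0, fy, cy], [0, 0, 1]] layout, not an Isaac Lab API.
with h5py.File(h5_path, "r") as f:
    if "cams/cam_head/rgb" in f:
        rgb_head = f["cams/cam_head/rgb"]
        a = rgb_head.attrs
        K = np.array([
            [a["fx"], 0.0, a["cx"]],
            [0.0, a["fy"], a["cy"]],
            [0.0, 0.0, 1.0],
        ])
        print(f"[INFO] cam_head frames: {rgb_head.shape}, intrinsics K:\n{K}")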
# optional: single-camera MP4
head_frames = cam_buffers["cam_head"]
if head_frames:
    fps = int(round(1.0 / dt))
    video_path = os.path.join(log_dir, "cam_head.mp4")
    imageio.mimsave(video_path, head_frames, fps=fps)
    print(f"[INFO] Saved video to {video_path}")

env.close()


if __name__ == "__main__":
    main()
    simulation_app.close()
@@ -18,8 +18,9 @@ from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
|
||||
MINDBOT_CFG = ArticulationCfg(
|
||||
spawn=sim_utils.UsdFileCfg(
|
||||
# 1. UPDATE THE USD PATH to your local file
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot_cd.usd",
|
||||
# usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot.usd",
|
||||
# C:\Users\PC\workpalce\maic_usd_assets\twinlab\MIC_sim-3.0\pupet_arm_sim_product\sim_data_acquisition\usds\Collected_robot_2_3\robot_2_4.usd
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot_cd2.usd",
|
||||
# usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/pupet_arm_sim_product/sim_data_acquisition/usds/Collected_robot_2_3/robot_2_6.usd",
|
||||
rigid_props=sim_utils.RigidBodyPropertiesCfg(
|
||||
disable_gravity=False,
|
||||
max_depenetration_velocity=1.0,
|
||||
@@ -66,26 +67,26 @@ MINDBOT_CFG = ArticulationCfg(
|
||||
# "l_joint6": 0.0,
|
||||
# Right arm joints
|
||||
# -45° -> -0.7854
|
||||
# "r_joint1": -0.7854,
|
||||
# # 70° -> 1.2217
|
||||
# "r_joint2": 1.2217,
|
||||
# # 90° -> 1.5708
|
||||
# "r_joint3": 1.5708,
|
||||
# # -90° -> -1.5708
|
||||
# "r_joint4": -1.5708,
|
||||
# # 90° -> 1.5708
|
||||
# "r_joint5": 1.5708,
|
||||
# # 70° -> 1.2217
|
||||
# "r_joint6": 1.2217,
|
||||
# # Right arm joints
|
||||
"r_joint1": 0.0,
|
||||
"r_joint2": -1.5708,
|
||||
"r_joint1": -0.7854,
|
||||
# 70° -> 1.2217
|
||||
"r_joint2": 1.2217,
|
||||
# 90° -> 1.5708
|
||||
"r_joint3": 1.5708,
|
||||
# -90° -> -1.5708
|
||||
"r_joint4": -1.5708,
|
||||
# 90° -> 1.5708
|
||||
"r_joint5": 1.5708,
|
||||
# 70° -> 1.2217
|
||||
"r_joint6": 1.2217,
|
||||
# Right arm joints
|
||||
# "r_joint1": 0.0,
|
||||
# # "r_joint2": -1.5708,
|
||||
# "r_joint2": 0.0,
|
||||
"r_joint3": 0.0,
|
||||
"r_joint4": 0.0,
|
||||
"r_joint5": 0.0,
|
||||
"r_joint6": 0.0,
|
||||
# # left wheel
|
||||
# "r_joint3": 0.0,
|
||||
# "r_joint4": 0.0,
|
||||
# "r_joint5": 0.0,
|
||||
# "r_joint6": 0.0,
|
||||
# # # left wheel
|
||||
# "left_b_revolute_Joint": 0.0,
|
||||
# "left_f_revolute_Joint": 0.0,
|
||||
# # right wheel
|
||||
@@ -102,7 +103,7 @@ MINDBOT_CFG = ArticulationCfg(
|
||||
# trunk
|
||||
"PrismaticJoint": 0.3,
|
||||
# head
|
||||
"head_revoluteJoint": 0.5236
|
||||
"head_revoluteJoint": 0.0 #0.5236
|
||||
},
|
||||
# The initial (x, y, z) position of the robot's base in the world
|
||||
pos=(0, 0, 0.05),
|
||||
@@ -114,7 +115,7 @@ MINDBOT_CFG = ArticulationCfg(
|
||||
joint_names_expr=["l_joint[1-6]"], # This matches l_joint1, l_joint2, ..., l_joint6
|
||||
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
|
||||
velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs
|
||||
stiffness=0.0, #100000.0, # Note: Tune this for desired control performance
|
||||
stiffness=10000.0, #10000.0, # Note: Tune this for desired control performance
|
||||
damping=1000.0, #10000.0, # Note: Tune this for desired control performance
|
||||
),
|
||||
# Group for the 6 right arm joints using a regular expression
|
||||
@@ -122,7 +123,7 @@ MINDBOT_CFG = ArticulationCfg(
|
||||
joint_names_expr=["r_joint[1-6]"], # This matches r_joint1, r_joint2, ..., r_joint6
|
||||
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
|
||||
velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs
|
||||
stiffness=10000.0, #100000.0, # Note: Tune this for desired control performance1
|
||||
stiffness=0.0, #10000.0, # Note: Tune this for desired control performance1
|
||||
damping=1000.0, #10000.0, # Note: Tune this for desired control performance
|
||||
),
|
||||
"head": ImplicitActuatorCfg(joint_names_expr=["head_revoluteJoint"], stiffness=10000.0, damping=1000.0),
|
||||
@@ -142,9 +143,10 @@ MINDBOT_CFG = ArticulationCfg(
|
||||
),
|
||||
"grippers": ImplicitActuatorCfg(
|
||||
joint_names_expr=[".*_hand_joint.*"],
|
||||
stiffness=10000.0,
|
||||
stiffness=10000.0, #10000.0
|
||||
damping=1000.0,
|
||||
effort_limit_sim=50.0, # default is 10, gemini said it is too small, so I set it to 50.
|
||||
velocity_limit_sim=50.0,
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import gymnasium as gym
|
||||
|
||||
from . import agents
|
||||
|
||||
##
|
||||
# Register Gym environments.
|
||||
##
|
||||
|
||||
|
||||
gym.register(
|
||||
id="Template-centrifuge-lidup-v0",
|
||||
entry_point="isaaclab.envs:ManagerBasedRLEnv",
|
||||
disable_env_checker=True,
|
||||
kwargs={
|
||||
"env_cfg_entry_point": f"{__name__}.centrifuge_env_cfg:MindbotEnvCfg",
|
||||
# "env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
|
||||
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
|
||||
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
|
||||
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
|
||||
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
|
||||
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
|
||||
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
|
||||
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
|
||||
},
|
||||
)
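
# Minimal usage sketch (assumes the simulation app is already launched, as in the
# training script, and that parse_env_cfg from isaaclab_tasks resolves the entry
# point registered above):
#
#   import gymnasium as gym
#   import mindbot.tasks  # noqa: F401  (executes the gym.register call above)
#   from isaaclab_tasks.utils import parse_env_cfg
#
#   env_cfg = parse_env_cfg("Template-centrifuge-lidup-v0", num_envs=4)
#   env = gym.make("Template-centrifuge-lidup-v0", cfg=env_cfg)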
|
||||
@@ -0,0 +1,4 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
@@ -0,0 +1,78 @@
|
||||
params:
|
||||
seed: 42
|
||||
|
||||
# environment wrapper clipping
|
||||
env:
|
||||
# added to the wrapper
|
||||
clip_observations: 5.0
|
||||
# can make custom wrapper?
|
||||
clip_actions: 1.0
|
||||
|
||||
algo:
|
||||
name: a2c_continuous
|
||||
|
||||
model:
|
||||
name: continuous_a2c_logstd
|
||||
|
||||
# doesn't have this fine grained control but made it close
|
||||
network:
|
||||
name: actor_critic
|
||||
separate: False
|
||||
space:
|
||||
continuous:
|
||||
mu_activation: None
|
||||
sigma_activation: None
|
||||
|
||||
mu_init:
|
||||
name: default
|
||||
sigma_init:
|
||||
name: const_initializer
|
||||
val: 0
|
||||
fixed_sigma: True
|
||||
mlp:
|
||||
units: [32, 32]
|
||||
activation: elu
|
||||
d2rl: False
|
||||
|
||||
initializer:
|
||||
name: default
|
||||
regularizer:
|
||||
name: None
|
||||
|
||||
load_checkpoint: False # flag which sets whether to load the checkpoint
|
||||
load_path: '' # path to the checkpoint to load
|
||||
|
||||
config:
|
||||
name: cartpole_direct
|
||||
env_name: rlgpu
|
||||
device: 'cuda:0'
|
||||
device_name: 'cuda:0'
|
||||
multi_gpu: False
|
||||
ppo: True
|
||||
mixed_precision: False
|
||||
normalize_input: True
|
||||
normalize_value: True
|
||||
num_actors: -1 # configured from the script (based on num_envs)
|
||||
reward_shaper:
|
||||
scale_value: 0.1
|
||||
normalize_advantage: True
|
||||
gamma: 0.99
|
||||
tau : 0.95
|
||||
learning_rate: 5e-4
|
||||
lr_schedule: adaptive
|
||||
kl_threshold: 0.008
|
||||
score_to_win: 20000
|
||||
max_epochs: 150
|
||||
save_best_after: 50
|
||||
save_frequency: 25
|
||||
grad_norm: 1.0
|
||||
entropy_coef: 0.0
|
||||
truncate_grads: True
|
||||
e_clip: 0.2
|
||||
horizon_length: 32
|
||||
minibatch_size: 16384
|
||||
mini_epochs: 8
|
||||
critic_coef: 4
|
||||
clip_value: True
|
||||
seq_length: 4
|
||||
bounds_loss_coef: 0.0001
|
||||
@@ -0,0 +1,72 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from isaaclab.utils import configclass
|
||||
|
||||
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
|
||||
|
||||
|
||||
@configclass
|
||||
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
|
||||
num_steps_per_env = 48 # 稍微增加每轮步数
|
||||
max_iterations = 5000 # 增加迭代次数,给它足够的时间学习
|
||||
save_interval = 100
|
||||
# experiment_name = "mindbot_grasp" # 建议改个名,不要叫 cartpole 了
|
||||
experiment_name = "mindbot_pullUltrasoundLidUp"
|
||||
|
||||
policy = RslRlPpoActorCriticCfg(
|
||||
init_noise_std=0.7,
|
||||
actor_obs_normalization=True, # 开启归一化
|
||||
critic_obs_normalization=True, # 开启归一化
|
||||
# 增加网络容量:三层 MLP,宽度足够
|
||||
actor_hidden_dims=[128, 64, 32],
|
||||
critic_hidden_dims=[128, 64, 32],
|
||||
activation="elu",
|
||||
)
|
||||
|
||||
algorithm = RslRlPpoAlgorithmCfg(
|
||||
value_loss_coef=1.0,
|
||||
use_clipped_value_loss=True,
|
||||
clip_param=0.2,
|
||||
entropy_coef=0.005, # 保持适度的探索
|
||||
num_learning_epochs=5,
|
||||
num_mini_batches=8,
|
||||
learning_rate=3e-4, # 如果网络变大后不稳定,可以尝试降低到 5.0e-4
|
||||
schedule="adaptive",
|
||||
gamma=0.99,
|
||||
lam=0.95,
|
||||
desired_kl=0.01,
|
||||
max_grad_norm=1.0,
|
||||
)
|
||||
|
||||
|
||||
# @configclass
|
||||
# class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
|
||||
# num_steps_per_env = 16
|
||||
# max_iterations = 150
|
||||
# save_interval = 50
|
||||
# experiment_name = "cartpole_direct"
|
||||
# policy = RslRlPpoActorCriticCfg(
|
||||
# init_noise_std=1.0,
|
||||
# actor_obs_normalization=False,
|
||||
# critic_obs_normalization=False,
|
||||
# actor_hidden_dims=[32, 32],
|
||||
# critic_hidden_dims=[32, 32],
|
||||
# activation="elu",
|
||||
# )
|
||||
# algorithm = RslRlPpoAlgorithmCfg(
|
||||
# value_loss_coef=1.0,
|
||||
# use_clipped_value_loss=True,
|
||||
# clip_param=0.2,
|
||||
# entropy_coef=0.005,
|
||||
# num_learning_epochs=5,
|
||||
# num_mini_batches=4,
|
||||
# learning_rate=1.0e-3,
|
||||
# schedule="adaptive",
|
||||
# gamma=0.99,
|
||||
# lam=0.95,
|
||||
# desired_kl=0.01,
|
||||
# max_grad_norm=1.0,
|
||||
# )
|
||||
@@ -0,0 +1,20 @@
|
||||
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
|
||||
seed: 42
|
||||
|
||||
n_timesteps: !!float 1e6
|
||||
policy: 'MlpPolicy'
|
||||
n_steps: 16
|
||||
batch_size: 4096
|
||||
gae_lambda: 0.95
|
||||
gamma: 0.99
|
||||
n_epochs: 20
|
||||
ent_coef: 0.01
|
||||
learning_rate: !!float 3e-4
|
||||
clip_range: !!float 0.2
|
||||
policy_kwargs:
|
||||
activation_fn: nn.ELU
|
||||
net_arch: [32, 32]
|
||||
squash_output: False
|
||||
vf_coef: 1.0
|
||||
max_grad_norm: 1.0
|
||||
device: "cuda:0"
|
||||
@@ -0,0 +1,111 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: True
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: -2.9
|
||||
fixed_log_std: True
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ONE
|
||||
discriminator: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
# AMP memory (reference motion dataset)
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
motion_dataset:
|
||||
class: RandomMemory
|
||||
memory_size: 200000
|
||||
|
||||
# AMP memory (preventing discriminator overfitting)
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
reply_buffer:
|
||||
class: RandomMemory
|
||||
memory_size: 1000000
|
||||
|
||||
|
||||
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
|
||||
agent:
|
||||
class: AMP
|
||||
rollouts: 16
|
||||
learning_epochs: 6
|
||||
mini_batches: 2
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 5.0e-05
|
||||
learning_rate_scheduler: null
|
||||
learning_rate_scheduler_kwargs: null
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
amp_state_preprocessor: RunningStandardScaler
|
||||
amp_state_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 0.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.5
|
||||
discriminator_loss_scale: 5.0
|
||||
amp_batch_size: 512
|
||||
task_reward_weight: 0.0
|
||||
style_reward_weight: 1.0
|
||||
discriminator_batch_size: 4096
|
||||
discriminator_reward_scale: 2.0
|
||||
discriminator_logit_regularization_scale: 0.05
|
||||
discriminator_gradient_penalty_scale: 5.0
|
||||
discriminator_weight_decay_scale: 1.0e-04
|
||||
# rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "humanoid_amp_run"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 80000
|
||||
environment_info: log
|
||||
@@ -0,0 +1,80 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: False
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
|
||||
agent:
|
||||
class: IPPO
|
||||
rollouts: 16
|
||||
learning_epochs: 8
|
||||
mini_batches: 1
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 3.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cart_double_pendulum_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,82 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: True
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
|
||||
agent:
|
||||
class: MAPPO
|
||||
rollouts: 16
|
||||
learning_epochs: 8
|
||||
mini_batches: 1
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 3.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
shared_state_preprocessor: RunningStandardScaler
|
||||
shared_state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cart_double_pendulum_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,80 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: False
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
|
||||
agent:
|
||||
class: PPO
|
||||
rollouts: 32
|
||||
learning_epochs: 8
|
||||
mini_batches: 8
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 5.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 0.1
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cartpole_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,636 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import math
|
||||
import torch
|
||||
from isaaclab.actuators import ImplicitActuatorCfg
|
||||
import isaaclab.sim as sim_utils
|
||||
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
|
||||
from isaaclab.envs import ManagerBasedRLEnvCfg
|
||||
from isaaclab.managers import EventTermCfg as EventTerm
|
||||
from isaaclab.managers import ObservationGroupCfg as ObsGroup
|
||||
from isaaclab.managers import ObservationTermCfg as ObsTerm
|
||||
from isaaclab.managers import RewardTermCfg as RewTerm
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
from isaaclab.managers import TerminationTermCfg as DoneTerm
|
||||
from isaaclab.managers import CurriculumTermCfg as CurrTerm
|
||||
from isaaclab.scene import InteractiveSceneCfg
|
||||
from isaaclab.utils import configclass
|
||||
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
|
||||
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
|
||||
from isaaclab.sensors import CameraCfg
|
||||
|
||||
from . import mdp
|
||||
|
||||
##
|
||||
# Pre-defined configs
|
||||
##
|
||||
|
||||
from mindbot.robot.mindbot import MINDBOT_CFG
|
||||
|
||||
# ====== 其他物体配置 ======
|
||||
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
|
||||
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
|
||||
|
||||
|
||||
TABLE_CFG=RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Table",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.95, -0.3, 0.005],
|
||||
rot=[0.7071, 0.0, 0.0, 0.7071],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
|
||||
rigid_props=RigidBodyPropertiesCfg(
|
||||
rigid_body_enabled= True,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
disable_gravity=False,
|
||||
),
|
||||
collision_props=CollisionPropertiesCfg(
|
||||
collision_enabled=True,
|
||||
contact_offset=0.0005,#original 0.02
|
||||
rest_offset=0
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
LID_CFG = RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Lid",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.95, 0.73, 0.1055],
|
||||
rot=[1.0, 0.0, 0.0, 0.0],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/Lid_B.usd",
|
||||
# scale=(0.2, 0.2, 0.2),
|
||||
rigid_props=RigidBodyPropertiesCfg(
|
||||
rigid_body_enabled= True,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
max_angular_velocity=1000.0,
|
||||
max_linear_velocity=1000.0,
|
||||
max_depenetration_velocity=0.5,#original 5.0
|
||||
linear_damping=5.0, #original 0.5
|
||||
angular_damping=5.0, #original 0.5
|
||||
disable_gravity=False,
|
||||
),
|
||||
collision_props=CollisionPropertiesCfg(
|
||||
collision_enabled=True,
|
||||
contact_offset=0.0005,#original 0.02
|
||||
rest_offset=0
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
CENTRIFUGE_CFG = ArticulationCfg(
|
||||
spawn=sim_utils.UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/centrifuge.usd",
|
||||
rigid_props=sim_utils.RigidBodyPropertiesCfg(
|
||||
disable_gravity=False,
|
||||
max_depenetration_velocity=1.0,
|
||||
linear_damping=0.5,
|
||||
angular_damping=0.5,
|
||||
),
|
||||
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
|
||||
enabled_self_collisions=False,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
stabilization_threshold=1e-6,
|
||||
# [Important] The base must be fixed, otherwise the centrifuge tips over when the robot presses on the lid.
|
||||
# fix_root_link=True,
|
||||
),
|
||||
),
|
||||
init_state=ArticulationCfg.InitialStateCfg(
|
||||
# 1. As with the robot config, define the initial joint angles here.
joint_pos={
"r1": math.radians(0.0),  # rotor position (irrelevant here)

# [Key change]: start with the lid open.
# The USD joint limits are (-100, 0) degrees; -100 is fully open.
"r2": math.radians(-100.0),
},
|
||||
pos=(0.95, -0.3, 0.855),
|
||||
rot=[-0.7071, 0.0, 0.0, 0.7071],
|
||||
),
|
||||
actuators={
|
||||
"lid_passive_mechanism": ImplicitActuatorCfg(
|
||||
joint_names_expr=["r2"],
|
||||
|
||||
# [Key difference from the arm actuators]
# The robot arm uses stiffness=10000.0 because it must hold precise poses without wobbling.
# If the centrifuge lid joint were also set to 10000, the lid would be effectively welded
# in place and the robot could never push it closed.
# A value around 200.0 works because:
# 1. it is strong enough to hold the lid open against gravity, and
# 2. it is soft enough that the lid yields when the robot presses down on it.
stiffness=200.0,
|
||||
|
||||
# damping: a little resistance to mimic a gas strut and keep the lid from oscillating like a spring
|
||||
damping=20.0,
|
||||
|
||||
# keep the effort limit high enough that the actuator can hold the lid's weight
|
||||
effort_limit_sim=1000.0,
|
||||
velocity_limit_sim=100.0,
|
||||
),
|
||||
# the rotor can be stiffer; the robot never needs to back-drive it
|
||||
"rotor_control": ImplicitActuatorCfg(
|
||||
joint_names_expr=["r1"],
|
||||
stiffness=1000.0,
|
||||
damping=10.0,
|
||||
),
|
||||
}
|
||||
)
|
||||
# CENTRIFUGE_CFG = ArticulationCfg(
|
||||
# spawn=sim_utils.UsdFileCfg(
|
||||
# usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/centrifuge.usd",
|
||||
# rigid_props=sim_utils.RigidBodyPropertiesCfg(
|
||||
# disable_gravity=False,
|
||||
# max_depenetration_velocity=1.0,
|
||||
# linear_damping=0.5,
|
||||
# angular_damping=0.5,
|
||||
# ),
|
||||
# articulation_props=sim_utils.ArticulationRootPropertiesCfg(
|
||||
# enabled_self_collisions=False,#
|
||||
# solver_position_iteration_count=32,
|
||||
# solver_velocity_iteration_count=16,
|
||||
# stabilization_threshold=1e-6,
|
||||
# fix_root_link=True,
|
||||
# ),
|
||||
# ),
|
||||
# init_state=ArticulationCfg.InitialStateCfg(
|
||||
# joint_pos={
|
||||
# "r1": math.radians(-100.0),
|
||||
# "r2": math.radians(-100.0),
|
||||
# },
|
||||
# pos=(0.95, -0.3, 0.855),
|
||||
# rot=[-0.7071, 0.0, 0.0, 0.7071],
|
||||
# ),
|
||||
# # actuators={}
|
||||
# actuators={
|
||||
# "passive_damper": ImplicitActuatorCfg(
|
||||
# # ".*" 表示匹配该USD文件内的所有关节(无论是轮子、屏幕转轴还是其他)
|
||||
# joint_names_expr=["r2"],
|
||||
|
||||
# # 【关键逻辑】
|
||||
# # 使用较高的 Stiffness (刚度) 模拟电机或弹簧锁死在特定位置
|
||||
# # 这样盖子才不会掉下来
|
||||
# stiffness=200.0,
|
||||
|
||||
# # 适当的阻尼,防止盖子像弹簧一样疯狂抖动
|
||||
# damping=20.0,
|
||||
|
||||
# # 确保力矩足够克服盖子的重力
|
||||
# effort_limit_sim=1000.0,
|
||||
# velocity_limit_sim=100.0,
|
||||
# ),
|
||||
# # 如果有其他关节(如转子r1),可以单独配置
|
||||
# "rotor_control": ImplicitActuatorCfg(
|
||||
# joint_names_expr=["r1"],
|
||||
# stiffness=1000.0,
|
||||
# damping=10.0,
|
||||
# # joint_names_expr=[".*"],
|
||||
|
||||
# # stiffness=0.0,
|
||||
# # damping=10.0,
|
||||
# # effort_limit_sim=100.0,
|
||||
# # velocity_limit_sim=100.0,
|
||||
# ),
|
||||
# }
|
||||
# )
|
||||
|
||||
|
||||
ROOM_CFG = AssetBaseCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Room",
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
|
||||
),
|
||||
)
|
||||
|
||||
##
|
||||
# Scene definition
|
||||
##
|
||||
|
||||
|
||||
@configclass
|
||||
class MindbotSceneCfg(InteractiveSceneCfg):
|
||||
"""Configuration for a cart-pole scene."""
|
||||
|
||||
# ground plane
|
||||
ground = AssetBaseCfg(
|
||||
prim_path="/World/ground",
|
||||
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
|
||||
)
|
||||
|
||||
# robot
|
||||
# Room: AssetBaseCfg = ROOM_CFG.replace(prim_path="{ENV_REGEX_NS}/Room")
|
||||
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
|
||||
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
|
||||
centrifuge: ArticulationCfg = CENTRIFUGE_CFG.replace(prim_path="{ENV_REGEX_NS}/centrifuge")
|
||||
lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
|
||||
# lights
|
||||
dome_light = AssetBaseCfg(
|
||||
prim_path="/World/DomeLight",
|
||||
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
|
||||
)
|
||||
"""
|
||||
Camera definitions (kept here for reference, currently disabled):
|
||||
# head_camera=CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
# update_period=1/120,
|
||||
# height=480,
|
||||
# width=640,
|
||||
# data_type=["rgb"],
|
||||
# spawn=None,
|
||||
# )
|
||||
"""
|
||||
# left_hand_camera=CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
|
||||
# update_period=1/120,
|
||||
# height=480,
|
||||
# width=640,
|
||||
# data_types=["rgb"],
|
||||
# spawn=None,
|
||||
# )
|
||||
# right_hand_camera=CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
|
||||
# update_period=1/120,
|
||||
# height=480,
|
||||
# width=640,
|
||||
# data_types=["rgb"],
|
||||
# spawn=None,
|
||||
# )
|
||||
# head_camera=CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
# update_period=1/120,
|
||||
# height=480,
|
||||
# width=640,
|
||||
# data_types=["rgb"],
|
||||
# spawn=None,
|
||||
# )
|
||||
# chest_camera=CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
|
||||
# update_period=1/120,
|
||||
# height=480,
|
||||
# width=640,
|
||||
# data_types=["rgb"],
|
||||
# spawn=None,
|
||||
# )
|
||||
##
|
||||
# MDP settings
|
||||
##
|
||||
|
||||
def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
|
||||
asset = env.scene[asset_cfg.name]
|
||||
body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
|
||||
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
|
||||
return pos_w
|
||||
|
||||
|
||||
@configclass
|
||||
class ActionsCfg:
|
||||
"""Action specifications for the MDP."""
|
||||
|
||||
# Task-space controller: the policy outputs a 6-D end-effector increment (position + orientation)
|
||||
left_arm_ee = DifferentialInverseKinematicsActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["l_joint[1-6]"], # 左臂的6个关节
|
||||
body_name="left_hand_body", # 末端执行器body名称
|
||||
controller=DifferentialIKControllerCfg(
|
||||
command_type="pose", # 控制位置+姿态
|
||||
use_relative_mode=True, # 相对模式:动作是增量
|
||||
ik_method="dls", # Damped Least Squares方法
|
||||
),
|
||||
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
|
||||
)
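
# Scale sanity check (plain arithmetic from this config): with use_relative_mode=True,
# an action of +1.0 on x shifts the end-effector target by 0.01 m per control step.
# With decimation=4 and sim dt=1/120 (set in __post_init__ below) the control rate is
# 30 Hz, so the commanded EE speed tops out around 0.3 m/s and roughly 0.75 rad/s per
# rotation axis.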
|
||||
|
||||
# right_arm_fixed = mdp.JointPositionActionCfg(
|
||||
# asset_name="Mindbot",
|
||||
# joint_names=["r_joint[1-6]"], # 对应 l_joint1 到 l_joint6
|
||||
|
||||
# # 1. 缩放设为 0:这让 RL 策略无法控制它,它不会动
|
||||
# scale=0.0,
|
||||
|
||||
# # 2. 偏移设为你的目标角度(弧度):这告诉电机“永远停在这里”
|
||||
# # 对应 (135, -70, -90, 90, 90, -70)
|
||||
# offset={
|
||||
# "r_joint1": 2.3562,
|
||||
# "r_joint2": -1.2217,
|
||||
# "r_joint3": -1.5708,
|
||||
# "r_joint4": 1.5708,
|
||||
# "r_joint5": 1.5708,
|
||||
# "r_joint6": -1.2217,
|
||||
# },
|
||||
# )
|
||||
|
||||
grippers_position = mdp.BinaryJointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["left_hand_joint_left", "left_hand_joint_right"],
|
||||
|
||||
# 修正:使用字典格式
|
||||
# open_command_expr={"关节名正则": 值}
|
||||
open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
|
||||
|
||||
# close_command_expr={"关节名正则": 值}
|
||||
close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
|
||||
)
|
||||
|
||||
|
||||
trunk_position = mdp.JointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["PrismaticJoint"],
|
||||
scale=0.00,
|
||||
offset=0.3,
|
||||
clip=None
|
||||
)
|
||||
|
||||
|
||||
@configclass
|
||||
class ObservationsCfg:
|
||||
"""Observation specifications for the MDP."""
|
||||
|
||||
@configclass
|
||||
class PolicyCfg(ObsGroup):
|
||||
"""Observations for policy group."""
|
||||
|
||||
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
|
||||
left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
|
||||
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
|
||||
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
|
||||
centrifuge_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("centrifuge")})
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
self.enable_corruption = False
|
||||
self.concatenate_terms = True
|
||||
|
||||
# observation groups
|
||||
policy: PolicyCfg = PolicyCfg()
|
||||
|
||||
|
||||
@configclass
|
||||
class EventCfg:
|
||||
"""Configuration for events."""
|
||||
|
||||
# Reset all joints to init_state (no offset); this term must be present.
|
||||
reset_mindbot_all_joints = EventTerm(
|
||||
func=mdp.reset_joints_by_offset,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot"),
|
||||
"position_range": (0.0, 0.0),
|
||||
"velocity_range": (0.0, 0.0),
|
||||
},
|
||||
)
|
||||
# 2. Base mounting error (consider shrinking this to about 2 cm)
|
||||
reset_mindbot_base = EventTerm(
|
||||
func=mdp.reset_root_state_uniform,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot"),
|
||||
"pose_range": {
|
||||
"x": (-0.05, 0.05),
|
||||
"y": (-0.05, 0.05),
|
||||
"z": (0.74, 0.75),
|
||||
"yaw": (-0.1, 0.1),
|
||||
},
|
||||
"velocity_range": {"x": (0.0, 0.0), "y": (0.0, 0.0)},
|
||||
},
|
||||
)
|
||||
# 1. Small arm-joint offsets (0.01 rad is about 0.6 degrees, which is appropriate)
|
||||
reset_mindbot_left_arm = EventTerm(
|
||||
func=mdp.reset_joints_by_offset,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
|
||||
"position_range": (-0.01, 0.01),
|
||||
"velocity_range": (0.0, 0.0),
|
||||
},
|
||||
)
|
||||
|
||||
reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
|
||||
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
reset_centrifuge = EventTerm(
|
||||
func=mdp.reset_root_state_uniform,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("centrifuge"),
|
||||
"pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)},
|
||||
"velocity_range": {"x": (0.0, 0.0)}
|
||||
}
|
||||
)
|
||||
reset_lid = EventTerm(
|
||||
func=mdp.reset_root_state_uniform,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("lid"),
|
||||
"pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)},
|
||||
"velocity_range": {"x": (0.0, 0.0)}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@configclass
|
||||
class RewardsCfg:
|
||||
"""Reward terms for the MDP."""
|
||||
|
||||
gripper_lid_orientation_alignment = RewTerm(
|
||||
func=mdp.gripper_lid_orientation_alignment,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"gripper_body_name": "left_hand_body",
|
||||
"gripper_forward_axis": (0.0, 0.0, 1.0),
|
||||
"gripper_width_axis": (0.0, 1.0, 0.0),
|
||||
"lid_handle_axis": (0.0, 1.0, 0.0),
|
||||
"max_angle_deg": 85.0, # 允许60度内的偏差
|
||||
},
|
||||
weight=5.0 #original 5.0
|
||||
)
|
||||
|
||||
# stage 2
|
||||
# Position alignment reward: encourage the grippers to straddle the lid, with the gripper centre height_offset above it in Z
|
||||
gripper_lid_position_alignment = RewTerm(
|
||||
func=mdp.gripper_lid_position_alignment,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"height_offset": 0.07, # Z方向:lid 上方 0.1m
|
||||
"std": 0.3, # 位置对齐的容忍度
|
||||
},
|
||||
weight=3.0 #original 3.0
|
||||
)
|
||||
approach_lid_penalty = RewTerm(
|
||||
func=mdp.gripper_lid_distance_penalty,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"height_offset": 0.07,
|
||||
},
|
||||
# A negative weight turns this term into a penalty:
# at a distance of 0.5 m and weight=-1.0, the contribution is -0.5.
weight=-1.0
|
||||
)
|
||||
# 【新增】精细对齐 (引导进入 2cm 圈)
|
||||
gripper_lid_fine_alignment = RewTerm(
|
||||
func=mdp.gripper_lid_position_alignment,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"height_offset": 0.07, # 【注意】这个高度必须非常准,和 interaction 判定高度一致
|
||||
"std": 0.05, # 非常陡峭的梯度,只在 5-10cm 内有效
|
||||
},
|
||||
weight=10.0 # 高权重
|
||||
)
|
||||
|
||||
gripper_close_interaction = RewTerm(
|
||||
func=mdp.gripper_close_when_near,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"joint_names": ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
"target_close_pos": 0.03,
|
||||
"height_offset": 0.04,
|
||||
"grasp_radius": 0.03,
|
||||
},
|
||||
weight=10.0
|
||||
)
|
||||
|
||||
lid_lifted_reward = RewTerm(
|
||||
func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
|
||||
},
|
||||
weight=10.0 # 给一个足够大的权重
|
||||
)
|
||||
|
||||
lid_lifting_reward = RewTerm(
|
||||
func=mdp.lid_lift_progress_reward,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
|
||||
"target_height_lift": 0.2,
|
||||
"height_offset": 0.07, # 与其他奖励保持一致
|
||||
"std": 0.1
|
||||
},
|
||||
# 权重设大一点,告诉它这是最终目标
|
||||
weight=10.0
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
||||
@configclass
|
||||
class TerminationsCfg:
|
||||
"""Termination terms for the MDP."""
|
||||
|
||||
time_out = DoneTerm(func=mdp.time_out, time_out=True)
|
||||
|
||||
mindbot_fly_away = DoneTerm(
|
||||
func=mdp.base_height_failure,
|
||||
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
|
||||
)
|
||||
|
||||
# lid_fly_away = DoneTerm(
|
||||
# func=mdp.base_height_failure,
|
||||
# params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
|
||||
# )
|
||||
|
||||
# 新增:盖子掉落判定
|
||||
# 如果盖子掉回桌面(高度接近初始高度),且已经运行了一段时间(避开刚开始重置的瞬间)
|
||||
# lid_dropped = DoneTerm(
|
||||
# func=mdp.base_height_failure, # 复用高度判定函数
|
||||
# params={
|
||||
# "asset_cfg": SceneEntityCfg("lid"),
|
||||
# "minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了
|
||||
# },
|
||||
# )
|
||||
|
||||
|
||||
|
||||
##
|
||||
# Environment configuration
|
||||
##
|
||||
|
||||
@configclass
|
||||
class CurriculumCfg:
|
||||
pass
|
||||
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
|
||||
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
|
||||
|
||||
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
|
||||
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
|
||||
|
||||
# position_std_scheduler = CurrTerm(
|
||||
# func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类
|
||||
# params={
|
||||
# # 路径:rewards管理器 -> 你的奖励项名 -> params字典 -> std键
|
||||
# "address": "rewards.gripper_lid_position_alignment.params.std",
|
||||
|
||||
# # 修改逻辑:使用我们刚才写的函数
|
||||
# "modify_fn": mdp.annealing_std,
|
||||
|
||||
# # 传给函数的参数
|
||||
# "modify_params": {
|
||||
# "start_step": 00, # 约 600 轮
|
||||
# "end_step": 5_000, # 约 1200 轮
|
||||
# "start_std": 0.3,
|
||||
# "end_std": 0.05,
|
||||
# }
|
||||
# }
|
||||
# )
|
||||
|
||||
@configclass
|
||||
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
|
||||
# Scene settings
|
||||
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=5.0)
|
||||
# Basic settings
|
||||
observations: ObservationsCfg = ObservationsCfg()
|
||||
actions: ActionsCfg = ActionsCfg()
|
||||
events: EventCfg = EventCfg()
|
||||
# MDP settings
|
||||
rewards: RewardsCfg = RewardsCfg()
|
||||
terminations: TerminationsCfg = TerminationsCfg()
|
||||
# curriculum: CurriculumCfg = CurriculumCfg()
|
||||
|
||||
# Post initialization
|
||||
def __post_init__(self) -> None:
|
||||
"""Post initialization."""
|
||||
# general settings
|
||||
self.decimation = 4  # original 2; with sim dt = 1/120 below, this keeps the control rate at 30 Hz
|
||||
self.episode_length_s = 10
|
||||
# viewer settings
|
||||
self.viewer.eye = (3.5, 0.0, 13.2)#(3.5, 0.0, 3.2)
|
||||
# simulation settings
|
||||
self.sim.dt = 1 / 120 #original 1 / 60
|
||||
self.sim.render_interval = self.decimation
|
||||
# # 1. Base GPU heap memory
|
||||
self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256MB
|
||||
self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
|
||||
self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
|
||||
self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # ~16.8 million contact points
self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # ~1 million contact patches
|
||||
|
||||
# # 5. Aggregate pair capacities (for complex articulations)
|
||||
self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
|
||||
self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
|
||||
self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024
|
||||
@@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
"""This sub-module contains the functions that are specific to the environment."""
|
||||
|
||||
from isaaclab.envs.mdp import * # noqa: F401, F403
|
||||
|
||||
from .rewards import * # noqa: F401, F403
|
||||
from .terminations import * # noqa: F401, F403
|
||||
from .gripper import * # noqa: F401, F403
|
||||
from .curriculums import * # noqa: F401, F403
|
||||
@@ -0,0 +1,50 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
    import torch
    from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
|
||||
from isaaclab.envs.mdp import modify_term_cfg
|
||||
|
||||
def annealing_std(
|
||||
env: ManagerBasedRLEnv,
|
||||
env_ids: torch.Tensor,
|
||||
current_value: float,
|
||||
start_step: int,
|
||||
end_step: int,
|
||||
start_std: float,
|
||||
end_std: float
|
||||
):
|
||||
"""
|
||||
根据步数线性退火 std 值。
|
||||
|
||||
Args:
|
||||
current_value: 当前的参数值 (系统自动传入)
|
||||
start_step: 开始退火的步数
|
||||
end_step: 结束退火的步数
|
||||
start_std: 初始 std
|
||||
end_std: 最终 std
|
||||
"""
|
||||
current_step = env.common_step_counter
|
||||
|
||||
# 1. 还没到开始时间 -> 保持初始值 (或者不改)
|
||||
if current_step < start_step:
|
||||
# 如果当前值还没被设为 start_std,就强制设一下,否则不动
|
||||
return start_std
|
||||
|
||||
# 2. 已经超过结束时间 -> 保持最终值
|
||||
elif current_step >= end_step:
|
||||
return end_std
|
||||
|
||||
# 3. 在中间 -> 线性插值
|
||||
else:
|
||||
ratio = (current_step - start_step) / (end_step - start_step)
|
||||
new_std = start_std + (end_std - start_std) * ratio
|
||||
return new_std
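
# Usage sketch (mirrors the commented-out curriculum term in the env config): anneal
# the `std` of the position-alignment reward from 0.3 to 0.05 over the first 5_000
# common steps by pointing `modify_term_cfg` at this function.
#
#   position_std_scheduler = CurrTerm(
#       func=mdp.modify_term_cfg,
#       params={
#           "address": "rewards.gripper_lid_position_alignment.params.std",
#           "modify_fn": annealing_std,
#           "modify_params": {"start_step": 0, "end_step": 5_000,
#                             "start_std": 0.3, "end_std": 0.05},
#       },
#   )
#
# At step 2_500 the linear interpolation gives 0.3 + (0.05 - 0.3) * 0.5 = 0.175.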
|
||||
@@ -0,0 +1,54 @@
|
||||
# 假设这是在你的 mdp.py 文件中
|
||||
|
||||
from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
|
||||
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
|
||||
from isaaclab.utils import configclass
|
||||
import torch
|
||||
|
||||
|
||||
class CoupledJointPositionAction(JointPositionAction):
|
||||
def __init__(self, cfg: 'CoupledJointPositionActionCfg', env):
|
||||
super().__init__(cfg, env)
|
||||
|
||||
@property
|
||||
def action_dim(self) -> int:
|
||||
"""强制 ActionManager 认为只需要 1D 输入。"""
|
||||
return 1
|
||||
|
||||
"""
|
||||
这是运行时被实例化的类。它继承自 JointPositionAction。
|
||||
"""
|
||||
def process_actions(self, actions: torch.Tensor):
|
||||
scale = self.cfg.scale
|
||||
offset = self.cfg.offset
|
||||
# store the raw actions
|
||||
self._raw_actions[:] = torch.clamp(actions, -1, 1)
|
||||
# apply the affine transformations
|
||||
target_position_interval = self._raw_actions * scale + offset
|
||||
right_cmd = target_position_interval
|
||||
left_cmd = -target_position_interval
|
||||
# import pdb; pdb.set_trace()
|
||||
self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)
|
||||
# print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}")
|
||||
# clip actions
|
||||
if self.cfg.clip is not None:
|
||||
self._processed_actions = torch.clamp(
|
||||
self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
|
||||
)
|
||||
|
||||
|
||||
@configclass
|
||||
class CoupledJointPositionActionCfg(JointPositionActionCfg):
|
||||
"""
|
||||
配置类。关键在于设置 class_type 指向上面的实现类。
|
||||
"""
|
||||
# !!! 关键点 !!!
|
||||
# 告诉 ActionManager: "当你看到这个配置时,请实例化 CoupledJointPositionAction 这个类"
|
||||
class_type = CoupledJointPositionAction
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
# 这里的检查逻辑是没问题的,因为 ActionManager 会在初始化时调用它
|
||||
if len(self.joint_names) != 2:
|
||||
raise ValueError("Requires exactly two joint names.")
|
||||
super().__post_init__()
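
# Usage sketch (hypothetical wiring; joint names taken from the env config in this
# commit): drive both gripper fingers from a single 1-D action by using this cfg in
# place of the BinaryJointPositionActionCfg term.
#
#   grippers_position = CoupledJointPositionActionCfg(
#       asset_name="Mindbot",
#       joint_names=["left_hand_joint_left", "left_hand_joint_right"],
#       scale=0.03,   # the clamped [-1, 1] action maps to -/+0.03 on the two joints
#       offset=0.0,
#   )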
|
||||
|
||||
@@ -0,0 +1,881 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import torch
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from isaaclab.assets import Articulation, RigidObject
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
|
||||
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
|
||||
"""Penalize joint position deviation from a target value."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
asset: Articulation = env.scene[asset_cfg.name]
|
||||
# wrap the joint positions to (-pi, pi)
|
||||
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
|
||||
# compute the reward
|
||||
return torch.sum(torch.square(joint_pos - target), dim=1)
|
||||
|
||||
|
||||
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
|
||||
"""Reward the agent for lifting the lid above the minimal height."""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
|
||||
height = lid.data.root_pos_w[:, 2]
|
||||
return torch.where(height > minimal_height, 1.0, 0.0)
|
||||
|
||||
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Reward the agent for reaching the lid using tanh-kernel."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# float 化,防止成为 (3,) 向量
|
||||
if not isinstance(std, float):
|
||||
if torch.is_tensor(std):
|
||||
std = std.item()
|
||||
else:
|
||||
std = float(std)
|
||||
|
||||
# Target lid position: (num_envs, 3)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# Gripper position: (num_envs, 3)
|
||||
# body_idx = robot.find_bodies(gripper_body_name)[0]
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
|
||||
# Distance of the gripper to the lid: (num_envs,)
|
||||
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
|
||||
|
||||
return 1 - torch.tanh(distance / std)
|
||||
|
||||
|
||||
def lid_grasped(env: ManagerBasedRLEnv,
|
||||
distance_threshold: float = 0.05,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Reward the agent for grasping the lid (close and lifted)."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# Target lid position: (num_envs, 3)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# Gripper position: (num_envs, 3)
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
|
||||
|
||||
# Distance of the gripper to the lid: (num_envs,)
|
||||
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
|
||||
|
||||
# Check if close and lifted
|
||||
is_close = distance < distance_threshold
|
||||
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
|
||||
|
||||
return torch.where(is_close & is_lifted, 1.0, 0.0)
|
||||
|
||||
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Combined reward for reaching the lid AND lifting it."""
|
||||
# Get reaching reward
|
||||
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
|
||||
# Get lifting reward
|
||||
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
|
||||
# Combine rewards multiplicatively
|
||||
return reach_reward * lift_reward
|
||||
|
||||
|
||||
|
||||
# def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# left_gripper_name: str = "left_hand__l",
|
||||
# right_gripper_name: str = "left_hand_r",
|
||||
# height_offset: float = 0.1,
|
||||
# std: float = 0.1) -> torch.Tensor:
|
||||
# """奖励函数:夹爪相对于lid的位置对齐。
|
||||
|
||||
# Args:
|
||||
# env: 环境实例
|
||||
# lid_cfg: lid的配置
|
||||
# robot_cfg: 机器人的配置
|
||||
# left_gripper_name: 左侧夹爪body名称
|
||||
# right_gripper_name: 右侧夹爪body名称
|
||||
# side_distance: Y方向的期望距离
|
||||
# height_offset: Z方向的期望高度偏移
|
||||
# std: tanh核函数的标准差
|
||||
|
||||
# Returns:
|
||||
# 位置对齐奖励 (num_envs,)
|
||||
# """
|
||||
# lid: RigidObject = env.scene[lid_cfg.name]
|
||||
# robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# # 获取两个夹爪的位置
|
||||
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
|
||||
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
|
||||
|
||||
# # 计算夹爪中心位置
|
||||
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
|
||||
|
||||
# # 计算相对位置(夹爪中心相对于 lid 中心)
|
||||
# rel_pos = gripper_center - lid_pos_w # (num_envs, 3)
|
||||
|
||||
# # X 方向位置:应该对齐(接近 0)
|
||||
# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6
|
||||
# x_reward = 1.0 - torch.tanh(x_dist / std)
|
||||
|
||||
# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance)
|
||||
# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6
|
||||
# # y_error = torch.abs(y_dist - side_distance)
|
||||
# # y_reward = 1.0 - torch.tanh(y_error / std)
|
||||
# y_reward = 1.0 - torch.tanh(y_dist / std)
|
||||
|
||||
# # Z 方向位置:应该在 lid 上方 height_offset 处
|
||||
# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6
|
||||
# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6
|
||||
# z_reward = 1.0 - torch.tanh(z_error / std)
|
||||
|
||||
# # 组合位置奖励
|
||||
# position_reward = x_reward * y_reward * z_reward
|
||||
|
||||
# return position_reward
|
||||
|
||||
|
||||
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
height_offset: float = 0.02,
|
||||
std: float = 0.1) -> torch.Tensor:
|
||||
"""奖励函数:夹爪相对于lid的位置对齐(简化版)。
|
||||
|
||||
计算夹爪中心到目标点(lid上方height_offset处)的距离奖励。
|
||||
|
||||
Args:
|
||||
env: 环境实例
|
||||
lid_cfg: lid的配置
|
||||
robot_cfg: 机器人的配置
|
||||
left_gripper_name: 左侧夹爪body名称
|
||||
right_gripper_name: 右侧夹爪body名称
|
||||
height_offset: Z方向的期望高度偏移(目标点在lid上方)
|
||||
std: tanh核函数的标准差
|
||||
|
||||
Returns:
|
||||
位置对齐奖励 (num_envs,)
|
||||
"""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# 目标点:lid位置 + height_offset(Z方向)
|
||||
target_pos = lid_pos_w.clone()
|
||||
# print(target_pos[0])
|
||||
target_pos[:, 2] += height_offset # 在lid上方height_offset处
|
||||
# print(target_pos[0])
|
||||
# 获取两个夹爪的位置
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
|
||||
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
|
||||
|
||||
# 计算夹爪中心位置
|
||||
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
|
||||
# print(gripper_center[0])
|
||||
# 计算夹爪中心到目标点的3D距离
|
||||
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# 距离奖励:距离越小,奖励越大
|
||||
position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
return position_reward
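
# Worked example of the tanh kernel (plain arithmetic): with std=0.05, a gripper
# centre 10 cm from the target point scores 1 - tanh(0.10 / 0.05) ≈ 0.04, while at
# 1 cm it scores 1 - tanh(0.01 / 0.05) ≈ 0.80, so a small std only rewards the last
# few centimetres of the approach.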
|
||||
|
||||
|
||||
# ... (keep the existing imports)


# ==============================================================================
# New: direct distance penalty (linear guidance)
# ==============================================================================
def gripper_lid_distance_penalty(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    height_offset: float = 0.07,
) -> torch.Tensor:
    """Euclidean distance from the gripper center to the target point.

    Returns a positive distance; assign it a negative weight in the reward config
    (e.g. weight=-1.0) so that a larger distance means a larger penalty, i.e. a
    lower total reward.
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]

    # 1. Target point (lid center + height offset).
    lid_pos_w = lid.data.root_pos_w[:, :3].clone()
    lid_pos_w[:, 2] += height_offset

    # 2. Gripper center.
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)

    left_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
    right_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
    gripper_center = (left_pos + right_pos) / 2.0

    # 3. Distance.
    distance = torch.norm(gripper_center - lid_pos_w, dim=1)

    return distance
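# ------------------------------------------------------------------------------
# A minimal sketch of how the two reach terms above could be registered with the
# reward manager. The config class, import path and weights below are illustrative
# assumptions, not part of this file; only RewardTermCfg/SceneEntityCfg and the two
# reward functions are existing names.
#
#   from isaaclab.managers import RewardTermCfg as RewTerm
#   from isaaclab.utils import configclass
#   import mindbot.tasks.lift_lid.mdp as mdp  # hypothetical module path
#
#   @configclass
#   class RewardsCfg:
#       # positive distance with a negative weight -> dense linear penalty
#       reach_penalty = RewTerm(func=mdp.gripper_lid_distance_penalty, weight=-1.0,
#                               params={"height_offset": 0.07})
#       # bounded tanh-kernel alignment reward
#       reach_align = RewTerm(func=mdp.gripper_lid_position_alignment, weight=2.0,
#                             params={"height_offset": 0.02, "std": 0.1})
# ------------------------------------------------------------------------------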
|
||||
|
||||
# ==============================================================================
# Refined: position alignment (same shaping, logic kept explicit)
# NOTE: this redefines gripper_lid_position_alignment above, so Python keeps only
# this later definition and the earlier one is effectively dead code.
# ==============================================================================
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
                                   lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
                                   robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                                   left_gripper_name: str = "left_hand__l",
                                   right_gripper_name: str = "left_hand_r",
                                   height_offset: float = 0.02,
                                   std: float = 0.1) -> torch.Tensor:
    """Reward: tanh-kernel distance reward (the closer the gripper gets, the faster the score grows)."""
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]

    lid_pos_w = lid.data.root_pos_w[:, :3]

    target_pos = lid_pos_w.clone()
    target_pos[:, 2] += height_offset

    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)

    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]

    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0

    distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6

    # The shaping itself stays unchanged; what matters is how std and the weight
    # are set in the reward config.
    position_reward = 1.0 - torch.tanh(distance / std)

    return position_reward
|
||||
|
||||
|
||||
# def gripper_lid_orientation_alignment(
|
||||
# env,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# gripper_body_name: str = "left_hand_body",
|
||||
# gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z
|
||||
# gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行)
|
||||
# lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# # 1. 获取对象
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 2. 获取姿态
|
||||
# lid_quat_w = lid.data.root_quat_w
|
||||
# body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
# gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
|
||||
|
||||
# # 3. 垂直对齐 (Vertical Alignment)
|
||||
# # 目标:夹爪 +Y 指向 World -Z
|
||||
# grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
|
||||
# # print(grip_fwd_world[0])
|
||||
# world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
|
||||
# dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) #add
|
||||
# # 映射 [-1, 1] -> [0, 1],且全程有梯度
|
||||
# # 即使是朝天 (+1),只要稍微往下转一点,分数就会增加
|
||||
# # rew_vertical = (dot_vertical + 1.0) / 2.0
|
||||
# val_vertical = torch.clamp((dot_vertical + 1.0) / 2.0, 0.0, 1.0)
|
||||
# rew_vertical = torch.pow(val_vertical, 2) + 1e-4
|
||||
# # nan -> 0
|
||||
# rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
|
||||
|
||||
# # 4. 偏航对齐 (Yaw Alignment)
|
||||
# # 目标:夹爪 +Z 平行于 Lid +Y
|
||||
# grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
|
||||
|
||||
# lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
|
||||
|
||||
# dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
|
||||
# rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
|
||||
# # nan -> 0
|
||||
# rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
|
||||
|
||||
# # 5. 组合
|
||||
# total_reward = rew_vertical * rew_yaw
|
||||
|
||||
# total_reward=torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
|
||||
|
||||
# # debug
|
||||
# if not torch.isfinite(total_reward).all():
|
||||
# print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
|
||||
# print(f"rew_vertical: {rew_vertical}")
|
||||
# print(f"rew_yaw: {rew_yaw}")
|
||||
|
||||
# return total_reward
|
||||
|
||||
|
||||
def gripper_lid_orientation_alignment(
    env,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    gripper_body_name: str = "left_hand_body",
    gripper_forward_axis: tuple = (0.0, 0.0, 1.0),  # corrected to local Z
    gripper_width_axis: tuple = (0.0, 1.0, 0.0),    # corrected to local Y (kept parallel to the handle)
    lid_handle_axis: tuple = (0.0, 1.0, 0.0),       # corrected to local Y
    max_angle_deg: float = 60.0,                    # maximum allowed angular deviation (degrees)
) -> torch.Tensor:
    # 1. Fetch the scene objects.
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]

    # 2. Fetch the poses.
    lid_quat_w = lid.data.root_quat_w
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]

    # 3. Vertical alignment: the gripper forward axis should point along world -Z (downwards).
    grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
    grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
    world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)

    # Dot product of unit vectors = cos(angle).
    dot_vertical = torch.clamp(torch.sum(grip_fwd_world * world_down, dim=1), -1.0, 1.0)

    # Angle in radians.
    angle_rad = torch.acos(dot_vertical)
    max_angle_rad = torch.tensor(max_angle_deg * torch.pi / 180.0, device=env.device)

    # Reward decays smoothly from 1.0 at 0 deg to 0.0 at max_angle_deg.
    angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
    rew_vertical = 1.0 - angle_normalized  # the smaller the angle, the larger the reward

    # Beyond max_angle_deg the reward is zero.
    rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical))

    # nan -> 0
    rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)

    # 4. Yaw alignment: the gripper width axis should be parallel to the lid handle axis.
    grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
    grip_width_world = quat_apply(gripper_quat_w, grip_width_local)

    lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
    lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)

    dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
    rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
    # nan -> 0
    rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)

    # 5. Combine.
    total_reward = rew_vertical * rew_yaw

    total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)

    # debug
    if not torch.isfinite(total_reward).all():
        print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
        print(f"rew_vertical: {rew_vertical}")
        print(f"rew_yaw: {rew_yaw}")

    return total_reward
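# Standalone restatement of the vertical gating above, useful as a quick sanity
# check outside the simulator. This helper is an illustrative addition (it is not
# called by any reward term) and simply mirrors the angle-based shaping:
# 1.0 at 0 deg, linear falloff, 0.0 at and beyond max_angle_deg.
def _vertical_gate_demo(dot_vertical: torch.Tensor, max_angle_deg: float = 60.0) -> torch.Tensor:
    angle = torch.acos(torch.clamp(dot_vertical, -1.0, 1.0))
    max_angle = torch.deg2rad(torch.tensor(max_angle_deg, device=dot_vertical.device))
    rew = 1.0 - torch.clamp(angle / max_angle, 0.0, 1.0)
    return torch.where(angle <= max_angle, rew, torch.zeros_like(rew))

# Example: _vertical_gate_demo(torch.tensor([1.0, 0.866, 0.0])) -> approx [1.0, 0.5, 0.0]
# (0 deg, 30 deg and 90 deg with a 60 deg cutoff).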
|
||||
|
||||
|
||||
# def gripper_open_close_reward(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# left_gripper_name: str = "left_hand_body", # 用于计算距离的 body
|
||||
# # 注意:确保 joint_names 只包含那两个夹爪关节
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# close_threshold: float = 0.05, # 5cm 以内视为“近”,需要闭合
|
||||
# target_open_pos: float = 0.0, # 【修正】张开是 0.0
|
||||
# target_close_pos: float = 0.03 # 【修正】闭合是 0.03
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 逻辑很简单:
|
||||
# 1. 如果 距离 < close_threshold:目标关节位置 = 0.03 (闭合)
|
||||
# 2. 如果 距离 >= close_threshold:目标关节位置 = 0.0 (张开)
|
||||
# 3. 返回 -abs(当前关节 - 目标关节),即惩罚误差。
|
||||
# """
|
||||
|
||||
# # 1. 获取对象
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 2. 计算距离 (末端 vs Lid中心)
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
|
||||
# # distance: (num_envs,)
|
||||
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
|
||||
|
||||
# # 3. 动态确定目标关节值
|
||||
# # 如果距离小于阈值,目标设为 close_pos,否则设为 open_pos
|
||||
# # target_val: (num_envs, 1)
|
||||
# # print(distance)
|
||||
# target_val = torch.where(
|
||||
# distance < close_threshold,
|
||||
# torch.tensor(target_close_pos, device=env.device),
|
||||
# torch.tensor(target_open_pos, device=env.device)
|
||||
# ).unsqueeze(1)
|
||||
|
||||
# # 4. 获取当前关节位置
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # joint_pos: (num_envs, 2)
|
||||
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
|
||||
|
||||
# # 取绝对值(防止左指是负数的情况,虽然你的配置里范围看起来都是正的,加上abs更保险)
|
||||
# current_joint_pos_abs = torch.abs(current_joint_pos)
|
||||
|
||||
# # 5. 计算误差
|
||||
# # error: (num_envs,) -> 两个手指误差的平均值
|
||||
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
|
||||
|
||||
# # # 6. 返回负奖励 (惩罚)
|
||||
# # # 误差越大,惩罚越大。完全符合要求时奖励为 0。
|
||||
# # return -error
|
||||
# # =====================================================
|
||||
# # 🚀 核心修复:安全输出
|
||||
# # =====================================================
|
||||
# # 1. 使用 tanh 限制数值范围在 [-1, 0]
|
||||
# reward = -torch.tanh(error / 0.01)
|
||||
|
||||
# # 2. 过滤 NaN!这一步能救命!
|
||||
# # 如果物理引擎算出 NaN,这里会把它变成 0.0,防止训练崩溃
|
||||
# reward = torch.nan_to_num(reward, nan=0.0, posinf=0.0, neginf=-1.0)
|
||||
|
||||
# return reward
|
||||
|
||||
|
||||
# def gripper_open_close_reward(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# left_gripper_name: str = "left_hand_body",
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# close_threshold: float = 0.05,
|
||||
# target_open_pos: float = 0.0,
|
||||
# target_close_pos: float = 0.03,
|
||||
# height_offset: float = 0.06, # 新增:与位置奖励保持一致
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 逻辑:
|
||||
# 1. 计算夹爪到【目标抓取点】(Lid上方 height_offset 处) 的距离。
|
||||
# 2. 如果距离 < close_threshold:目标关节位置 = 闭合。
|
||||
# 3. 否则:目标关节位置 = 张开。
|
||||
# """
|
||||
|
||||
# # 1. 获取对象
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 2. 计算目标抓取点位置 (Lid位置 + Z轴偏移)
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
# lid_pos_w[:, 2] += height_offset # 修正为抓取点位置
|
||||
|
||||
# # 获取夹爪位置
|
||||
# body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
|
||||
# # 3. 计算距离 (夹爪 vs 目标抓取点)
|
||||
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
|
||||
|
||||
# # 4. 动态确定目标关节值
|
||||
# # 靠近抓取点 -> 闭合;远离 -> 张开
|
||||
# target_val = torch.where(
|
||||
# distance < close_threshold,
|
||||
# torch.tensor(target_close_pos, device=env.device),
|
||||
# torch.tensor(target_open_pos, device=env.device)
|
||||
# ).unsqueeze(1)
|
||||
|
||||
# # 5. 获取当前关节位置
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
|
||||
# current_joint_pos_abs = torch.abs(current_joint_pos)
|
||||
|
||||
# # 6. 计算误差并返回奖励
|
||||
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
|
||||
|
||||
# # 使用 tanh 限制数值范围
|
||||
# reward = 1.0 - torch.tanh(error / 0.01) # 误差越小奖励越大
|
||||
|
||||
# return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
def _is_grasped(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg,
    robot_cfg: SceneEntityCfg,
    left_gripper_name: str,
    right_gripper_name: str,
    joint_names: list,
    height_offset: float,
    grasp_radius: float,
    target_close_pos: float
) -> torch.Tensor:
    """Internal helper: decide whether the lid is successfully grasped.

    Conditions:
    1. The gripper center is near the handle target point (sphere check).
    2. The gripper is actively closing (joint effort check).
    3. (Key) The gripper height is appropriate, i.e. it is not pressing down on
       top of the lid (Z-axis check).
    """
    # 1. Fetch the scene objects.
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]

    # 2. Target point (handle center).
    lid_pos_w = lid.data.root_pos_w[:, :3].clone()
    lid_pos_w[:, 2] += height_offset

    # 3. Gripper positions.
    l_ids, _ = robot.find_bodies([left_gripper_name])
    r_ids, _ = robot.find_bodies([right_gripper_name])
    pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
    pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
    gripper_center = (pos_L + pos_R) / 2.0

    # 4. Condition 1: distance check (inside the sphere around the handle).
    dist_to_handle = torch.norm(gripper_center - lid_pos_w, dim=-1)
    is_near = dist_to_handle < grasp_radius

    # 5. Condition 2: Z-height check (prevent pressing down on top of the lid).
    # The gripper center must stay near the handle target point: a small vertical
    # deviation is allowed, but it must not sit much higher than the handle.
    z_diff = gripper_center[:, 2] - lid_pos_w[:, 2]
    is_z_valid = torch.abs(z_diff) < 0.03  # 3 cm tolerance

    # 6. Condition 3: closing check (the gripper is actively closing).
    joint_ids, _ = robot.find_joints(joint_names)
    # Joint positions (absolute values).
    joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])

    # 6a. Effort check: any closing effort counts. The upper bound is dropped so
    # that a firm grip is not misclassified as a failure.
    is_effort_closing = torch.all(joint_pos > 0.005, dim=1)

    # 6b. Geometry check: if something is actually grasped, the handle keeps the
    # fingertips apart, so their distance cannot be too small.
    dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)

    # Assuming the handle is roughly 1 cm (0.01 m) thick:
    # fingertip distance < 0.005 -> the fingers touch each other (empty grasp);
    # fingertip distance >= 0.005 -> something is caught in between.
    is_not_empty = dist_fingertips > 0.005

    # Combined check:
    # 1. near the handle (is_near)
    # 2. at a valid height (is_z_valid)
    # 3. actively closing (is_effort_closing)
    # 4. fingertips not fully closed (is_not_empty) -> something is actually grasped
    return is_near & is_z_valid & is_effort_closing & is_not_empty
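# Performance note: every reward/termination term above calls `find_bodies` and
# `find_joints` each step, which re-runs name matching. Since the names are fixed,
# the indices could be resolved once and reused. A minimal sketch (assumption: a
# single robot articulation, so the body name alone is a sufficient cache key):
_BODY_INDEX_CACHE: dict[str, int] = {}


def _cached_body_index(robot: Articulation, body_name: str) -> int:
    """Resolve a body index once and reuse it on later calls."""
    if body_name not in _BODY_INDEX_CACHE:
        ids, _ = robot.find_bodies([body_name], preserve_order=True)
        _BODY_INDEX_CACHE[body_name] = ids[0]
    return _BODY_INDEX_CACHE[body_name]

# Usage inside a term (hypothetical):
#   pos_L = robot.data.body_pos_w[:, _cached_body_index(robot, left_gripper_name), :3]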
|
||||
|
||||
|
||||
def gripper_close_when_near(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    target_close_pos: float = 0.03,
    height_offset: float = 0.02,
    grasp_radius: float = 0.05
) -> torch.Tensor:
    # 1. Check whether the basic grasp conditions are met.
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )

    # 2. Compute the clamping intensity.
    robot = env.scene[robot_cfg.name]
    joint_ids, _ = robot.find_joints(joint_names)
    # Absolute joint positions, shape (num_envs, num_joints).
    current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
    # Mean closing depth of the two finger joints.
    mean_joint_pos = torch.mean(current_joint_pos, dim=1)

    # Reward factor: current position / target closed position, so the reward
    # approaches 1.0 as the joints approach 0.03; this pushes the agent to close fully.
    clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)

    # 3. Pay the clamping reward only when the grasp check passes; if the gripper
    # is misaligned or grasping nothing, the reward stays 0.
    return torch.where(is_grasped, clamping_factor, 0.0)
|
||||
|
||||
# def gripper_close_when_near(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
|
||||
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
|
||||
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# target_close_pos: float = 0.03,
|
||||
# height_offset: float = 0.03,
|
||||
# grasp_radius: float = 0.02 # 球面半径 2cm
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# # 1. 获取位置
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
# lid_pos_w[:, 2] += height_offset # 把手中心
|
||||
|
||||
# # 获取左右指尖位置
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
|
||||
# # 计算夹爪中心
|
||||
# pos_center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# # 2. 距离判定 (Is Inside Sphere?)
|
||||
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
|
||||
# is_in_sphere = (dist_center < grasp_radius).float()
|
||||
|
||||
# # 3. "在中间"判定 (Is Between?)
|
||||
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
|
||||
# vec_LR = pos_R - pos_L # 左指指向右指
|
||||
# vec_LO = lid_pos_w - pos_L # 左指指向物体
|
||||
|
||||
# # 计算投影比例 t
|
||||
# # P_proj = P_L + t * (P_R - P_L)
|
||||
# # t = (vec_LO . vec_LR) / |vec_LR|^2
|
||||
# # 如果 0 < t < 1,说明投影在两个指尖之间
|
||||
|
||||
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
|
||||
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
|
||||
# t = dot / len_sq
|
||||
|
||||
# is_between = (t > 0.0) & (t < 1.0)
|
||||
# is_between = is_between.float()
|
||||
|
||||
# # 4. 闭合判定
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # 注意:如果是 Binary Action,可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
|
||||
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
|
||||
# # 只有当非常接近闭合目标时才给分
|
||||
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
|
||||
|
||||
# # 5. 最终奖励
|
||||
# # 只有三者同时满足才给 1.0 分
|
||||
# reward = is_in_sphere * is_between * is_closing
|
||||
|
||||
# return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
# def lid_is_lifted(
|
||||
# env:ManagerBasedRLEnv,
|
||||
# minimal_height:float,
|
||||
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# lid_height = lid.data.root_pos_w[:, 2]
|
||||
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
|
||||
# reward=torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
# return reward
|
||||
|
||||
|
||||
def lid_is_lifted(
    env: ManagerBasedRLEnv,
    minimal_height: float = 0.05,  # relative lift threshold
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    height_offset: float = 0.03,
    grasp_radius: float = 0.1,
    target_close_pos: float = 0.03,
) -> torch.Tensor:

    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )

    lid = env.scene[lid_cfg.name]

    # 1. Heights.
    current_height = lid.data.root_pos_w[:, 2]
    # Initial height taken from the default root state.
    initial_height = lid.data.default_root_state[:, 2]

    # 2. Lift amount.
    lift_height = torch.clamp(current_height - initial_height, min=0.0)

    # 3. Velocity check (guard against the lid being knocked away).
    # If the speed exceeds 1.0 m/s, treat the lid as knocked away and give no reward.
    lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
    is_stable = lid_vel < 1.0

    # 4. Reward.
    # Shaping term: the higher the lift, the larger the reward.
    shaping_reward = lift_height * 2.0
    # Success bonus: lift above the threshold.
    success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)

    # Combine: requires is_grasped AND is_stable.
    reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))

    return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
|
||||
def lid_lift_success_reward(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand_body",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    # Key parameters
    settled_height: float = 0.75,        # resting height of the lid on the ultrasound unit (tune to the simulation)
    target_lift_height: float = 0.05,    # target lift height (5 cm)
    grasp_dist_threshold: float = 0.05,  # distance threshold for the grasp check
    closed_pos: float = 0.03             # gripper closed-position threshold
) -> torch.Tensor:
    """Lift reward: a high score is only paid when the lid is lifted by the target
    amount *relative* to its resting height while being firmly grasped.
    """
    # 1. Fetch data.
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]

    # Current lid height and position.
    lid_z = lid.data.root_pos_w[:, 2]
    lid_pos = lid.data.root_pos_w[:, :3]

    # Gripper position.
    body_ids, _ = robot.find_bodies([left_gripper_name])
    gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]

    # 2. Condition 1: is the lid firmly grasped?
    # Distance check.
    distance = torch.norm(gripper_pos - lid_pos, dim=-1)
    is_close = distance < grasp_dist_threshold

    # Closing check.
    joint_ids, _ = robot.find_joints(joint_names)
    joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
    # Treat > 80% of the closed position as a firm grip.
    is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)

    is_grasped = is_close & is_closed

    # 3. Condition 2: relative lift = current height - initial resting height.
    current_lift = lid_z - settled_height

    # 4. Reward.
    lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)

    # Shaping reward.
    lift_reward = lift_progress

    # Success bonus: an extra reward once the lift reaches the target height.
    success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)

    # Combine: the lift reward is only paid while is_grasped is True.
    total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))

    # 5. Safe output.
    return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
|
||||
def lid_lift_progress_reward(
    env: ManagerBasedRLEnv,
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    initial_height: float = 0.8,
    target_height_lift: float = 0.15,
    height_offset: float = 0.02,  # must match the grasp reward
    grasp_radius: float = 0.1,    # slightly relaxed while lifting, so jitter does not drop the reward
    target_close_pos: float = 0.03,
    std: float = 0.05             # standard deviation of the tanh shaping
) -> torch.Tensor:

    # 1. Check whether the lid is grasped.
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )

    # 2. Compute the lift height.
    lid = env.scene[lid_cfg.name]
    current_height = lid.data.root_pos_w[:, 2]
    lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)

    # 3. Reward: only pay the height reward while is_grasped is True, which rules
    # out collecting reward by knocking or launching the lid away.
    progress = torch.tanh(lift_height / std)

    reward = torch.where(is_grasped, progress, 0.0)

    return reward
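# Picking ``std`` for the tanh shaping above: progress = tanh(lift_height / std).
# With std = 0.05 m the reward saturates quickly:
#   lift = 0.01 m -> tanh(0.2) ~= 0.20
#   lift = 0.05 m -> tanh(1.0) ~= 0.76
#   lift = 0.15 m -> tanh(3.0) ~= 0.995
# A larger std spreads the gradient over a longer portion of the lift.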
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
from __future__ import annotations
|
||||
import torch
|
||||
from typing import TYPE_CHECKING
|
||||
from isaaclab.assets import Articulation, RigidObject
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
def lid_dropped(env: ManagerBasedRLEnv,
|
||||
minimum_height: float = -0.05,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
return lid.data.root_pos_w[:, 2] < minimum_height
|
||||
|
||||
def lid_successfully_grasped(env: ManagerBasedRLEnv,
                             distance_threshold: float = 0.03,
                             height_threshold: float = 0.2,
                             lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
                             robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                             gripper_body_name: str = "left_hand_body") -> torch.Tensor:
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]
    # find_bodies returns (ids, names); take the first matching index so that the
    # indexed positions keep shape (num_envs, 3) instead of (num_envs, 1, 3).
    body_ids, _ = robot.find_bodies([gripper_body_name])
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
    distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
    is_close = distance < distance_threshold
    is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
    return is_close & is_lifted
|
||||
|
||||
def gripper_at_lid_side(env: ManagerBasedRLEnv,
                        lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
                        robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                        left_gripper_name: str = "left_hand__l",  # note the double underscore
                        right_gripper_name: str = "left_hand_r",
                        side_distance: float = 0.05,
                        side_tolerance: float = 0.01,
                        alignment_tolerance: float = 0.02,
                        height_offset: float = 0.1,
                        height_tolerance: float = 0.02) -> torch.Tensor:
    """Terminate when the gripper center sits on the side of the lid at the specified height.

    Frame conventions:
    - X: the direction along which the two fingers close towards each other.
    - Y: the direction of the gap between the fingers, aligned with the lid handle.
    - Z: the height direction.
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]

    lid_pos_w = lid.data.root_pos_w[:, :3]

    # Positions of the two gripper fingers.
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)

    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]

    # Gripper center and its position relative to the lid.
    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
    rel_pos = gripper_center - lid_pos_w

    # Y: should sit at side_distance to the side of the lid.
    y_dist = torch.abs(rel_pos[:, 1])
    y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)

    # X: should be aligned (close to 0).
    x_dist = torch.abs(rel_pos[:, 0])
    x_ok = x_dist < alignment_tolerance

    # Z: should be height_offset above the lid.
    z_error = torch.abs(rel_pos[:, 2] - height_offset)
    z_ok = z_error < height_tolerance

    # All conditions must hold.
    return x_ok & y_ok & z_ok
|
||||
|
||||
|
||||
def base_height_failure(env: ManagerBasedRLEnv,
|
||||
minimum_height: float | None = None,
|
||||
maximum_height: float | None = None,
|
||||
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
|
||||
"""Terminate when the robot's base height is outside the specified range."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
asset: Articulation = env.scene[asset_cfg.name]
|
||||
root_pos_z = asset.data.root_pos_w[:, 2]
|
||||
|
||||
# check if height is outside the range
|
||||
out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool)
|
||||
if minimum_height is not None:
|
||||
out_of_bounds |= root_pos_z < minimum_height
|
||||
if maximum_height is not None:
|
||||
out_of_bounds |= root_pos_z > maximum_height
|
||||
|
||||
return out_of_bounds
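# ------------------------------------------------------------------------------
# A minimal sketch of how these termination functions could be wired into the
# termination manager. The config class, import path and parameter values below
# are illustrative assumptions, not part of this commit.
#
#   from isaaclab.managers import TerminationTermCfg as DoneTerm
#   from isaaclab.utils import configclass
#   from . import terminations as mdp  # hypothetical module path
#
#   @configclass
#   class TerminationsCfg:
#       lid_dropped = DoneTerm(func=mdp.lid_dropped,
#                              params={"minimum_height": -0.05,
#                                      "lid_cfg": SceneEntityCfg("lid")})
#       robot_fell = DoneTerm(func=mdp.base_height_failure,
#                             params={"minimum_height": 0.3,
#                                     "asset_cfg": SceneEntityCfg("Mindbot")})
# ------------------------------------------------------------------------------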
|
||||
|
||||
@@ -177,200 +177,11 @@ def gripper_handle_orientation_alignment(
|
||||
# debug
|
||||
if not torch.isfinite(total_reward).all():
|
||||
print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
|
||||
print(f"rew_vertical: {rew_forward}")
|
||||
print(f"rew_vertical: {dot_fwd}")
|
||||
print(f"rew_yaw: {rew_yaw}")
|
||||
|
||||
return total_reward
|
||||
|
||||
# # 计算点积:我们希望它们垂直,即点积为 0
|
||||
# dot_width_handle = torch.sum(grip_width_world * handle_axis_world, dim=1)
|
||||
|
||||
# # 奖励:1.0 - abs(dot)。点积越小(越垂直),分数越高
|
||||
# rew_perpendicular = 1.0 - torch.abs(dot_width_handle)
|
||||
|
||||
# # 稍微增强一下敏感度,让它更倾向于完美垂直
|
||||
# rew_perpendicular = rew_perpendicular * rew_perpendicular
|
||||
|
||||
# # 3. 组合奖励
|
||||
# total_reward = rew_forward * rew_perpendicular
|
||||
|
||||
# return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
|
||||
# def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_gripper_name: str = "left_hand__l",
|
||||
# right_gripper_name: str = "left_hand_r",
|
||||
# height_offset: float = 0.03,
|
||||
# std: float = 0.1) -> torch.Tensor:
|
||||
|
||||
# dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
|
||||
# robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# # lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# # [num_envs, 3]
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# # 目标点:lid位置 + height_offset(Z方向)
|
||||
# target_pos = handle_pos_w.clone()
|
||||
# # print(target_pos[0])
|
||||
# target_pos[:, 0] -= height_offset # 在lid上方height_offset处
|
||||
# # print(target_pos[0])
|
||||
# # 获取两个夹爪的位置
|
||||
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
|
||||
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
|
||||
|
||||
# # 计算夹爪中心位置
|
||||
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
|
||||
# # print(gripper_center[0])
|
||||
# # 计算夹爪中心到目标点的3D距离
|
||||
# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# # 距离奖励:距离越小,奖励越大
|
||||
# position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
# return position_reward
|
||||
|
||||
|
||||
# def gripper_handle_position_alignment(
|
||||
# env,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_finger_name: str = "right_hand_l",
|
||||
# right_finger_name: str = "right_hand__r",
|
||||
# # 【修改点1】:根据你的描述,长轴是 Local Z,所以改为 (0, 0, 1)
|
||||
# handle_local_axis: tuple = (0.0, 0.0, 1.0),
|
||||
# # 【新增】:明确指定手柄的“侧面”是 Local X 轴
|
||||
# handle_local_side_axis: tuple = (0.0, -1.0, 0.0),
|
||||
# grasp_width: float = 0.03,
|
||||
# grasp_offset: float = 0.06,
|
||||
# std: float = 0.05,
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 修正版:
|
||||
# 1. 修正了手柄长轴为 Local Z。
|
||||
# 2. 使用 Local X 作为手柄两侧的基准方向。
|
||||
# """
|
||||
# # 1. 获取数据
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
|
||||
# # [num_envs, 3]
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# # [num_envs, 4]
|
||||
# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :]
|
||||
|
||||
# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True)
|
||||
# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True)
|
||||
# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :]
|
||||
# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :]
|
||||
|
||||
# # 2. 计算关键向量
|
||||
# # 【修正】:计算世界坐标系下的手柄长轴 (应为 World Z)
|
||||
# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# # 虽然本奖励函数主要位置点,不需要显式用到长轴向量做点积,但保留以备后续增加角度奖励
|
||||
# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec))
|
||||
|
||||
# # 【核心修改】:直接计算世界坐标系下的手柄“侧向”向量 (即 Local X -> World Y)
|
||||
# # 这种方法比 cross(approach, axis) 更准确,因为它尊重了物体自身的旋转
|
||||
# handle_side_local_vec = torch.tensor(handle_local_side_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# side_vec_world = normalize(quat_apply(handle_quat_w, handle_side_local_vec))
|
||||
|
||||
# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱)
|
||||
# # 注意:如果你的烘箱放置位置旋转了,这里也建议改为读取机器人的前方或者物体的Local方向
|
||||
# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# # 3. 计算理想的抓取点
|
||||
# # 抓取中心:从手柄中心沿着抓取方向(World X)反向退后 grasp_offset
|
||||
# grasp_center = handle_pos_w - approach_dir * grasp_offset
|
||||
|
||||
# # 计算理想的左/右手指位置
|
||||
# # 既然 side_vec_world 是 Local X (对应 World Y)
|
||||
# # 我们希望手指在 Y 轴两侧张开
|
||||
# ideal_l_pos = grasp_center + side_vec_world * (grasp_width / 2.0)
|
||||
# ideal_r_pos = grasp_center - side_vec_world * (grasp_width / 2.0)
|
||||
|
||||
# # 注意:这里 l_pos 用 + 还是 - 取决于你的 robot 建模里 right_hand_l 实际是在正Y侧还是负Y侧
|
||||
# # 如果发现左右手指交叉,交换上面的 + 和 - 即可。
|
||||
|
||||
# # 4. 计算距离奖励
|
||||
# dist_l = torch.norm(l_finger_pos - ideal_r_pos, dim=1)
|
||||
# dist_r = torch.norm(r_finger_pos - ideal_l_pos, dim=1)
|
||||
|
||||
# rew_l = 1.0 - torch.tanh(dist_l / std)
|
||||
# rew_r = 1.0 - torch.tanh(dist_r / std)
|
||||
|
||||
# total_reward = (rew_l + rew_r) / 2.0
|
||||
|
||||
# return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
# def gripper_handle_position_alignment(
|
||||
# env,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_finger_name: str = "right_hand_l", # 左手指 body 名称
|
||||
# right_finger_name: str = "right_hand__r", # 右手指 body 名称
|
||||
# handle_local_axis: tuple = (0.0, 0.0, 1.0), # 手柄长轴
|
||||
# grasp_width: float = 0.03, # 预期的抓取宽度 (米)
|
||||
# grasp_offset: float = 0.03, # 手指尖距离手柄中心的进深偏移
|
||||
# std: float = 0.05, # 奖励函数的敏感度
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 双指位置奖励:引导左/右手指分别到达手柄的两侧。
|
||||
# """
|
||||
# # 1. 获取数据
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :]
|
||||
|
||||
# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True)
|
||||
# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True)
|
||||
# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :]
|
||||
# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :]
|
||||
|
||||
# # 2. 计算理想的抓取点
|
||||
# # 获取手柄当前轴向
|
||||
# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec))
|
||||
|
||||
# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱)
|
||||
# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# # 计算垂直于手柄轴向和接近方向的向量,这就是“侧面”方向
|
||||
# side_vec = normalize(torch.cross(approach_dir, handle_axis_world))
|
||||
|
||||
# # 计算抓取中心点 (手柄中心沿接近方向反向偏移一点,留出空间)
|
||||
# grasp_center = handle_pos_w - approach_dir * grasp_offset
|
||||
|
||||
# # 计算理想的左/右手指位置 (在中心点两侧张开)
|
||||
# # 注意:这里需要根据你的坐标系确认哪个是左,哪个是右
|
||||
# ideal_l_pos = grasp_center - side_vec * (grasp_width / 2.0)
|
||||
# ideal_r_pos = grasp_center + side_vec * (grasp_width / 2.0)
|
||||
|
||||
# # 3. 计算距离奖励
|
||||
# dist_l = torch.norm(l_finger_pos - ideal_l_pos, dim=1)
|
||||
# dist_r = torch.norm(r_finger_pos - ideal_r_pos, dim=1)
|
||||
|
||||
# rew_l = 1.0 - torch.tanh(dist_l / std)
|
||||
# rew_r = 1.0 - torch.tanh(dist_r / std)
|
||||
|
||||
# # 取平均值
|
||||
# total_reward = (rew_l + rew_r) / 2.0
|
||||
|
||||
# return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
|
||||
# 在 rewards.py 中,修改 _is_grasped 函数,使其支持 dryingbox 和 handle_name
|
||||
def _is_grasped(
|
||||
@@ -400,8 +211,8 @@ def _is_grasped(
|
||||
handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone()
|
||||
handle_pos_w[:, 1] += height_offset  # offset along the Y axis
|
||||
handle_pos_w[:, 0] += 0.02
|
||||
handle_pos_w[:, 2] -= 0.04
|
||||
handle_pos_w[:, 0] += 0.005
|
||||
# handle_pos_w[:, 2] -= 0.04
|
||||
# 3. 夹爪位置
|
||||
l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
@@ -411,7 +222,7 @@ def _is_grasped(
|
||||
|
||||
# 4. 条件一:距离判定 (在把手球范围内)
|
||||
dist_to_handle = torch.norm(gripper_center - handle_pos_w, dim=-1)
|
||||
if env.common_step_counter % 100 == 0:
|
||||
if env.common_step_counter % 500 == 0:
|
||||
print(f"Env 0 Dist to Handle: {dist_to_handle[0].item():.4f}m")
|
||||
is_near = dist_to_handle < grasp_radius
|
||||
|
||||
@@ -479,80 +290,51 @@ def gripper_close_when_near(
|
||||
|
||||
# def gripper_close_when_near(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
|
||||
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
|
||||
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# target_close_pos: float = 0.03,
|
||||
# height_offset: float = 0.03,
|
||||
# grasp_radius: float = 0.02 # 球面半径 2cm
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_gripper_name: str = "right_hand_l",
|
||||
# right_gripper_name: str = "right_hand__r",
|
||||
# joint_names: list = ["right_hand_joint_left", "right_hand_joint_right"], # 保留接口但不强依赖
|
||||
# # 关键参数
|
||||
# handle_thickness: float = 0.0388, # 把手厚度(米),≈3.88cm
|
||||
# thickness_tol: float = 0.008, # 容差(米),≈0.8cm,控制惩罚/奖励斜率
|
||||
# grasp_radius: float = 0.04, # 抓取判定半径,略大于把手厚度
|
||||
# height_offset: float = 0.00, # 如需上下偏移可调整
|
||||
# target_close_pos: float = 0.03, # 保留给 joint/clamping 兼容
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# # 1. 获取位置
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
# lid_pos_w[:, 2] += height_offset # 把手中心
|
||||
|
||||
# # 获取左右指尖位置
|
||||
# # 目标位置(把手中心,可做微偏移)
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone()
|
||||
# handle_pos[:, 1] += height_offset
|
||||
|
||||
# # 手指位置与中心
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
# center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# # 计算夹爪中心
|
||||
# pos_center = (pos_L + pos_R) / 2.0
|
||||
# # 到把手的距离(只在近距离才给分)
|
||||
# dist = torch.norm(center - handle_pos, dim=-1)
|
||||
# is_near = dist < grasp_radius
|
||||
|
||||
# # 2. 距离判定 (Is Inside Sphere?)
|
||||
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
|
||||
# is_in_sphere = (dist_center < grasp_radius).float()
|
||||
# # 实际两指间距
|
||||
# finger_dist = torch.norm(pos_L - pos_R, dim=-1)
|
||||
|
||||
# # 3. "在中间"判定 (Is Between?)
|
||||
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
|
||||
# vec_LR = pos_R - pos_L # 左指指向右指
|
||||
# vec_LO = lid_pos_w - pos_L # 左指指向物体
|
||||
# # 奖励:两指间距越接近把手厚度越好,平滑衰减
|
||||
# error = torch.abs(finger_dist - handle_thickness)
|
||||
# close_reward = 1.0 - torch.tanh(error / thickness_tol)
|
||||
|
||||
# # 计算投影比例 t
|
||||
# # P_proj = P_L + t * (P_R - P_L)
|
||||
# # t = (vec_LO . vec_LR) / |vec_LR|^2
|
||||
# # 如果 0 < t < 1,说明投影在两个指尖之间
|
||||
# # 避免 NaN
|
||||
# close_reward = torch.nan_to_num(close_reward, nan=0.0)
|
||||
|
||||
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
|
||||
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
|
||||
# t = dot / len_sq
|
||||
|
||||
# is_between = (t > 0.0) & (t < 1.0)
|
||||
# is_between = is_between.float()
|
||||
|
||||
# # 4. 闭合判定
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # 注意:如果是 Binary Action,可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
|
||||
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
|
||||
# # 只有当非常接近闭合目标时才给分
|
||||
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
|
||||
|
||||
# # 5. 最终奖励
|
||||
# # 只有三者同时满足才给 1.0 分
|
||||
# reward = is_in_sphere * is_between * is_closing
|
||||
|
||||
# return torch.nan_to_num(reward, nan=0.0)
|
||||
# return torch.where(is_near, close_reward, torch.zeros_like(close_reward))
|
||||
|
||||
|
||||
# def lid_is_lifted(
|
||||
# env:ManagerBasedRLEnv,
|
||||
# minimal_height:float,
|
||||
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# lid_height = lid.data.root_pos_w[:, 2]
|
||||
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
|
||||
# reward=torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
# return reward
|
||||
|
||||
|
||||
def lid_is_lifted(
|
||||
@@ -698,39 +480,7 @@ def lid_lift_progress_reward(
|
||||
|
||||
return reward
|
||||
|
||||
# def lid_grasped_bonus(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg,
|
||||
# robot_cfg: SceneEntityCfg,
|
||||
# left_gripper_name: str,
|
||||
# joint_names: list,
|
||||
# distance_threshold: float = 0.05,
|
||||
# closed_pos: float = 0.03
|
||||
# ) -> torch.Tensor:
|
||||
# # 1. 计算距离
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
# lid_pos = lid.data.root_pos_w[:, :3]
|
||||
# distance = torch.norm(gripper_pos - lid_pos, dim=-1)
|
||||
|
||||
# # 2. 检查距离是否达标
|
||||
# is_close = distance < distance_threshold
|
||||
|
||||
# # 3. 检查夹爪是否闭合
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# # 检查是否接近闭合 (比如 > 0.02)
|
||||
# is_closed = torch.all(joint_pos > (closed_pos * 0.8), dim=-1)
|
||||
|
||||
# # 4. 给分
|
||||
# # 只有同时满足才给 1.0
|
||||
# reward = torch.where(is_close & is_closed, 1.0, 0.0)
|
||||
|
||||
# return torch.nan_to_num(reward)
|
||||
|
||||
# 在 rewards.py 文件开头添加调试函数
|
||||
def debug_robot_physics(
|
||||
env: ManagerBasedRLEnv,
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
@@ -771,64 +521,15 @@ def debug_robot_physics(
|
||||
    # 4. Check for abnormally high velocities (a possible source of collision problems).
    high_vel_mask = (gripper_lin_vel_mag > 0.5) | (root_lin_vel_magnitude > 0.3)

    if torch.any(high_vel_mask):
        env_ids = torch.where(high_vel_mask)[0].cpu().tolist()
        for env_id in env_ids[:3]:  # only print the first 3 environments
            print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:")
            print(f"  robot root linear velocity: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)")
            print(f"  robot root angular velocity: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)")
            print(f"  gripper linear velocity: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)")
            print(f"  gripper angular velocity: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)")
            print(f"  max joint velocity: {max_joint_vel[env_id].item():.4f} rad/s")
|
||||
|
||||
# def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_gripper_name: str = "right_hand_l",
|
||||
# right_gripper_name: str = "right_hand__r",
|
||||
# height_offset: float = 0.03,
|
||||
# std: float = 0.1) -> torch.Tensor:
|
||||
|
||||
# dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
|
||||
# robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# target_pos = handle_pos_w.clone()
|
||||
# target_pos[:, 1] += height_offset
|
||||
# target_pos[:, 0] += 0.02
|
||||
# target_pos[:, 2] -= 0.04
|
||||
|
||||
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
|
||||
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
|
||||
|
||||
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
|
||||
# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# # ===== 添加调试打印 =====
|
||||
# # 当夹爪接近手柄时(距离 < 0.15m)打印物理信息
|
||||
# close_mask = distance < 0.03
|
||||
# if torch.any(close_mask):
|
||||
# debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5)
|
||||
# # ========================
|
||||
|
||||
# position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
# # 计算距离后插入
|
||||
# if env.common_step_counter % 200 == 0:
|
||||
# print("pos_align dbg env0: "
|
||||
# f"handle={handle_pos_w[0].cpu().numpy()}, "
|
||||
# f"target={target_pos[0].cpu().numpy()}, "
|
||||
# f"gripL={left_gripper_pos[0].cpu().numpy()}, "
|
||||
# f"gripR={right_gripper_pos[0].cpu().numpy()}, "
|
||||
# f"center={gripper_center[0].cpu().numpy()}, "
|
||||
# f"dist={distance[0].item():.4f}")
|
||||
|
||||
# return position_reward
|
||||
# if torch.any(high_vel_mask):
|
||||
# env_ids = torch.where(high_vel_mask)[0].cpu().tolist()
|
||||
# for env_id in env_ids[:3]: # 只打印前3个环境
|
||||
# print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:")
|
||||
# print(f" 机器人根节点线性速度: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)")
|
||||
# print(f" 机器人根节点角速度: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)")
|
||||
# print(f" 夹爪线性速度: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)")
|
||||
# print(f" 夹爪角速度: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)")
|
||||
# print(f" 最大关节速度: {max_joint_vel[env_id].item():.4f} rad/s")
|
||||
|
||||
def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
@@ -846,8 +547,8 @@ def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
target_pos = handle_pos_w.clone()
|
||||
target_pos[:, 1] += height_offset
|
||||
# target_pos[:, 0] += 0.02
|
||||
target_pos[:, 2] -= 0.02
|
||||
target_pos[:, 0] += 0.005
|
||||
# target_pos[:, 2] -= 0.04
|
||||
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
@@ -878,3 +579,257 @@ def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
f"dist={distance[0].item():.4f}")
|
||||
|
||||
return position_reward
|
||||
|
||||
|
||||
|
||||
def gripper_open_when_far(
    env: ManagerBasedRLEnv,
    dryingbox_cfg: SceneEntityCfg,   # kept for interface consistency, no longer used
    robot_cfg: SceneEntityCfg,
    handle_name: str,                # kept for interface consistency, no longer used
    left_gripper_name: str,
    right_gripper_name: str,
    joint_names: list,               # no longer used
    dist_threshold: float = 0.5,     # no longer used
    target_open_dist: float = 0.06,  # finger-to-finger distance when fully open (meters)
) -> torch.Tensor:
    """Reward the gripper for being fully open.

    The reward depends only on the actual distance between the two fingers and
    peaks when that distance reaches target_open_dist (0.06 m).
    """
    # 1. Fetch the robot.
    robot = env.scene[robot_cfg.name]

    # 2. World positions of the two fingers.
    l_ids, _ = robot.find_bodies([left_gripper_name])
    r_ids, _ = robot.find_bodies([right_gripper_name])

    pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
    pos_R = robot.data.body_pos_w[:, r_ids[0], :3]

    # 3. Actual finger-to-finger distance.
    finger_dist = torch.norm(pos_L - pos_R, dim=-1)

    # 4. Reward: the closer the distance is to the fully-open distance, the higher the reward.
    error = torch.abs(finger_dist - target_open_dist)
    reward = 1.0 - torch.tanh(error / 0.01)  # 0.01 m (1 cm) smoothing scale

    return reward
|
||||
|
||||
|
||||
def gripper_open_penalty_when_far(
    env: ManagerBasedRLEnv,
    dryingbox_cfg: SceneEntityCfg,
    robot_cfg: SceneEntityCfg,
    handle_name: str,
    left_gripper_name: str,
    right_gripper_name: str,
    joint_names: list,
    dist_threshold: float = 0.03,  # grasp exemption zone (no penalty inside this distance)
) -> torch.Tensor:
    """Corrected penalty term.

    Beyond dist_threshold (3 cm), penalize the gripper for not being fully open.
    """
    robot = env.scene[robot_cfg.name]
    dryingbox = env.scene[dryingbox_cfg.name]

    # 1. Positions.
    handle_ids, _ = dryingbox.find_bodies([handle_name])
    handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]

    l_ids, _ = robot.find_bodies([left_gripper_name])
    r_ids, _ = robot.find_bodies([right_gripper_name])

    pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
    pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
    gripper_center = (pos_L + pos_R) / 2.0
    distance = torch.norm(gripper_center - handle_pos, dim=1)

    # 2. Closedness (0 = fully open, 1 = fully closed).
    # Reading the joint positions directly is more accurate than the fingertip distance.
    joint_ids, _ = robot.find_joints(joint_names)
    joint_pos = robot.data.joint_pos[:, joint_ids]
    # Assume 0.0 is open and 0.03 is closed; normalize the mean absolute value.
    closedness = torch.clamp(torch.mean(torch.abs(joint_pos), dim=1) / 0.03, 0.0, 1.0)

    # 3. Penalty gate (the core of the fix).
    # Start penalizing once distance > dist_threshold. A tanh is used instead of a
    # hard switch to avoid oscillation:
    #   dist = 0.03 -> gate = 0; dist = 0.05 -> gate close to 1.
    is_far_gate = torch.tanh((distance - dist_threshold) * 100.0)
    # Clip to [0, 1] so distances below the threshold cannot produce negative values.
    is_far_gate = torch.clamp(is_far_gate, 0.0, 1.0)

    # 4. Final penalty: only deduct points when the gripper is both far away and closed.
    penalty = is_far_gate * closedness

    return -penalty
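# Sanity check of the gate above with dist_threshold = 0.03 m:
#   tanh((0.030 - 0.03) * 100) = 0.00
#   tanh((0.035 - 0.03) * 100) ~= 0.46
#   tanh((0.050 - 0.03) * 100) ~= 0.96
# so the penalty fades in over roughly 2 cm beyond the grasp exemption zone and
# then scales with how closed the gripper is.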
|
||||
|
||||
# def gripper_open_penalty_when_far(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg,
|
||||
# robot_cfg: SceneEntityCfg,
|
||||
# handle_name: str,
|
||||
# left_gripper_name: str,
|
||||
# right_gripper_name: str,
|
||||
# joint_names: list,
|
||||
# dist_threshold: float = 0.03, # 3cm 抓取保护区
|
||||
# max_penalty_dist: float = 0.2, # 0.55m 惩罚激活边界
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 使用 Sigmoid 激活函数的惩罚项:
|
||||
# 1. 距离把手 < 0.55m 时,惩罚迅速激活并保持在高位 (接近 1.0)。
|
||||
# 2. 距离把手 < 0.03m 时,惩罚强制清零,允许闭合抓取。
|
||||
# 3. 两手指距离越小,惩罚值越大。
|
||||
# """
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
|
||||
# # 1. 获取位置信息
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name])
|
||||
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
|
||||
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
|
||||
# # 2. 计算距离
|
||||
# gripper_center = (pos_L + pos_R) / 2.0
|
||||
# distance = torch.norm(gripper_center - handle_pos, dim=1)
|
||||
# finger_dist = torch.norm(pos_L - pos_R, dim=-1)
|
||||
|
||||
# # 3. 距离激活门控 (Sigmoid 方法)
|
||||
# # 当 distance < 0.55 时,(0.55 - dist) 为正,sigmoid 趋近于 1.0
|
||||
# # 50.0 是陡峭系数,系数越大,0.55m 处的切换越像开关
|
||||
# dist_gate = torch.sigmoid((max_penalty_dist - distance) * 50.0)
|
||||
|
||||
# # 4. 抓取区保护 (Deadzone)
|
||||
# # 距离小于 3cm 时,is_not_near 变为 0,完全撤销惩罚
|
||||
# is_not_near = (distance > dist_threshold).float()
|
||||
|
||||
# # 5. 闭合程度计算 (0 = 全开, 1 = 全闭)
|
||||
# # 假设手指最大张开距离为 0.06m
|
||||
# max_open = 0.06
|
||||
# closedness = torch.clamp(1.0 - (finger_dist / max_open), 0.0, 1.0)
|
||||
|
||||
# # 6. 综合计算并 Clamp
|
||||
# # 只有在 (0.03 < distance < 0.55) 范围内且手指闭合时,值才会很大
|
||||
# penalty_magnitude = dist_gate * is_not_near * closedness
|
||||
|
||||
# # 返回负值作为惩罚 (值域 [-1, 0])
|
||||
# return -torch.clamp(penalty_magnitude, 0.0, 1.0)
|
||||
|
||||
|
||||
# def gripper_open_until_near(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg,
|
||||
# robot_cfg: SceneEntityCfg,
|
||||
# handle_name: str,
|
||||
# left_gripper_name: str,
|
||||
# right_gripper_name: str,
|
||||
# joint_names: list,
|
||||
# dist_threshold: float = 0.03, # 3cm 阈值
|
||||
# ) -> torch.Tensor:
|
||||
# """奖励夹爪在远离目标时保持打开状态,接近后不再约束。"""
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
|
||||
# # 1. 计算夹爪中心到把手的距离
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name])
|
||||
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
|
||||
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# gripper_center = (robot.data.body_pos_w[:, l_ids[0], :3] + robot.data.body_pos_w[:, r_ids[0], :3]) / 2.0
|
||||
|
||||
# distance = torch.norm(gripper_center - handle_pos, dim=1)
|
||||
|
||||
# # 2. 获取夹爪当前关节位置 (假设 0 是完全打开)
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # 计算当前关节位置与 0(打开状态)的偏差
|
||||
# joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# mean_error = torch.mean(joint_pos_abs, dim=1)
|
||||
|
||||
# # 3. 计算奖励逻辑
|
||||
# # 距离 > 3cm 时:关节越接近 0(打开),奖励越高 (最大为 1.0)
|
||||
# # 距离 <= 3cm 时:奖励为 0,不干涉模型闭合
|
||||
# is_far = distance > dist_threshold
|
||||
# open_reward = 1.0 - torch.tanh(mean_error / 0.01) # 1cm 内平滑衰减
|
||||
|
||||
# return torch.where(is_far, open_reward, torch.zeros_like(open_reward))
|
||||
|
||||
# ... 在文件末尾添加以下函数 ...
|
||||
|
||||
def gripper_grasp_stability(
|
||||
env: ManagerBasedRLEnv,
|
||||
robot_cfg: SceneEntityCfg,
|
||||
joint_names: list,
|
||||
vel_threshold: float = 0.005 # 极低的速度阈值,判定为“静止”
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
奖励夹爪在闭合状态下的稳定性。
|
||||
只有当关节速度极小时(即没有在抖动或移动),才发放奖励。
|
||||
"""
|
||||
robot = env.scene[robot_cfg.name]
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
|
||||
# 获取夹爪关节的绝对速度
|
||||
joint_vel = torch.abs(robot.data.joint_vel[:, joint_ids])
|
||||
|
||||
# 判定是否所有夹爪关节都处于“静止”状态
|
||||
is_stable = torch.all(joint_vel < vel_threshold, dim=1)
|
||||
|
||||
    # Stability only matters while the gripper is actually closed on the handle, so this
    # term is usually combined with the grasp check (see the sketch after this function);
    # here we only return the stability factor.
    return is_stable.float()
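
# ---------------------------------------------------------------------------
# [Illustrative sketch, not part of the original commit]
# The comment above suggests pairing the stability term with the grasp check.
# A minimal way to do that, reusing the helpers already in this file; the name
# `gripper_grasp_stability_gated` is hypothetical and is not wired into any
# RewardsCfg unless added explicitly.
# ---------------------------------------------------------------------------
def gripper_grasp_stability_gated(
    env: ManagerBasedRLEnv,
    dryingbox_cfg: SceneEntityCfg,
    robot_cfg: SceneEntityCfg,
    handle_name: str,
    left_gripper_name: str,
    right_gripper_name: str,
    joint_names: list,
    height_offset: float = 0.0,
    grasp_radius: float = 0.05,
    target_close_pos: float = 0.012,
    vel_threshold: float = 0.005,
) -> torch.Tensor:
    """Stability reward that only pays out while the handle is actually grasped."""
    # stability factor: 1.0 only when all gripper joints are effectively still
    stability = gripper_grasp_stability(env, robot_cfg, joint_names, vel_threshold)
    # grasp check: same helper used by gripper_close_interaction_v2 below
    grasped = _is_grasped(
        env, dryingbox_cfg, robot_cfg, handle_name,
        left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos,
    )
    return stability * grasped.float()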
|
||||
|
||||
def gripper_close_interaction_v2(
|
||||
env: ManagerBasedRLEnv,
|
||||
dryingbox_cfg: SceneEntityCfg,
|
||||
robot_cfg: SceneEntityCfg,
|
||||
handle_name: str,
|
||||
left_gripper_name: str,
|
||||
right_gripper_name: str,
|
||||
joint_names: list,
|
||||
target_close_pos: float = 0.012,
|
||||
height_offset: float = 0.00,
|
||||
grasp_radius: float = 0.05
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
增强版闭合奖励:结合了位置、闭合程度和物理稳定性。
|
||||
"""
|
||||
# 1. 基础抓取判定
|
||||
is_grasped = _is_grasped(
|
||||
env, dryingbox_cfg, robot_cfg, handle_name,
|
||||
left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
# 2. 计算闭合程度 (0.0 到 1.0)
|
||||
robot = env.scene[robot_cfg.name]
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
mean_joint_pos = torch.mean(current_joint_pos, dim=1)
|
||||
clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
|
||||
|
||||
# 3. 计算稳定性因子 (速度越快,奖励衰减越厉害)
|
||||
joint_vel = torch.norm(robot.data.joint_vel[:, joint_ids], dim=1)
|
||||
# 使用 exp(-x) 让奖励对速度非常敏感,速度稍大奖励就迅速消失
|
||||
stability_factor = torch.exp(-20.0 * joint_vel)
|
||||
|
||||
# 综合奖励:必须满足抓取条件,且闭合得越深、动作越稳,分数越高
|
||||
return torch.where(is_grasped, clamping_factor * stability_factor, torch.zeros_like(clamping_factor))
|
||||
@@ -0,0 +1,967 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import torch
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from isaaclab.assets import Articulation, RigidObject
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
|
||||
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
|
||||
"""Penalize joint position deviation from a target value."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
asset: Articulation = env.scene[asset_cfg.name]
|
||||
# wrap the joint positions to (-pi, pi)
|
||||
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
|
||||
# compute the reward
|
||||
return torch.sum(torch.square(joint_pos - target), dim=1)
|
||||
|
||||
|
||||
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
|
||||
"""Reward the agent for lifting the lid above the minimal height."""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
|
||||
height = lid.data.root_pos_w[:, 2]
|
||||
return torch.where(height > minimal_height, 1.0, 0.0)
|
||||
|
||||
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Reward the agent for reaching the lid using tanh-kernel."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
    # force std to a plain float so it does not silently become a (3,) vector
|
||||
if not isinstance(std, float):
|
||||
if torch.is_tensor(std):
|
||||
std = std.item()
|
||||
else:
|
||||
std = float(std)
|
||||
|
||||
# Target lid position: (num_envs, 3)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# Gripper position: (num_envs, 3)
|
||||
# body_idx = robot.find_bodies(gripper_body_name)[0]
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
|
||||
# Distance of the gripper to the lid: (num_envs,)
|
||||
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
|
||||
|
||||
return 1 - torch.tanh(distance / std)
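# [Illustration, not from the original commit] Shape of the tanh kernel above:
#   distance = 0         -> reward = 1.0
#   distance = 0.5 * std -> 1 - tanh(0.5) ≈ 0.54
#   distance = std       -> 1 - tanh(1.0) ≈ 0.24
#   distance = 2 * std   -> 1 - tanh(2.0) ≈ 0.04
# so std directly sets the distance scale over which the reaching reward decays.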
|
||||
|
||||
|
||||
def lid_grasped(env: ManagerBasedRLEnv,
|
||||
distance_threshold: float = 0.05,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
                robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Reward the agent for grasping the lid (close and lifted)."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# Target lid position: (num_envs, 3)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# Gripper position: (num_envs, 3)
|
||||
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
|
||||
|
||||
# Distance of the gripper to the lid: (num_envs,)
|
||||
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
|
||||
|
||||
# Check if close and lifted
|
||||
is_close = distance < distance_threshold
|
||||
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
|
||||
|
||||
return torch.where(is_close & is_lifted, 1.0, 0.0)
|
||||
|
||||
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Combined reward for reaching the lid AND lifting it."""
|
||||
# Get reaching reward
|
||||
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
|
||||
# Get lifting reward
|
||||
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
|
||||
# Combine rewards multiplicatively
|
||||
return reach_reward * lift_reward
|
||||
|
||||
|
||||
def gripper_handle_orientation_alignment(
|
||||
env,
|
||||
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
handle_name: str = "Equipment_BB_13_C",
|
||||
gripper_body_name: str = "right_hand_body",
|
||||
    gripper_forward_axis: tuple = (0.0, 0.0, 1.0),  # direction the fingertips point (local Z)
    gripper_width_axis: tuple = (0.0, 1.0, 0.0),    # finger opening/closing axis (local Y)
    lid_handle_axis: tuple = (0.0, 0.0, 1.0),
    max_angle_deg: float = 60.0,
    # the handle_local_axis parameter was removed: using the world Z axis directly is more robust
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
修正版:
|
||||
1. 解决了梯度消失:使用 (dot+1)/2 替代 clamp,保证全角度有梯度。
|
||||
2. 解决了轴向错误:直接认定手柄是垂直的 (World Z),修复抓取姿态。
|
||||
"""
|
||||
# 1. 获取数据
|
||||
# dryingbox = env.scene[dryingbox_cfg.name] (计算轴向不再需要它,节省计算)
|
||||
robot = env.scene[robot_cfg.name]
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
|
||||
|
||||
    box = env.scene[dryingbox_cfg.name]
    hd_ids, _ = box.find_bodies([handle_name], preserve_order=True)
    handle_quat_w = box.data.body_quat_w[:, hd_ids[0], :]
|
||||
|
||||
# --- 约束 A: 指向约束 (Pointing) ---
|
||||
# 目标:夹爪 Z 轴 指向 World +X
|
||||
grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
|
||||
|
||||
target_fwd = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
dot_fwd = torch.sum(grip_fwd_world * target_fwd, dim=1)
|
||||
    dot_fwd = torch.clamp(dot_fwd, -1.0, 1.0)
    angle_rad = torch.acos(dot_fwd)
|
||||
max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device)
|
||||
angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
|
||||
rew_forward = 1.0 - angle_normalized # 角度越小,奖励越大
|
||||
# 如果角度超过60度,奖励为0
|
||||
rew_forward = torch.where(angle_rad <= max_angle_rad, rew_forward, torch.zeros_like(rew_forward))
|
||||
# nan -> 0
|
||||
rew_forward = torch.where(torch.isnan(rew_forward), torch.zeros_like(rew_forward), rew_forward)
|
||||
|
||||
# [关键修改 1] 使用软化奖励,提供全局梯度
|
||||
# 范围 [0, 1]: 1.0 是完美对齐,0.5 是垂直,0.0 是完全背对
|
||||
# 这样即使现在是指向地面的 (dot=0),它也有 0.5 分,且知道往上抬分会变高
|
||||
# rew_forward = (dot_fwd + 1.0) / 2.0
|
||||
# 如果你希望能更陡峭一点,可以加个平方: rew_forward = rew_forward * rew_forward
|
||||
|
||||
# --- 约束 B: 抓取姿态约束 (Grasp Alignment) ---
|
||||
# 目标:夹爪的开合轴 (Y) 应该垂直于 手柄轴 (Z)
|
||||
# 这样手指就是水平开合的
|
||||
|
||||
grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
grip_width_world = normalize(quat_apply(gripper_quat_w, grip_width_local))
|
||||
|
||||
# [关键修改 2] 强制假设手柄是垂直的 (World Z)
|
||||
# 除非你的烘箱会倾斜倒在地上,否则这是最稳健的写法,不用管局部坐标系
|
||||
handle_axis_local = torch.tensor([0.0, 0.0, 1.0], device=env.device).repeat(env.num_envs, 1)
|
||||
    handle_axis_world = quat_apply(handle_quat_w, handle_axis_local)
    dot_yaw = torch.sum(grip_width_world * handle_axis_world, dim=1)
    rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
|
||||
# nan -> 0
|
||||
rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
|
||||
|
||||
# 5. 组合
|
||||
total_reward = rew_forward * rew_yaw
|
||||
|
||||
total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
|
||||
# debug
|
||||
if not torch.isfinite(total_reward).all():
|
||||
print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
|
||||
print(f"rew_vertical: {rew_vertical}")
|
||||
print(f"rew_yaw: {rew_yaw}")
|
||||
|
||||
return total_reward
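# [Illustration, not from the original commit] Value ranges of the two factors above,
# assuming max_angle_deg = 60:
#   rew_forward: 1.0 when the gripper Z axis points along world +X, 0.5 at a 30° error, 0.0 beyond 60°.
#   rew_yaw:     dot_yaw ** 2 (+ 1e-4), i.e. close to 1.0 when the finger opening axis is parallel to the
#                handle axis and close to 0.0 when perpendicular; note that the comment above describes
#                the perpendicular variant, which is the version kept commented out below.
# The product is only large when both the pointing direction and the wrist roll are right.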
|
||||
|
||||
# # 计算点积:我们希望它们垂直,即点积为 0
|
||||
# dot_width_handle = torch.sum(grip_width_world * handle_axis_world, dim=1)
|
||||
|
||||
# # 奖励:1.0 - abs(dot)。点积越小(越垂直),分数越高
|
||||
# rew_perpendicular = 1.0 - torch.abs(dot_width_handle)
|
||||
|
||||
# # 稍微增强一下敏感度,让它更倾向于完美垂直
|
||||
# rew_perpendicular = rew_perpendicular * rew_perpendicular
|
||||
|
||||
# # 3. 组合奖励
|
||||
# total_reward = rew_forward * rew_perpendicular
|
||||
|
||||
# return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
|
||||
# def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_gripper_name: str = "left_hand__l",
|
||||
# right_gripper_name: str = "left_hand_r",
|
||||
# height_offset: float = 0.03,
|
||||
# std: float = 0.1) -> torch.Tensor:
|
||||
|
||||
# dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
|
||||
# robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# # lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# # [num_envs, 3]
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# # 目标点:lid位置 + height_offset(Z方向)
|
||||
# target_pos = handle_pos_w.clone()
|
||||
# # print(target_pos[0])
|
||||
# target_pos[:, 0] -= height_offset # 在lid上方height_offset处
|
||||
# # print(target_pos[0])
|
||||
# # 获取两个夹爪的位置
|
||||
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
|
||||
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
|
||||
|
||||
# # 计算夹爪中心位置
|
||||
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
|
||||
# # print(gripper_center[0])
|
||||
# # 计算夹爪中心到目标点的3D距离
|
||||
# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# # 距离奖励:距离越小,奖励越大
|
||||
# position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
# return position_reward
|
||||
|
||||
|
||||
# def gripper_handle_position_alignment(
|
||||
# env,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_finger_name: str = "right_hand_l",
|
||||
# right_finger_name: str = "right_hand__r",
|
||||
# # 【修改点1】:根据你的描述,长轴是 Local Z,所以改为 (0, 0, 1)
|
||||
# handle_local_axis: tuple = (0.0, 0.0, 1.0),
|
||||
# # 【新增】:明确指定手柄的“侧面”是 Local X 轴
|
||||
# handle_local_side_axis: tuple = (0.0, -1.0, 0.0),
|
||||
# grasp_width: float = 0.03,
|
||||
# grasp_offset: float = 0.06,
|
||||
# std: float = 0.05,
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 修正版:
|
||||
# 1. 修正了手柄长轴为 Local Z。
|
||||
# 2. 使用 Local X 作为手柄两侧的基准方向。
|
||||
# """
|
||||
# # 1. 获取数据
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
|
||||
# # [num_envs, 3]
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# # [num_envs, 4]
|
||||
# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :]
|
||||
|
||||
# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True)
|
||||
# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True)
|
||||
# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :]
|
||||
# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :]
|
||||
|
||||
# # 2. 计算关键向量
|
||||
# # 【修正】:计算世界坐标系下的手柄长轴 (应为 World Z)
|
||||
# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# # 虽然本奖励函数主要位置点,不需要显式用到长轴向量做点积,但保留以备后续增加角度奖励
|
||||
# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec))
|
||||
|
||||
# # 【核心修改】:直接计算世界坐标系下的手柄“侧向”向量 (即 Local X -> World Y)
|
||||
# # 这种方法比 cross(approach, axis) 更准确,因为它尊重了物体自身的旋转
|
||||
# handle_side_local_vec = torch.tensor(handle_local_side_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# side_vec_world = normalize(quat_apply(handle_quat_w, handle_side_local_vec))
|
||||
|
||||
# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱)
|
||||
# # 注意:如果你的烘箱放置位置旋转了,这里也建议改为读取机器人的前方或者物体的Local方向
|
||||
# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# # 3. 计算理想的抓取点
|
||||
# # 抓取中心:从手柄中心沿着抓取方向(World X)反向退后 grasp_offset
|
||||
# grasp_center = handle_pos_w - approach_dir * grasp_offset
|
||||
|
||||
# # 计算理想的左/右手指位置
|
||||
# # 既然 side_vec_world 是 Local X (对应 World Y)
|
||||
# # 我们希望手指在 Y 轴两侧张开
|
||||
# ideal_l_pos = grasp_center + side_vec_world * (grasp_width / 2.0)
|
||||
# ideal_r_pos = grasp_center - side_vec_world * (grasp_width / 2.0)
|
||||
|
||||
# # 注意:这里 l_pos 用 + 还是 - 取决于你的 robot 建模里 right_hand_l 实际是在正Y侧还是负Y侧
|
||||
# # 如果发现左右手指交叉,交换上面的 + 和 - 即可。
|
||||
|
||||
# # 4. 计算距离奖励
|
||||
# dist_l = torch.norm(l_finger_pos - ideal_r_pos, dim=1)
|
||||
# dist_r = torch.norm(r_finger_pos - ideal_l_pos, dim=1)
|
||||
|
||||
# rew_l = 1.0 - torch.tanh(dist_l / std)
|
||||
# rew_r = 1.0 - torch.tanh(dist_r / std)
|
||||
|
||||
# total_reward = (rew_l + rew_r) / 2.0
|
||||
|
||||
# return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
# def gripper_handle_position_alignment(
|
||||
# env,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_finger_name: str = "right_hand_l", # 左手指 body 名称
|
||||
# right_finger_name: str = "right_hand__r", # 右手指 body 名称
|
||||
# handle_local_axis: tuple = (0.0, 0.0, 1.0), # 手柄长轴
|
||||
# grasp_width: float = 0.03, # 预期的抓取宽度 (米)
|
||||
# grasp_offset: float = 0.03, # 手指尖距离手柄中心的进深偏移
|
||||
# std: float = 0.05, # 奖励函数的敏感度
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 双指位置奖励:引导左/右手指分别到达手柄的两侧。
|
||||
# """
|
||||
# # 1. 获取数据
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# handle_quat_w = dryingbox.data.body_quat_w[:, handle_ids[0], :]
|
||||
|
||||
# l_finger_ids, _ = robot.find_bodies([left_finger_name], preserve_order=True)
|
||||
# r_finger_ids, _ = robot.find_bodies([right_finger_name], preserve_order=True)
|
||||
# l_finger_pos = robot.data.body_pos_w[:, l_finger_ids[0], :]
|
||||
# r_finger_pos = robot.data.body_pos_w[:, r_finger_ids[0], :]
|
||||
|
||||
# # 2. 计算理想的抓取点
|
||||
# # 获取手柄当前轴向
|
||||
# handle_axis_local_vec = torch.tensor(handle_local_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# handle_axis_world = normalize(quat_apply(handle_quat_w, handle_axis_local_vec))
|
||||
|
||||
# # 假设抓取方向主要是沿世界 X 轴 (指向烘箱)
|
||||
# approach_dir = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# # 计算垂直于手柄轴向和接近方向的向量,这就是“侧面”方向
|
||||
# side_vec = normalize(torch.cross(approach_dir, handle_axis_world))
|
||||
|
||||
# # 计算抓取中心点 (手柄中心沿接近方向反向偏移一点,留出空间)
|
||||
# grasp_center = handle_pos_w - approach_dir * grasp_offset
|
||||
|
||||
# # 计算理想的左/右手指位置 (在中心点两侧张开)
|
||||
# # 注意:这里需要根据你的坐标系确认哪个是左,哪个是右
|
||||
# ideal_l_pos = grasp_center - side_vec * (grasp_width / 2.0)
|
||||
# ideal_r_pos = grasp_center + side_vec * (grasp_width / 2.0)
|
||||
|
||||
# # 3. 计算距离奖励
|
||||
# dist_l = torch.norm(l_finger_pos - ideal_l_pos, dim=1)
|
||||
# dist_r = torch.norm(r_finger_pos - ideal_r_pos, dim=1)
|
||||
|
||||
# rew_l = 1.0 - torch.tanh(dist_l / std)
|
||||
# rew_r = 1.0 - torch.tanh(dist_r / std)
|
||||
|
||||
# # 取平均值
|
||||
# total_reward = (rew_l + rew_r) / 2.0
|
||||
|
||||
# return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
|
||||
# 在 rewards.py 中,修改 _is_grasped 函数,使其支持 dryingbox 和 handle_name
|
||||
def _is_grasped(
|
||||
env: ManagerBasedRLEnv,
|
||||
dryingbox_cfg: SceneEntityCfg, # 改为 dryingbox_cfg
|
||||
robot_cfg: SceneEntityCfg,
|
||||
handle_name: str, # 新增 handle_name 参数
|
||||
left_gripper_name: str,
|
||||
right_gripper_name: str,
|
||||
joint_names: list,
|
||||
height_offset: float,
|
||||
grasp_radius: float,
|
||||
target_close_pos: float
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
内部辅助函数:判定是否成功抓取。
|
||||
条件:
|
||||
1. 夹爪中心在把手目标点附近 (Sphere Check)
|
||||
2. 夹爪处于闭合发力状态 (Joint Effort Check)
|
||||
3. (关键) 夹爪X轴距离合适,不能压在把手上 (X-Axis Check)
|
||||
"""
|
||||
# 1. 获取对象
|
||||
dryingbox = env.scene[dryingbox_cfg.name] # 改为 dryingbox
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 2. 目标点位置 (把手中心)
|
||||
handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone()
|
||||
handle_pos_w[:, 1] += height_offset # X方向偏移
|
||||
handle_pos_w[:, 0] += 0.02
|
||||
# handle_pos_w[:, 2] -= 0.04
|
||||
# 3. 夹爪位置
|
||||
l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
gripper_center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# 4. 条件一:距离判定 (在把手球范围内)
|
||||
dist_to_handle = torch.norm(gripper_center - handle_pos_w, dim=-1)
|
||||
if env.common_step_counter % 100 == 0:
|
||||
print(f"Env 0 Dist to Handle: {dist_to_handle[0].item():.4f}m")
|
||||
is_near = dist_to_handle < grasp_radius
|
||||
|
||||
# 5. 条件二:X轴距离判定 (防止压在把手上)
|
||||
x_diff = gripper_center[:, 0] - handle_pos_w[:, 0]
|
||||
is_x_valid = torch.abs(x_diff) < 0.03 # 3cm 容差
|
||||
|
||||
# 6. 条件三:闭合判定 (正在尝试闭合)
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
|
||||
is_effort_closing = torch.all(joint_pos > 0.005, dim=1)
|
||||
|
||||
# 2. 几何排斥判定 (Geometry Check)
|
||||
dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
|
||||
is_not_empty = dist_fingertips > 0.005
|
||||
|
||||
# 在计算 dist_to_handle 后
|
||||
if env.common_step_counter % 500 == 0 and dist_to_handle[0] < 0.15:
|
||||
delta = gripper_center[0] - handle_pos_w[0]
|
||||
print(f"close_dbg env0: handle={handle_pos_w[0].cpu().numpy()}, "
|
||||
f"center={gripper_center[0].cpu().numpy()}, "
|
||||
f"delta={delta.cpu().numpy()}, "
|
||||
f"dist={dist_to_handle[0].item():.4f}, "
|
||||
f"fingertip_dist={dist_fingertips[0].item():.4f}, "
|
||||
f"joint_pos={joint_pos[0].cpu().numpy()}")
|
||||
|
||||
# 综合判定
|
||||
return is_near & is_x_valid & is_effort_closing & is_not_empty
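# [Illustration, not from the original commit] Summary of the gate above: the helper returns True
# only when (1) the finger mid-point lies within `grasp_radius` of the offset handle point,
# (2) the X offset between that mid-point and the handle is under the hard-coded 3 cm tolerance,
# (3) every gripper joint has moved more than 0.005 away from the open pose, and
# (4) the fingertips are still more than 5 mm apart, i.e. the gripper has not closed on air.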
|
||||
|
||||
|
||||
# 修改 gripper_close_when_near 函数
|
||||
def gripper_close_when_near(
|
||||
env: ManagerBasedRLEnv,
|
||||
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), # 改为 dryingbox_cfg
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
handle_name: str = "Equipment_BB_13_C", # 新增 handle_name 参数
|
||||
left_gripper_name: str = "right_hand_l", # 更新默认值
|
||||
right_gripper_name: str = "right_hand__r", # 更新默认值
|
||||
joint_names: list = ["right_hand_joint_left", "right_hand_joint_right"], # 更新默认值
|
||||
target_close_pos: float = 0.03,
|
||||
height_offset: float = 0.005, # 更新默认值
|
||||
grasp_radius: float = 0.03 # 更新默认值
|
||||
) -> torch.Tensor:
|
||||
|
||||
# 1. 判定基础抓取条件是否满足
|
||||
is_grasped = _is_grasped(
|
||||
env, dryingbox_cfg, robot_cfg, handle_name, # 传入 handle_name
|
||||
left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
# 2. 计算夹紧程度 (Clamping Intensity)
|
||||
robot = env.scene[robot_cfg.name]
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
mean_joint_pos = torch.mean(current_joint_pos, dim=1)
|
||||
|
||||
clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
|
||||
# 3. 只有在满足抓取判定时,才发放带有权重的夹紧奖励
|
||||
|
||||
|
||||
|
||||
return torch.where(is_grasped, clamping_factor, 0.0)
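# [Illustration, not from the original commit] Example of the clamping factor above with
# target_close_pos = 0.03: mean_joint_pos = 0.015 gives 0.5, anything >= 0.03 saturates at 1.0,
# and the whole term is zeroed whenever the grasp check fails.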
|
||||
|
||||
|
||||
|
||||
# def gripper_close_when_near(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
|
||||
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
|
||||
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# target_close_pos: float = 0.03,
|
||||
# height_offset: float = 0.03,
|
||||
# grasp_radius: float = 0.02 # 球面半径 2cm
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# # 1. 获取位置
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
# lid_pos_w[:, 2] += height_offset # 把手中心
|
||||
|
||||
# # 获取左右指尖位置
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
|
||||
# # 计算夹爪中心
|
||||
# pos_center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# # 2. 距离判定 (Is Inside Sphere?)
|
||||
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
|
||||
# is_in_sphere = (dist_center < grasp_radius).float()
|
||||
|
||||
# # 3. "在中间"判定 (Is Between?)
|
||||
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
|
||||
# vec_LR = pos_R - pos_L # 左指指向右指
|
||||
# vec_LO = lid_pos_w - pos_L # 左指指向物体
|
||||
|
||||
# # 计算投影比例 t
|
||||
# # P_proj = P_L + t * (P_R - P_L)
|
||||
# # t = (vec_LO . vec_LR) / |vec_LR|^2
|
||||
# # 如果 0 < t < 1,说明投影在两个指尖之间
|
||||
|
||||
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
|
||||
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
|
||||
# t = dot / len_sq
|
||||
|
||||
# is_between = (t > 0.0) & (t < 1.0)
|
||||
# is_between = is_between.float()
|
||||
|
||||
# # 4. 闭合判定
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # 注意:如果是 Binary Action,可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
|
||||
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
|
||||
# # 只有当非常接近闭合目标时才给分
|
||||
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
|
||||
|
||||
# # 5. 最终奖励
|
||||
# # 只有三者同时满足才给 1.0 分
|
||||
# reward = is_in_sphere * is_between * is_closing
|
||||
|
||||
# return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
# def lid_is_lifted(
|
||||
# env:ManagerBasedRLEnv,
|
||||
# minimal_height:float,
|
||||
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# lid_height = lid.data.root_pos_w[:, 2]
|
||||
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
|
||||
# reward=torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
# return reward
|
||||
|
||||
|
||||
def lid_is_lifted(
|
||||
env: ManagerBasedRLEnv,
|
||||
minimal_height: float = 0.05, # 相对提升阈值
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
height_offset: float = 0.03,
|
||||
grasp_radius: float = 0.1,
|
||||
target_close_pos: float = 0.03,
|
||||
) -> torch.Tensor:
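    # NOTE: `_is_grasped` defined above now also expects `handle_name` between the asset
    # configs and the gripper names; this legacy lid-based call still uses the old argument
    # list and would need updating before this term is used again.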
|
||||
|
||||
is_grasped = _is_grasped(
|
||||
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
lid = env.scene[lid_cfg.name]
|
||||
|
||||
# 1. 获取高度
|
||||
current_height = lid.data.root_pos_w[:, 2]
|
||||
# 自动获取初始高度
|
||||
initial_height = lid.data.default_root_state[:, 2]
|
||||
|
||||
# 2. 计算提升量
|
||||
lift_height = torch.clamp(current_height - initial_height, min=0.0)
|
||||
|
||||
# 3. 速度检查 (防撞飞)
|
||||
# 如果速度 > 1.0 m/s,视为被撞飞,不给分
|
||||
lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
|
||||
is_stable = lid_vel < 1.0
|
||||
|
||||
# 4. 计算奖励
|
||||
# 基础分:高度越高分越高
|
||||
shaping_reward = lift_height * 2.0
|
||||
# 成功分:超过阈值
|
||||
success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
|
||||
|
||||
# 组合
|
||||
# 必须 is_grasped AND is_stable
|
||||
reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
|
||||
|
||||
return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
|
||||
def lid_lift_success_reward(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand_body",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# 关键参数
|
||||
settled_height: float = 0.75, # 盖子落在超声仪上的稳定高度 (需根据仿真实际情况微调)
|
||||
target_lift_height: float = 0.05, # 目标提升高度 (5cm)
|
||||
grasp_dist_threshold: float = 0.05, # 抓取判定距离
|
||||
closed_pos: float = 0.03 # 夹爪闭合阈值
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
提升奖励:只有在【夹稳】的前提下,将盖子【相对】提升达到目标高度才给高分。
|
||||
"""
|
||||
# 1. 获取数据
|
||||
lid = env.scene[lid_cfg.name]
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 盖子实时高度
|
||||
lid_z = lid.data.root_pos_w[:, 2]
|
||||
lid_pos = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# 夹爪位置
|
||||
body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
|
||||
# 2. 【条件一:是否夹稳 (Is Grasped?)】
|
||||
# 距离检查
|
||||
distance = torch.norm(gripper_pos - lid_pos, dim=-1)
|
||||
is_close = distance < grasp_dist_threshold
|
||||
|
||||
# 闭合检查
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# 假设 > 80% 的闭合度算抓紧了
|
||||
is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)
|
||||
|
||||
is_grasped = is_close & is_closed
|
||||
|
||||
# 3. 【条件二:计算相对提升 (Relative Lift)】
|
||||
# 当前高度 - 初始稳定高度
|
||||
current_lift = lid_z - settled_height
|
||||
|
||||
# 4. 计算奖励
|
||||
lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
|
||||
|
||||
# 基础提升分 (Shaping Reward)
|
||||
lift_reward = lift_progress
|
||||
|
||||
# 成功大奖 (Success Bonus)
|
||||
# 如果提升超过目标高度 (比如 > 95% 的 5cm),给额外大奖
|
||||
success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
|
||||
|
||||
# 组合:只有在 is_grasped 为 True 时才发放提升奖励
|
||||
total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))
|
||||
|
||||
# 5. 安全输出
|
||||
return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
def lid_lift_progress_reward(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
initial_height: float = 0.8,
|
||||
target_height_lift: float = 0.15,
|
||||
height_offset: float = 0.02, # 必须与抓取奖励保持一致
|
||||
grasp_radius: float = 0.1, # 提升时可以稍微放宽一点半径,防止抖动丢分
|
||||
target_close_pos: float = 0.03,
|
||||
std: float = 0.05 # 标准差
|
||||
) -> torch.Tensor:
|
||||
|
||||
# 1. 判定是否夹住
|
||||
is_grasped = _is_grasped(
|
||||
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
# 2. 计算高度
|
||||
lid = env.scene[lid_cfg.name]
|
||||
current_height = lid.data.root_pos_w[:, 2]
|
||||
lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
|
||||
# print(current_height)
|
||||
# print(lift_height)
|
||||
# 3. 计算奖励
|
||||
# 只有在 is_grasped 为 True 时,才发放高度奖励
|
||||
# 这样彻底杜绝了 "砸飞/撞飞" 拿分的情况
|
||||
progress = torch.tanh(lift_height / std)
|
||||
|
||||
reward = torch.where(is_grasped, progress, 0.0)
|
||||
|
||||
return reward
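# [Illustration, not from the original commit] With std = 0.05, the progress term above is
# tanh(lift_height / std): roughly 0.76 at 5 cm of lift, 0.96 at 10 cm, and ~1.0 at the 15 cm
# clamp; it is only paid out while the grasp check holds.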
|
||||
|
||||
# def lid_grasped_bonus(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg,
|
||||
# robot_cfg: SceneEntityCfg,
|
||||
# left_gripper_name: str,
|
||||
# joint_names: list,
|
||||
# distance_threshold: float = 0.05,
|
||||
# closed_pos: float = 0.03
|
||||
# ) -> torch.Tensor:
|
||||
# # 1. 计算距离
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
# lid_pos = lid.data.root_pos_w[:, :3]
|
||||
# distance = torch.norm(gripper_pos - lid_pos, dim=-1)
|
||||
|
||||
# # 2. 检查距离是否达标
|
||||
# is_close = distance < distance_threshold
|
||||
|
||||
# # 3. 检查夹爪是否闭合
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# # 检查是否接近闭合 (比如 > 0.02)
|
||||
# is_closed = torch.all(joint_pos > (closed_pos * 0.8), dim=-1)
|
||||
|
||||
# # 4. 给分
|
||||
# # 只有同时满足才给 1.0
|
||||
# reward = torch.where(is_close & is_closed, 1.0, 0.0)
|
||||
|
||||
# return torch.nan_to_num(reward)
|
||||
|
||||
# 在 rewards.py 文件开头添加调试函数
|
||||
def debug_robot_physics(
|
||||
env: ManagerBasedRLEnv,
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "right_hand_body",
|
||||
distance_threshold: float = 0.1, # 当距离小于此值时打印
|
||||
print_interval: int = 10 # 每N步打印一次
|
||||
) -> None:
|
||||
"""
|
||||
调试函数:打印机器人的速度、加速度等信息
|
||||
"""
|
||||
# 使用静态变量控制打印频率
|
||||
if not hasattr(debug_robot_physics, "step_count"):
|
||||
debug_robot_physics.step_count = 0
|
||||
debug_robot_physics.step_count += 1
|
||||
|
||||
if debug_robot_physics.step_count % print_interval != 0:
|
||||
return
|
||||
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 1. 获取机器人根节点的速度和加速度
|
||||
root_lin_vel = robot.data.root_lin_vel_w # (num_envs, 3)
|
||||
root_ang_vel = robot.data.root_ang_vel_w # (num_envs, 3)
|
||||
root_lin_vel_magnitude = torch.norm(root_lin_vel, dim=1) # (num_envs,)
|
||||
root_ang_vel_magnitude = torch.norm(root_ang_vel, dim=1) # (num_envs,)
|
||||
|
||||
# 2. 获取夹爪body的速度
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
gripper_lin_vel = robot.data.body_lin_vel_w[:, body_ids[0], :] # (num_envs, 3)
|
||||
gripper_ang_vel = robot.data.body_ang_vel_w[:, body_ids[0], :] # (num_envs, 3)
|
||||
gripper_lin_vel_mag = torch.norm(gripper_lin_vel, dim=1)
|
||||
gripper_ang_vel_mag = torch.norm(gripper_ang_vel, dim=1)
|
||||
|
||||
# 3. 获取关节速度(可能影响碰撞)
|
||||
joint_vel = robot.data.joint_vel # (num_envs, num_joints)
|
||||
max_joint_vel = torch.max(torch.abs(joint_vel), dim=1)[0] # (num_envs,)
|
||||
|
||||
# 4. 检查是否有异常高的速度(可能导致碰撞问题)
|
||||
high_vel_mask = (gripper_lin_vel_mag > 0.5) | (root_lin_vel_magnitude > 0.3)
|
||||
|
||||
if torch.any(high_vel_mask):
|
||||
env_ids = torch.where(high_vel_mask)[0].cpu().tolist()
|
||||
for env_id in env_ids[:3]: # 只打印前3个环境
|
||||
print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:")
|
||||
print(f" 机器人根节点线性速度: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)")
|
||||
print(f" 机器人根节点角速度: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)")
|
||||
print(f" 夹爪线性速度: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)")
|
||||
print(f" 夹爪角速度: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)")
|
||||
print(f" 最大关节速度: {max_joint_vel[env_id].item():.4f} rad/s")
|
||||
|
||||
def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
handle_name: str = "Equipment_BB_13_C",
|
||||
left_gripper_name: str = "right_hand_l",
|
||||
right_gripper_name: str = "right_hand__r",
|
||||
height_offset: float = 0.03,
|
||||
std: float = 0.1) -> torch.Tensor:
|
||||
|
||||
dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
target_pos = handle_pos_w.clone()
|
||||
target_pos[:, 1] += height_offset
|
||||
target_pos[:, 0] += 0.02
|
||||
# target_pos[:, 2] -= 0.04
|
||||
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
|
||||
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
|
||||
|
||||
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
|
||||
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# ===== 添加调试打印 =====
|
||||
# 当夹爪接近手柄时(距离 < 0.15m)打印物理信息
|
||||
close_mask = distance < 0.03
|
||||
if torch.any(close_mask):
|
||||
debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5)
|
||||
# ========================
|
||||
|
||||
position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
# 计算距离后插入
|
||||
if env.common_step_counter % 500 == 0:
|
||||
print("pos_align dbg env0: "
|
||||
f"handle={handle_pos_w[0].cpu().numpy()}, "
|
||||
f"target={target_pos[0].cpu().numpy()}, "
|
||||
f"gripL={left_gripper_pos[0].cpu().numpy()}, "
|
||||
f"gripR={right_gripper_pos[0].cpu().numpy()}, "
|
||||
f"center={gripper_center[0].cpu().numpy()}, "
|
||||
f"dist={distance[0].item():.4f}")
|
||||
|
||||
return position_reward
|
||||
|
||||
|
||||
# def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# handle_name: str = "Equipment_BB_13_C",
|
||||
# left_gripper_name: str = "right_hand_l",
|
||||
# right_gripper_name: str = "right_hand__r",
|
||||
# height_offset: float = 0.03,
|
||||
# std: float = 0.1) -> torch.Tensor:
|
||||
|
||||
# dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
|
||||
# robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
|
||||
# handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
|
||||
# target_pos = handle_pos_w.clone()
|
||||
# target_pos[:, 1] += height_offset
|
||||
# target_pos[:, 0] += 0.02
|
||||
# # target_pos[:, 2] -= 0.02
|
||||
|
||||
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
|
||||
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
|
||||
|
||||
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
|
||||
# distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# # ===== 添加调试打印 =====
|
||||
# # 当夹爪接近手柄时(距离 < 0.15m)打印物理信息
|
||||
# close_mask = distance < 0.03
|
||||
# if torch.any(close_mask):
|
||||
# debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5)
|
||||
# # ========================
|
||||
|
||||
# position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
# # 计算距离后插入
|
||||
# if env.common_step_counter % 00 == 0:
|
||||
# print("pos_align dbg env0: "
|
||||
# f"handle={handle_pos_w[0].cpu().numpy()}, "
|
||||
# f"target={target_pos[0].cpu().numpy()}, "
|
||||
# f"gripL={left_gripper_pos[0].cpu().numpy()}, "
|
||||
# f"gripR={right_gripper_pos[0].cpu().numpy()}, "
|
||||
# f"center={gripper_center[0].cpu().numpy()}, "
|
||||
# f"dist={distance[0].item():.4f}")
|
||||
|
||||
# return position_reward
|
||||
|
||||
|
||||
# def gripper_open_when_far(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# dryingbox_cfg: SceneEntityCfg,
|
||||
# robot_cfg: SceneEntityCfg,
|
||||
# handle_name: str,
|
||||
# left_gripper_name: str,
|
||||
# right_gripper_name: str,
|
||||
# joint_names: list,
|
||||
# dist_threshold: float = 0.5,
|
||||
# target_open_pos: float = 0.3,
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 当夹爪距离目标点较远(> dist_threshold)时,奖励夹爪保持打开状态。
|
||||
# """
|
||||
# # 1. 获取位置
|
||||
# dryingbox = env.scene[dryingbox_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 获取手柄位置
|
||||
# handle_ids, _ = dryingbox.find_bodies([handle_name])
|
||||
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
|
||||
|
||||
# # 获取夹爪中心位置
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
|
||||
# gripper_center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# # 2. 计算距离
|
||||
# dist = torch.norm(gripper_center - handle_pos, dim=-1)
|
||||
|
||||
# # 3. 获取手指关节位置的绝对值
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# current_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# mean_pos_abs = torch.mean(current_pos_abs, dim=1)
|
||||
|
||||
# # 4. 计算奖励:如果距离 > 0.5,奖励 mean_pos_abs 接近 target_open_pos (0.3)
|
||||
# # 使用 tanh 误差函数,误差越小奖励越高
|
||||
# error = torch.abs(mean_pos_abs - target_open_pos)
|
||||
# reward = 1.0 - torch.tanh(error / 0.1) # 0.1 为缩放因子,可根据需要调整
|
||||
|
||||
# # 只有距离大于阈值时才发放奖励
|
||||
# return torch.where(dist > dist_threshold, reward, torch.zeros_like(reward))
|
||||
|
||||
|
||||
def gripper_open_when_far(
|
||||
env: ManagerBasedRLEnv,
|
||||
dryingbox_cfg: SceneEntityCfg, # 保留接口一致性,但不再使用
|
||||
robot_cfg: SceneEntityCfg,
|
||||
handle_name: str, # 保留接口一致性,但不再使用
|
||||
left_gripper_name: str,
|
||||
right_gripper_name: str,
|
||||
joint_names: list, # 不再使用
|
||||
dist_threshold: float = 0.5, # 不再使用
|
||||
target_open_dist: float = 0.06, # 两手指完全打开时的距离(米)
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
奖励夹爪最大张开状态:
|
||||
只根据左右两根手指之间的实际距离计算奖励,
|
||||
当距离接近 target_open_dist (0.06m) 时奖励最大。
|
||||
"""
|
||||
|
||||
# 1. 获取机器人
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 2. 获取左右手指的世界坐标
|
||||
l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
|
||||
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
|
||||
# 3. 计算两手指之间的实际距离
|
||||
finger_dist = torch.norm(pos_L - pos_R, dim=-1)
|
||||
|
||||
# 4. 奖励:距离越接近最大打开距离,奖励越高
|
||||
error = torch.abs(finger_dist - target_open_dist)
|
||||
reward = 1.0 - torch.tanh(error / 0.01) # 0.01m = 1cm 作为平滑尺度
|
||||
|
||||
return reward
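# [Illustration, not from the original commit] With target_open_dist = 0.06 and the 0.01 smoothing scale:
#   finger_dist = 0.06 -> error 0.00 -> reward ≈ 1.0
#   finger_dist = 0.05 -> error 0.01 -> reward ≈ 0.24
#   finger_dist = 0.03 -> error 0.03 -> reward ≈ 0.005
# i.e. the term rewards keeping the fingers near their fully open spacing regardless of the distance
# to the handle (the distance-related arguments are kept only for interface compatibility).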
|
||||
@@ -40,7 +40,7 @@ TABLE_CFG=RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Table",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.95, -0.3, 0.005],
|
||||
rot=[0.7071, 0.0, 0.0, 0.7071],
|
||||
rot=[0.7071, 0.0, 0.0, -0.7071],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
@@ -76,7 +76,7 @@ DRYINGBOX_CFG = ArticulationCfg(
|
||||
),
|
||||
),
|
||||
init_state=ArticulationCfg.InitialStateCfg(
|
||||
pos=(0.95, -0.45, 0.9),
|
||||
pos=[0.95, -0.45, 0.9],
|
||||
rot=[-0.7071, 0.0, 0.0, 0.7071]
|
||||
),
|
||||
# actuators={}
|
||||
@@ -157,7 +157,8 @@ ULTRASOUND_CFG = ArticulationCfg(
|
||||
##
|
||||
# Scene definition
|
||||
##
|
||||
|
||||
from isaaclab.sensors import CameraCfg
|
||||
import isaaclab.sim as sim_utils
|
||||
|
||||
@configclass
|
||||
class PullSceneCfg(InteractiveSceneCfg):
|
||||
@@ -181,6 +182,109 @@ class PullSceneCfg(InteractiveSceneCfg):
|
||||
prim_path="/World/DomeLight",
|
||||
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
|
||||
)
|
||||
# # ------------- 新增 6 路相机 -------------
|
||||
# cam_head = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=1.93, # 使用你给的 1.93
|
||||
# # 由 fx = W * f / aperture 推得 aperture = W * f / fx
|
||||
# horizontal_aperture=1280 * 1.93 / 911.77, # ≈ 2.71,单位与 focal_length 一致
|
||||
# clipping_range=(0.1, 1e5),
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.1226748226, 0.0, 0.1130563345),
|
||||
# rot=(-0.5, 0.5, -0.5, 0.5),
|
||||
# # convention="ros",
|
||||
|
||||
# # rot=(0.70710678, 0, 0.70710678, 0),
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_chest = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(-0.14909, 0.3549, -0.23295),
|
||||
# rot=(0.99452, 0.0, 0.10453, 0.0),#(0.99452, 0.0, 0.10453, 0.0),#
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_left_hand = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.0610568560, 1.13e-8, 0.0683504405),#()
|
||||
# rot=(0.99452, 0, 0.10453, 0),#(0.99452, 0, 0.10453, 0)
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_right_hand = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.0610571709, 2.58e-7, 0.0711255539),#(0.0610568560, 1.13e-8, 0.0683504405)
|
||||
# rot=(0.445, -0.549, -0.549, -0.445),#(0.99452, 0, 0.10453, 0)
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_top = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/cam_top",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.4627602211, 0.0, 2.0977711170),
|
||||
# rot=(0.83526, 0, 0.54986, 0),
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=960,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=False,
|
||||
# )
|
||||
|
||||
# cam_side = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/cam_side",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.76709, -0.16995, 1.13007),
|
||||
# rot=(0.61427, 0, 0.02598, 0.78866),
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=960,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=False,
|
||||
# )
|
||||
# # ------------- 相机结束 -------------
|
||||
|
||||
|
||||
##
|
||||
@@ -222,21 +326,6 @@ class ActionsCfg:
|
||||
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
|
||||
)
|
||||
|
||||
# left_arm_fixed = mdp.JointPositionActionCfg(
|
||||
# asset_name="Mindbot",
|
||||
# joint_names=["l_joint[1-6]"], # 对应 l_joint1 到 l_joint6
|
||||
|
||||
# # 1. 缩放设为 0:这让 RL 策略无法控制它,它不会动
|
||||
# scale=0.0,
|
||||
# offset={
|
||||
# "l_joint1": 2.3562,
|
||||
# "l_joint2": -1.2217,
|
||||
# "l_joint3": -1.5708,
|
||||
# "l_joint4": 1.5708,
|
||||
# "l_joint5": 1.5708,
|
||||
# "l_joint6": -1.2217,
|
||||
# },
|
||||
# )
|
||||
|
||||
grippers_position = mdp.BinaryJointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
@@ -317,16 +406,12 @@ class EventCfg:
|
||||
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
reset_dryingbox=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
|
||||
params={"asset_cfg": SceneEntityCfg("dryingbox"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
# reset_ultrasound = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
|
||||
# params={"asset_cfg": SceneEntityCfg("ultrasound"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
# reset_lid = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
|
||||
# params={"asset_cfg": SceneEntityCfg("lid"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
|
||||
|
||||
@configclass
|
||||
class RewardsCfg:
|
||||
"""Reward terms for the MDP."""
|
||||
|
||||
# 1. 姿态对齐:保持不变,这是基础
|
||||
gripper_handle_orientation_alignment = RewTerm(
|
||||
func=mdp.gripper_handle_orientation_alignment,
|
||||
params={
|
||||
@@ -337,15 +422,12 @@ class RewardsCfg:
|
||||
"gripper_forward_axis": (0.0, 0.0, 1.0),
|
||||
"gripper_width_axis": (0.0, 1.0, 0.0),
|
||||
"lid_handle_axis": (0.0, 0.0, 1.0),
|
||||
# 删除了 handle_axis,因为新函数中不再使用它
|
||||
"max_angle_deg": 85.0, # 建议改为 30.0 或更小,85.0 对指向性约束来说太宽松
|
||||
"max_angle_deg": 60.0, # 稍微放宽一点,或者保持 30.0,看前期训练难度
|
||||
},
|
||||
weight=5.0 # 建议保持在 4.0 或 5.0,确保姿态约束有足够的引导力
|
||||
weight=4.0
|
||||
)
|
||||
|
||||
# stage 2
|
||||
# 位置对齐奖励:鼓励夹爪位于 lid 两侧,且 Z 方向在 lid 上方 0.1m
|
||||
|
||||
# 2. 位置对齐 (合并了之前的粗略和精细对齐)
|
||||
# std=0.05 提供了从 15cm 到 0cm 全程较好的梯度,解决了之前的梯度消失问题
|
||||
gripper_handle_position_alignment = RewTerm(
|
||||
func=mdp.gripper_handle_position_alignment,
|
||||
params={
|
||||
@@ -354,29 +436,31 @@ class RewardsCfg:
|
||||
"handle_name":"Equipment_BB_13_C",
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"height_offset": 0.00, # Z方向:lid 上方 0.1m
|
||||
"std": 0.2, # 位置对齐的容忍度0.3
|
||||
"height_offset": 0.00,
|
||||
"std": 0.05, # 关键修改:0.3太松,0.03太紧导致梯度消失,0.05是平衡点
|
||||
},
|
||||
weight=3.0
|
||||
weight=6.0 # 提高权重,作为主导奖励
|
||||
)
|
||||
|
||||
# 【新增】精细对齐 (引导进入 2cm 圈)
|
||||
gripper_handle_fine_alignment = RewTerm(
|
||||
func=mdp.gripper_handle_position_alignment,
|
||||
# 3. 远距离闭合惩罚 (逻辑已修正)
|
||||
# 现在它只惩罚“在远处就闭眼”的行为,不会阻挡机器人靠近
|
||||
gripper_closed_penalty = RewTerm(
|
||||
func=mdp.gripper_open_penalty_when_far,
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"handle_name":"Equipment_BB_13_C",
|
||||
"handle_name": "Equipment_BB_13_C",
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"height_offset": 0.00, # Z方向:lid 上方 0.1m
|
||||
"std": 0.05, # 位置对齐的容忍度0.05
|
||||
"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
"dist_threshold": 0.06, # 2.5cm 保护区,进入后允许闭合
|
||||
},
|
||||
weight=10.0 # 高权重
|
||||
weight=0.5 # 权重降低,作为辅助约束,不要喧宾夺主
|
||||
)
|
||||
# ... 在 RewardsCfg 类中修改 ...
|
||||
|
||||
# 4. 近距离抓取交互 (改用 V2 版本或增加稳定性项)
|
||||
gripper_close_interaction = RewTerm(
|
||||
func=mdp.gripper_close_when_near,
|
||||
func=mdp.gripper_close_interaction_v2, # 切换到带稳定性的版本
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
@@ -384,34 +468,171 @@ class RewardsCfg:
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
"target_close_pos": 0.02,
|
||||
"height_offset": 0.00,#0.07
|
||||
"grasp_radius": 0.05,#original 0.03
|
||||
"target_close_pos": 0.012,
|
||||
"height_offset": 0.00,
|
||||
"grasp_radius": 0.05,
|
||||
},
|
||||
weight=10.0
|
||||
weight=8.0 # 适当调低一点点,为稳定性奖励腾出空间
|
||||
)
|
||||
|
||||
# lid_lifted_reward = RewTerm(
|
||||
# func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
|
||||
# 5. 新增:抓取稳定性奖励 (核心:惩罚抖动)
|
||||
gripper_grasp_stability = RewTerm(
|
||||
func=mdp.gripper_grasp_stability,
|
||||
params={
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
"vel_threshold": 0.005,
|
||||
},
|
||||
weight=4.0 # 只要保持静止就一直给分
|
||||
)
|
||||
# 4. 近距离抓取交互 (修正了目标值)
|
||||
# gripper_close_interaction = RewTerm(
|
||||
# func=mdp.gripper_close_when_near,
|
||||
# params={
|
||||
# "lid_cfg": SceneEntityCfg("lid"),
|
||||
# "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name":"Equipment_BB_13_C",
|
||||
# "left_gripper_name": "right_hand_l",
|
||||
# "right_gripper_name": "right_hand__r",
|
||||
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
|
||||
# # 关键修改:物理接触点计算
|
||||
# # 夹爪空载全闭是0.03 (对应0cm)
|
||||
# # 把手宽3.88cm -> 单侧行程 (6-3.88)/2 = 1.06cm = 0.0106m
|
||||
# # 设为 0.012 确保接触时计算出的奖励是满分 (ratio >= 1.0)
|
||||
# "target_close_pos": 0.012,
|
||||
|
||||
# "height_offset": 0.00,
|
||||
# "grasp_radius": 0.05,
|
||||
# },
|
||||
# weight=10.0 # 给一个足够大的权重
|
||||
# weight=10.0 # 只有成功抓到才给的大奖
|
||||
# )
|
||||
|
||||
# lid_lifting_reward = RewTerm(
|
||||
# func=mdp.lid_lift_progress_reward,
|
||||
# params={
|
||||
# "lid_cfg": SceneEntityCfg("lid"),
|
||||
# "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
|
||||
# "target_height_lift": 0.25,
|
||||
# "height_offset": 0.07, # 与其他奖励保持一致
|
||||
# "std": 0.1
|
||||
# },
|
||||
# # 权重设大一点,告诉它这是最终目标
|
||||
# weight=10.0
|
||||
# )
|
||||
# @configclass
|
||||
# class RewardsCfg:
|
||||
# """Reward terms for the MDP."""
|
||||
|
||||
# gripper_handle_orientation_alignment = RewTerm(
|
||||
# func=mdp.gripper_handle_orientation_alignment,
|
||||
# params={
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name": "Equipment_BB_13_C",
|
||||
# "gripper_body_name": "right_hand_body",
|
||||
# "gripper_forward_axis": (0.0, 0.0, 1.0),
|
||||
# "gripper_width_axis": (0.0, 1.0, 0.0),
|
||||
# "lid_handle_axis": (0.0, 0.0, 1.0),
|
||||
# # 删除了 handle_axis,因为新函数中不再使用它
|
||||
# "max_angle_deg": 30.0, # 建议改为 30.0 或更小,85.0 对指向性约束来说太宽松
|
||||
# },
|
||||
# weight=4.0 # 建议保持在 4.0 或 5.0,确保姿态约束有足够的引导力
|
||||
# )
|
||||
|
||||
# # stage 2
|
||||
# # 位置对齐奖励:鼓励夹爪位于 lid 两侧,且 Z 方向在 lid 上方 0.1m
|
||||
|
||||
# gripper_handle_position_alignment = RewTerm(
|
||||
# func=mdp.gripper_handle_position_alignment,
|
||||
# params={
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name":"Equipment_BB_13_C",
|
||||
# "left_gripper_name": "right_hand_l",
|
||||
# "right_gripper_name": "right_hand__r",
|
||||
# "height_offset": 0.00, # Z方向:lid 上方 0.1m
|
||||
# "std": 0.3, # 位置对齐的容忍度0.3 ###0.1
|
||||
# },
|
||||
# weight=3.0
|
||||
# )
|
||||
|
||||
# # 强制远距离打开奖励
|
||||
# # 远距离闭合惩罚
|
||||
# gripper_closed_penalty = RewTerm(
|
||||
# func=mdp.gripper_open_penalty_when_far,
|
||||
# params={
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name": "Equipment_BB_13_C",
|
||||
# "left_gripper_name": "right_hand_l",
|
||||
# "right_gripper_name": "right_hand__r",
|
||||
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
# "dist_threshold": 0.02, # 3cm 以外不允许闭合
|
||||
# "max_penalty_dist":0.055 #0.055
|
||||
# },
|
||||
# weight=1.0 # 提高权重,让扣分很痛8.0
|
||||
# )
|
||||
|
||||
|
||||
# # 【新增】精细对齐 (引导进入 2cm 圈)
|
||||
# gripper_handle_fine_alignment = RewTerm(
|
||||
# func=mdp.gripper_handle_position_alignment,
|
||||
# params={
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name":"Equipment_BB_13_C",
|
||||
# "left_gripper_name": "right_hand_l",
|
||||
# "right_gripper_name": "right_hand__r",
|
||||
# "height_offset": 0.00, # Z方向:lid 上方 0.1m
|
||||
# "std": 0.03, # 位置对齐的容忍度0.05
|
||||
# },
|
||||
# weight=5.0 # 高权重
|
||||
# )
|
||||
|
||||
# # gripper_close_interaction = RewTerm(
|
||||
# # func=mdp.gripper_close_when_near,
|
||||
# # params={
|
||||
# # "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# # "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# # "handle_name": "Equipment_BB_13_C",
|
||||
# # "left_gripper_name": "right_hand_l",
|
||||
# # "right_gripper_name": "right_hand__r",
|
||||
# # "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
# # # 关键匹配把手尺寸
|
||||
# # "handle_thickness": 0.0388, # 把手厚度(X/宽度向抓取方向)
|
||||
# # "thickness_tol": 0.008, # 误差容忍,约 0.8cm
|
||||
# # "grasp_radius": 0.04, # 进入此半径才计算闭合奖励
|
||||
# # "height_offset": 0.00,
|
||||
# # "target_close_pos": 0.03, # 保留给关节闭合幅度
|
||||
# # },
|
||||
# # weight=10.0
|
||||
# # )
|
||||
# gripper_close_interaction = RewTerm(
|
||||
# func=mdp.gripper_close_when_near,
|
||||
# params={
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name":"Equipment_BB_13_C",
|
||||
# "left_gripper_name": "right_hand_l",
|
||||
# "right_gripper_name": "right_hand__r",
|
||||
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
# "target_close_pos": 0.03,
|
||||
# "height_offset": 0.00,#0.07
|
||||
# "grasp_radius": 0.04,#original 0.03
|
||||
# },
|
||||
# weight=10.0
|
||||
# )
|
||||
|
||||
# # lid_lifted_reward = RewTerm(
|
||||
# # func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
|
||||
# # params={
|
||||
# # "lid_cfg": SceneEntityCfg("lid"),
|
||||
# # "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
|
||||
# # },
|
||||
# # weight=10.0 # 给一个足够大的权重
|
||||
# # )
|
||||
|
||||
# # lid_lifting_reward = RewTerm(
|
||||
# # func=mdp.lid_lift_progress_reward,
|
||||
# # params={
|
||||
# # "lid_cfg": SceneEntityCfg("lid"),
|
||||
# # "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
|
||||
# # "target_height_lift": 0.25,
|
||||
# # "height_offset": 0.07, # 与其他奖励保持一致
|
||||
# # "std": 0.1
|
||||
# # },
|
||||
# # # 权重设大一点,告诉它这是最终目标
|
||||
# # weight=10.0
|
||||
# # )
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,681 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import math
|
||||
|
||||
import torch
|
||||
from isaaclab.actuators import ImplicitActuatorCfg
|
||||
import isaaclab.sim as sim_utils
|
||||
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
|
||||
from isaaclab.envs import ManagerBasedRLEnvCfg
|
||||
from isaaclab.managers import EventTermCfg as EventTerm
|
||||
from isaaclab.managers import ObservationGroupCfg as ObsGroup
|
||||
from isaaclab.managers import ObservationTermCfg as ObsTerm
|
||||
from isaaclab.managers import RewardTermCfg as RewTerm
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
from isaaclab.managers import TerminationTermCfg as DoneTerm
|
||||
from isaaclab.managers import CurriculumTermCfg as CurrTerm
|
||||
from isaaclab.scene import InteractiveSceneCfg
|
||||
from isaaclab.utils import configclass
|
||||
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
|
||||
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
|
||||
|
||||
from . import mdp
|
||||
|
||||
##
|
||||
# Pre-defined configs
|
||||
##
|
||||
|
||||
from mindbot.robot.mindbot import MINDBOT_CFG
|
||||
|
||||
# ====== Other object configurations ======
|
||||
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
|
||||
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
|
||||
|
||||
|
||||
TABLE_CFG=RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Table",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.95, -0.3, 0.005],
|
||||
rot=[0.7071, 0.0, 0.0, -0.7071],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
|
||||
rigid_props=RigidBodyPropertiesCfg(
|
||||
rigid_body_enabled= True,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
disable_gravity=False,
|
||||
),
|
||||
collision_props=CollisionPropertiesCfg(
|
||||
collision_enabled=True,
|
||||
contact_offset=0.0005,#original 0.02
|
||||
rest_offset=0
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
DRYINGBOX_CFG = ArticulationCfg(
|
||||
spawn=sim_utils.UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/assets/Collected_Equipment_BB_0B/Equipment/Equipment_BB_13.usd",
|
||||
rigid_props=sim_utils.RigidBodyPropertiesCfg(
|
||||
disable_gravity=False,
|
||||
max_depenetration_velocity=1.0
|
||||
),
|
||||
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
|
||||
enabled_self_collisions=False,#
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
stabilization_threshold=1e-6,
|
||||
# fix_root_link=True,
|
||||
),
|
||||
),
|
||||
init_state=ArticulationCfg.InitialStateCfg(
|
||||
pos=[0.95, -0.45, 0.9],
|
||||
rot=[-0.7071, 0.0, 0.0, 0.7071]
|
||||
),
|
||||
# actuators={}
|
||||
actuators={
|
||||
"passive_damper": ImplicitActuatorCfg(
|
||||
# ".*" 表示匹配该USD文件内的所有关节(无论是轮子、屏幕转轴还是其他)
|
||||
joint_names_expr=["RevoluteJoint"],
|
||||
|
||||
stiffness=10000.0,
|
||||
damping=1000.0,
|
||||
effort_limit_sim=10000.0,
|
||||
velocity_limit_sim=100.0,
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
LID_CFG = RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Lid",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.2, 0.65, 0.9],
|
||||
rot=[1.0, 0.0, 0.0, 0.0],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
|
||||
# scale=(0.2, 0.2, 0.2),
|
||||
rigid_props=RigidBodyPropertiesCfg(
|
||||
rigid_body_enabled= True,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
max_angular_velocity=1000.0,
|
||||
max_linear_velocity=1000.0,
|
||||
max_depenetration_velocity=0.5,#original 5.0
|
||||
linear_damping=5.0, #original 0.5
|
||||
angular_damping=5.0, #original 0.5
|
||||
disable_gravity=False,
|
||||
),
|
||||
collision_props=CollisionPropertiesCfg(
|
||||
collision_enabled=True,
|
||||
contact_offset=0.0005,#original 0.02
|
||||
rest_offset=0
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
ULTRASOUND_CFG = ArticulationCfg(
|
||||
spawn=sim_utils.UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
|
||||
rigid_props=sim_utils.RigidBodyPropertiesCfg(
|
||||
disable_gravity=False,
|
||||
max_depenetration_velocity=1.0
|
||||
),
|
||||
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
|
||||
enabled_self_collisions=False,#
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
stabilization_threshold=1e-6,
|
||||
# fix_root_link=True,
|
||||
),
|
||||
),
|
||||
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)),
|
||||
# actuators={}
|
||||
actuators={
|
||||
"passive_damper": ImplicitActuatorCfg(
|
||||
# ".*" 表示匹配该USD文件内的所有关节(无论是轮子、屏幕转轴还是其他)
|
||||
joint_names_expr=[".*"],
|
||||
|
||||
stiffness=0.0,
|
||||
damping=1000.0,
|
||||
effort_limit_sim=10000.0,
|
||||
velocity_limit_sim=100.0,
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
##
|
||||
# Scene definition
|
||||
##
|
||||
from isaaclab.sensors import CameraCfg
|
||||
import isaaclab.sim as sim_utils
|
||||
|
||||
@configclass
|
||||
class PullSceneCfg(InteractiveSceneCfg):
|
||||
"""Configuration for a cart-pole scene."""
|
||||
|
||||
# ground plane
|
||||
ground = AssetBaseCfg(
|
||||
prim_path="/World/ground",
|
||||
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
|
||||
)
|
||||
|
||||
# robot
|
||||
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
|
||||
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
|
||||
dryingbox: ArticulationCfg = DRYINGBOX_CFG.replace(prim_path="{ENV_REGEX_NS}/Dryingbox")
|
||||
# ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
|
||||
# lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
|
||||
|
||||
# lights
|
||||
dome_light = AssetBaseCfg(
|
||||
prim_path="/World/DomeLight",
|
||||
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
|
||||
)
|
||||
|
||||
# # ------------- 修改后的相机配置 (直接使用 USD 内置相机) -------------
|
||||
|
||||
# # 1. 头部相机
|
||||
# # USD 路径: /root/robot_head/cam_head
|
||||
# cam_head = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
# update_latest_camera_pose=True,
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# # 移除 spawn 和 offset,直接使用 USD 中的参数和位置
|
||||
# )
|
||||
|
||||
# # 2. 胸部相机
|
||||
# # USD 路径: /root/robot_trunk/cam_chest
|
||||
# cam_chest = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
|
||||
# update_latest_camera_pose=True,
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# )
|
||||
|
||||
# # 3. 左手相机
|
||||
# # USD 路径: /root/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand
|
||||
# cam_left_hand = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
|
||||
# update_latest_camera_pose=True,
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# )
|
||||
|
||||
# # 4. 右手相机
|
||||
# # USD 路径: /root/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand
|
||||
# cam_right_hand = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
|
||||
# update_latest_camera_pose=True,
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# )
|
||||
|
||||
# ------------- 相机结束 -------------
|
||||
|
||||
|
||||
# # ------------- 新增 6 路相机 -------------
|
||||
# cam_head = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=1.93, # 使用你给的 1.93
|
||||
# # 由 fx = W * f / aperture 推得 aperture = W * f / fx
|
||||
# horizontal_aperture=1280 * 1.93 / 911.77, # ≈ 2.71,单位与 focal_length 一致
|
||||
# clipping_range=(0.1, 1e5),
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.1226748226, 0.0, 0.1130563345),
|
||||
# rot=(-0.5, 0.5, -0.5, 0.5),
|
||||
# # convention="ros",
|
||||
|
||||
# # rot=(0.70710678, 0, 0.70710678, 0),
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_chest = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(-0.14909, 0.3549, -0.23295),
|
||||
# rot=(0.99452, 0.0, 0.10453, 0.0),#(0.99452, 0.0, 0.10453, 0.0),#
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_left_hand = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.0610568560, 1.13e-8, 0.0683504405),#()
|
||||
# rot=(0.99452, 0, 0.10453, 0),#(0.99452, 0, 0.10453, 0)
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_right_hand = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.0610571709, 2.58e-7, 0.0711255539),#(0.0610568560, 1.13e-8, 0.0683504405)
|
||||
# rot=(0.445, -0.549, -0.549, -0.445),#(0.99452, 0, 0.10453, 0)
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=1280,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=True,
|
||||
# )
|
||||
|
||||
# cam_top = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/cam_top",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.4627602211, 0.0, 2.0977711170),
|
||||
# rot=(0.83526, 0, 0.54986, 0),
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=960,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=False,
|
||||
# )
|
||||
|
||||
# cam_side = CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/cam_side",
|
||||
# spawn=sim_utils.PinholeCameraCfg(
|
||||
# focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1e5)
|
||||
# ),
|
||||
# offset=CameraCfg.OffsetCfg(
|
||||
# pos=(0.76709, -0.16995, 1.13007),
|
||||
# rot=(0.61427, 0, 0.02598, 0.78866),
|
||||
# convention="ros",
|
||||
# ),
|
||||
# data_types=["rgb"],
|
||||
# width=960,
|
||||
# height=720,
|
||||
# update_latest_camera_pose=False,
|
||||
# )
|
||||
# # ------------- 相机结束 -------------
|
||||
|
||||
|
||||
##
|
||||
# MDP settings
|
||||
##
|
||||
|
||||
def right_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
|
||||
asset = env.scene[asset_cfg.name]
|
||||
body_ids, _ = asset.find_bodies(["right_hand_body"], preserve_order=True)
|
||||
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
|
||||
return pos_w
|
||||
|
||||
def get_body_pos_rel(env, asset_cfg: SceneEntityCfg) -> torch.Tensor:
|
||||
asset = env.scene[asset_cfg.name]
|
||||
body_ids, _ = asset.find_bodies(asset_cfg.body_names, preserve_order=True)
|
||||
return asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
|
||||
|
||||
# 新增:通用获取 body 世界姿态的函数
|
||||
def get_body_quat_w(env, asset_cfg: SceneEntityCfg) -> torch.Tensor:
|
||||
asset = env.scene[asset_cfg.name]
|
||||
body_ids, _ = asset.find_bodies(asset_cfg.body_names, preserve_order=True)
|
||||
return asset.data.body_quat_w[:, body_ids[0]]
|
||||
|
||||
|
||||
@configclass
|
||||
class ActionsCfg:
|
||||
"""Action specifications for the MDP."""
|
||||
|
||||
# Task-space controller: the policy outputs a 6-D end-effector pose increment.
|
||||
right_arm_ee = DifferentialInverseKinematicsActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["r_joint[1-6]"], # 左臂的6个关节
|
||||
body_name="right_hand_body", # 末端执行器body名称
|
||||
controller=DifferentialIKControllerCfg(
|
||||
command_type="pose", # 控制位置+姿态
|
||||
use_relative_mode=True,  # relative mode: actions are increments
|
||||
ik_method="dls", # Damped Least Squares方法
|
||||
),
|
||||
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
|
||||
)
|
||||
|
||||
|
||||
grippers_position = mdp.BinaryJointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["right_hand_joint_left", "right_hand_joint_right"],
|
||||
# Note: the open/close commands use the dict format
|
||||
# open_command_expr={"关节名正则": 值}
|
||||
open_command_expr={"right_hand_joint_left": 0.0, "right_hand_joint_right": 0.0},
|
||||
# close_command_expr={"关节名正则": 值}
|
||||
close_command_expr={"right_hand_joint_left": -0.03, "right_hand_joint_right": 0.03},
|
||||
)
|
||||
|
||||
trunk_position = mdp.JointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["PrismaticJoint"],
|
||||
scale=0.00,
|
||||
offset=0.25,
|
||||
clip=None
|
||||
)
|
||||
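# Note: assuming Isaac Lab's default action dimensions, the action vector assembled
# above is 6 (relative end-effector pose) + 1 (binary gripper command) + 1 (trunk
# prismatic joint) = 8 values per environment.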
|
||||
|
||||
@configclass
|
||||
class ObservationsCfg:
|
||||
"""Observation specifications for the MDP."""
|
||||
|
||||
@configclass
|
||||
class PolicyCfg(ObsGroup):
|
||||
"""Observations for policy group."""
|
||||
|
||||
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
|
||||
right_gripper_pos = ObsTerm(func=right_gripper_pos_w,
|
||||
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["right_hand_body"])})
|
||||
dryingbox_handle_pos = ObsTerm(
|
||||
func=get_body_pos_rel,
|
||||
params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])}
|
||||
)
|
||||
dryingbox_handle_quat = ObsTerm(
|
||||
func=get_body_quat_w,
|
||||
params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])}
|
||||
)
|
||||
# lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
|
||||
# ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
self.enable_corruption = False
|
||||
self.concatenate_terms = True
|
||||
|
||||
# observation groups
|
||||
policy: PolicyCfg = PolicyCfg()
|
||||
|
||||
|
||||
@configclass
|
||||
class EventCfg:
|
||||
"""Configuration for events."""
|
||||
# Reset all joints to init_state (no offset)
|
||||
reset_mindbot_all_joints = EventTerm(
|
||||
func=mdp.reset_joints_by_offset,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot"),
|
||||
"position_range": (0.0, 0.0),
|
||||
"velocity_range": (0.0, 0.0),
|
||||
},
|
||||
)
|
||||
# Add a (currently zero) random offset to the right-arm joints only
|
||||
reset_mindbot_right_arm = EventTerm(
|
||||
func=mdp.reset_joints_by_offset,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["r_joint[1-6]"]),
|
||||
"position_range": (0.0, 0.0),
|
||||
"velocity_range": (0.0, 0.0),
|
||||
},
|
||||
)
|
||||
reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
|
||||
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
reset_dryingbox=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
|
||||
params={"asset_cfg": SceneEntityCfg("dryingbox"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
|
||||
|
||||
@configclass
|
||||
class RewardsCfg:
|
||||
"""Reward terms for the MDP."""
|
||||
|
||||
gripper_handle_orientation_alignment = RewTerm(
|
||||
func=mdp.gripper_handle_orientation_alignment,
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"handle_name": "Equipment_BB_13_C",
|
||||
"gripper_body_name": "right_hand_body",
|
||||
"gripper_forward_axis": (0.0, 0.0, 1.0),
|
||||
"gripper_width_axis": (0.0, 1.0, 0.0),
|
||||
"lid_handle_axis": (0.0, 0.0, 1.0),
|
||||
# handle_axis was removed; the new function no longer uses it
|
||||
"max_angle_deg": 30.0, # 建议改为 30.0 或更小,85.0 对指向性约束来说太宽松
|
||||
},
|
||||
weight=4.0  # keep around 4.0-5.0 so the orientation constraint provides enough guidance
|
||||
)
|
||||
|
||||
# stage 2
|
||||
# Position-alignment reward: keep the gripper fingers centered on the handle, at height_offset above it in Z
|
||||
|
||||
gripper_handle_position_alignment = RewTerm(
|
||||
func=mdp.gripper_handle_position_alignment,
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"handle_name":"Equipment_BB_13_C",
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"height_offset": 0.00, # Z方向:lid 上方 0.1m
|
||||
"std": 0.3, # 位置对齐的容忍度0.3 ###0.1
|
||||
},
|
||||
weight=3.0
|
||||
)
|
||||
|
||||
# Keep the gripper open while far from the handle:
|
||||
# penalty for closing at a distance
|
||||
gripper_closed_penalty = RewTerm(
|
||||
func=mdp.gripper_open_penalty_when_far,
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"handle_name": "Equipment_BB_13_C",
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
"dist_threshold": 0.02, # 3cm 以外不允许闭合
|
||||
"max_penalty_dist":0.055 #0.055
|
||||
},
|
||||
weight=1.0  # raise this (e.g. to 8.0) to make the penalty bite harder
|
||||
)
|
||||
|
||||
|
||||
# [New] Fine alignment (guides the gripper into the ~2 cm zone)
|
||||
gripper_handle_fine_alignment = RewTerm(
|
||||
func=mdp.gripper_handle_position_alignment,
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"handle_name":"Equipment_BB_13_C",
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"height_offset": 0.00, # Z方向:lid 上方 0.1m
|
||||
"std": 0.03, # 位置对齐的容忍度0.05
|
||||
},
|
||||
weight=5.0  # high weight
|
||||
)
|
||||
|
||||
# gripper_close_interaction = RewTerm(
|
||||
# func=mdp.gripper_close_when_near,
|
||||
# params={
|
||||
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
# "robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
# "handle_name": "Equipment_BB_13_C",
|
||||
# "left_gripper_name": "right_hand_l",
|
||||
# "right_gripper_name": "right_hand__r",
|
||||
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
# # 关键匹配把手尺寸
|
||||
# "handle_thickness": 0.0388, # 把手厚度(X/宽度向抓取方向)
|
||||
# "thickness_tol": 0.008, # 误差容忍,约 0.8cm
|
||||
# "grasp_radius": 0.04, # 进入此半径才计算闭合奖励
|
||||
# "height_offset": 0.00,
|
||||
# "target_close_pos": 0.03, # 保留给关节闭合幅度
|
||||
# },
|
||||
# weight=10.0
|
||||
# )
|
||||
gripper_close_interaction = RewTerm(
|
||||
func=mdp.gripper_close_when_near,
|
||||
params={
|
||||
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"handle_name":"Equipment_BB_13_C",
|
||||
"left_gripper_name": "right_hand_l",
|
||||
"right_gripper_name": "right_hand__r",
|
||||
"joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
|
||||
"target_close_pos": 0.03,
|
||||
"height_offset": 0.00,#0.07
|
||||
"grasp_radius": 0.04,#original 0.03
|
||||
},
|
||||
weight=10.0
|
||||
)
|
||||
|
||||
# lid_lifted_reward = RewTerm(
|
||||
# func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
|
||||
# params={
|
||||
# "lid_cfg": SceneEntityCfg("lid"),
|
||||
# "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
|
||||
# },
|
||||
# weight=10.0 # 给一个足够大的权重
|
||||
# )
|
||||
|
||||
# lid_lifting_reward = RewTerm(
|
||||
# func=mdp.lid_lift_progress_reward,
|
||||
# params={
|
||||
# "lid_cfg": SceneEntityCfg("lid"),
|
||||
# "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
|
||||
# "target_height_lift": 0.25,
|
||||
# "height_offset": 0.07, # 与其他奖励保持一致
|
||||
# "std": 0.1
|
||||
# },
|
||||
# # 权重设大一点,告诉它这是最终目标
|
||||
# weight=10.0
|
||||
# )
@configclass
|
||||
class TerminationsCfg:
|
||||
"""Termination terms for the MDP."""
|
||||
|
||||
time_out = DoneTerm(func=mdp.time_out, time_out=True)
|
||||
|
||||
mindbot_fly_away = DoneTerm(
|
||||
func=mdp.base_height_failure,
|
||||
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
|
||||
)
|
||||
|
||||
# lid_fly_away = DoneTerm(
|
||||
# func=mdp.base_height_failure,
|
||||
# params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
|
||||
# )
|
||||
|
||||
# # 新增:盖子掉落判定
|
||||
# # 如果盖子掉回桌面(高度接近初始高度),且已经运行了一段时间(避开刚开始重置的瞬间)
|
||||
# lid_dropped = DoneTerm(
|
||||
# func=mdp.base_height_failure, # 复用高度判定函数
|
||||
# params={
|
||||
# "asset_cfg": SceneEntityCfg("lid"),
|
||||
# "minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了
|
||||
# },
|
||||
# )
|
||||
|
||||
|
||||
|
||||
##
|
||||
# Environment configuration
|
||||
##
|
||||
|
||||
@configclass
|
||||
class CurriculumCfg:
|
||||
pass
|
||||
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
|
||||
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
|
||||
|
||||
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
|
||||
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
|
||||
|
||||
# position_std_scheduler = CurrTerm(
|
||||
# func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类
|
||||
# params={
|
||||
# # 路径:rewards管理器 -> 你的奖励项名 -> params字典 -> std键
|
||||
# "address": "rewards.gripper_lid_position_alignment.params.std",
|
||||
|
||||
# # 修改逻辑:使用我们刚才写的函数
|
||||
# "modify_fn": mdp.annealing_std,
|
||||
|
||||
# # 传给函数的参数
|
||||
# "modify_params": {
|
||||
# "start_step": 00, # 约 600 轮
|
||||
# "end_step": 5_000, # 约 1200 轮
|
||||
# "start_std": 0.3,
|
||||
# "end_std": 0.05,
|
||||
# }
|
||||
# }
|
||||
# )
|
||||
|
||||
@configclass
|
||||
class PullEnvCfg(ManagerBasedRLEnvCfg):
|
||||
# Scene settings
|
||||
scene: PullSceneCfg = PullSceneCfg(num_envs=5, env_spacing=3.0)
|
||||
# Basic settings
|
||||
observations: ObservationsCfg = ObservationsCfg()
|
||||
actions: ActionsCfg = ActionsCfg()
|
||||
events: EventCfg = EventCfg()
|
||||
# MDP settings
|
||||
rewards: RewardsCfg = RewardsCfg()
|
||||
terminations: TerminationsCfg = TerminationsCfg()
|
||||
# curriculum: CurriculumCfg = CurriculumCfg()
|
||||
|
||||
# Post initialization
|
||||
def __post_init__(self) -> None:
|
||||
"""Post initialization."""
|
||||
# general settings
|
||||
self.decimation = 4  # was 2; with dt = 1/120 this gives a 30 Hz control rate
|
||||
self.episode_length_s = 10
|
||||
# viewer settings
|
||||
self.viewer.eye = (3.5, 0.0, 3.2)
|
||||
# simulation settings
|
||||
self.sim.dt = 1 / 120 #original 1 / 60
|
||||
self.sim.render_interval = self.decimation
|
||||
# 1. Base GPU heap sizes
|
||||
self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256MB
|
||||
self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
|
||||
self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
|
||||
self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # ~16.7M contact points
|
||||
self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # ~1M contact patches
|
||||
|
||||
# 5. Aggregate-pair capacities (for complex articulations)
|
||||
self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
|
||||
self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
|
||||
self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024
|
||||
@@ -0,0 +1,29 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import gymnasium as gym
|
||||
|
||||
from . import agents
|
||||
|
||||
##
|
||||
# Register Gym environments.
|
||||
##
|
||||
|
||||
|
||||
gym.register(
|
||||
id="Pull-Ultrasound-Lid-Up-v0",
|
||||
entry_point="isaaclab.envs:ManagerBasedRLEnv",
|
||||
disable_env_checker=True,
|
||||
kwargs={
|
||||
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
|
||||
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
|
||||
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
|
||||
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
|
||||
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
|
||||
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
|
||||
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
|
||||
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
|
||||
},
|
||||
)
|
||||
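# Minimal usage sketch (assumes the standard Isaac Lab flow; how env_cfg is loaded
# is illustrative): once `mindbot.tasks` is imported, the id registered above can be
# instantiated through gymnasium.
#
# import gymnasium as gym
# import mindbot.tasks  # noqa: F401  (runs the gym.register call above)
#
# env = gym.make("Pull-Ultrasound-Lid-Up-v0", cfg=env_cfg, render_mode=None)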
@@ -0,0 +1,4 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
@@ -0,0 +1,78 @@
|
||||
params:
|
||||
seed: 42
|
||||
|
||||
# environment wrapper clipping
|
||||
env:
|
||||
# added to the wrapper
|
||||
clip_observations: 5.0
|
||||
# can make custom wrapper?
|
||||
clip_actions: 1.0
|
||||
|
||||
algo:
|
||||
name: a2c_continuous
|
||||
|
||||
model:
|
||||
name: continuous_a2c_logstd
|
||||
|
||||
# doesn't have this fine grained control but made it close
|
||||
network:
|
||||
name: actor_critic
|
||||
separate: False
|
||||
space:
|
||||
continuous:
|
||||
mu_activation: None
|
||||
sigma_activation: None
|
||||
|
||||
mu_init:
|
||||
name: default
|
||||
sigma_init:
|
||||
name: const_initializer
|
||||
val: 0
|
||||
fixed_sigma: True
|
||||
mlp:
|
||||
units: [32, 32]
|
||||
activation: elu
|
||||
d2rl: False
|
||||
|
||||
initializer:
|
||||
name: default
|
||||
regularizer:
|
||||
name: None
|
||||
|
||||
load_checkpoint: False # flag which sets whether to load the checkpoint
|
||||
load_path: '' # path to the checkpoint to load
|
||||
|
||||
config:
|
||||
name: cartpole_direct
|
||||
env_name: rlgpu
|
||||
device: 'cuda:0'
|
||||
device_name: 'cuda:0'
|
||||
multi_gpu: False
|
||||
ppo: True
|
||||
mixed_precision: False
|
||||
normalize_input: True
|
||||
normalize_value: True
|
||||
num_actors: -1 # configured from the script (based on num_envs)
|
||||
reward_shaper:
|
||||
scale_value: 0.1
|
||||
normalize_advantage: True
|
||||
gamma: 0.99
|
||||
tau : 0.95
|
||||
learning_rate: 5e-4
|
||||
lr_schedule: adaptive
|
||||
kl_threshold: 0.008
|
||||
score_to_win: 20000
|
||||
max_epochs: 150
|
||||
save_best_after: 50
|
||||
save_frequency: 25
|
||||
grad_norm: 1.0
|
||||
entropy_coef: 0.0
|
||||
truncate_grads: True
|
||||
e_clip: 0.2
|
||||
horizon_length: 32
|
||||
minibatch_size: 16384
|
||||
mini_epochs: 8
|
||||
critic_coef: 4
|
||||
clip_value: True
|
||||
seq_length: 4
|
||||
bounds_loss_coef: 0.0001
|
||||
@@ -0,0 +1,72 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from isaaclab.utils import configclass
|
||||
|
||||
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
|
||||
|
||||
|
||||
@configclass
|
||||
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
|
||||
num_steps_per_env = 48  # slightly more steps per rollout
|
||||
max_iterations = 5000  # more iterations so the policy has enough time to learn
|
||||
save_interval = 100
|
||||
# experiment_name = "mindbot_grasp" # 建议改个名,不要叫 cartpole 了
|
||||
experiment_name = "mindbot_pullUltrasoundLidUp"
|
||||
|
||||
policy = RslRlPpoActorCriticCfg(
|
||||
init_noise_std=0.7,
|
||||
actor_obs_normalization=True,  # enable observation normalization
|
||||
critic_obs_normalization=True,  # enable observation normalization
|
||||
# Larger capacity: a three-layer MLP with enough width
|
||||
actor_hidden_dims=[128, 64, 32],
|
||||
critic_hidden_dims=[128, 64, 32],
|
||||
activation="elu",
|
||||
)
|
||||
|
||||
algorithm = RslRlPpoAlgorithmCfg(
|
||||
value_loss_coef=1.0,
|
||||
use_clipped_value_loss=True,
|
||||
clip_param=0.2,
|
||||
entropy_coef=0.005,  # keep a moderate amount of exploration
|
||||
num_learning_epochs=5,
|
||||
num_mini_batches=8,
|
||||
learning_rate=3e-4,  # if training is unstable with the larger network, try 5.0e-4
|
||||
schedule="adaptive",
|
||||
gamma=0.99,
|
||||
lam=0.95,
|
||||
desired_kl=0.01,
|
||||
max_grad_norm=1.0,
|
||||
)
|
||||
|
||||
|
||||
# @configclass
|
||||
# class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
|
||||
# num_steps_per_env = 16
|
||||
# max_iterations = 150
|
||||
# save_interval = 50
|
||||
# experiment_name = "cartpole_direct"
|
||||
# policy = RslRlPpoActorCriticCfg(
|
||||
# init_noise_std=1.0,
|
||||
# actor_obs_normalization=False,
|
||||
# critic_obs_normalization=False,
|
||||
# actor_hidden_dims=[32, 32],
|
||||
# critic_hidden_dims=[32, 32],
|
||||
# activation="elu",
|
||||
# )
|
||||
# algorithm = RslRlPpoAlgorithmCfg(
|
||||
# value_loss_coef=1.0,
|
||||
# use_clipped_value_loss=True,
|
||||
# clip_param=0.2,
|
||||
# entropy_coef=0.005,
|
||||
# num_learning_epochs=5,
|
||||
# num_mini_batches=4,
|
||||
# learning_rate=1.0e-3,
|
||||
# schedule="adaptive",
|
||||
# gamma=0.99,
|
||||
# lam=0.95,
|
||||
# desired_kl=0.01,
|
||||
# max_grad_norm=1.0,
|
||||
# )
|
||||
@@ -0,0 +1,20 @@
|
||||
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
|
||||
seed: 42
|
||||
|
||||
n_timesteps: !!float 1e6
|
||||
policy: 'MlpPolicy'
|
||||
n_steps: 16
|
||||
batch_size: 4096
|
||||
gae_lambda: 0.95
|
||||
gamma: 0.99
|
||||
n_epochs: 20
|
||||
ent_coef: 0.01
|
||||
learning_rate: !!float 3e-4
|
||||
clip_range: !!float 0.2
|
||||
policy_kwargs:
|
||||
activation_fn: nn.ELU
|
||||
net_arch: [32, 32]
|
||||
squash_output: False
|
||||
vf_coef: 1.0
|
||||
max_grad_norm: 1.0
|
||||
device: "cuda:0"
|
||||
@@ -0,0 +1,111 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: True
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: -2.9
|
||||
fixed_log_std: True
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ONE
|
||||
discriminator: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
# AMP memory (reference motion dataset)
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
motion_dataset:
|
||||
class: RandomMemory
|
||||
memory_size: 200000
|
||||
|
||||
# AMP memory (preventing discriminator overfitting)
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
reply_buffer:
|
||||
class: RandomMemory
|
||||
memory_size: 1000000
|
||||
|
||||
|
||||
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
|
||||
agent:
|
||||
class: AMP
|
||||
rollouts: 16
|
||||
learning_epochs: 6
|
||||
mini_batches: 2
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 5.0e-05
|
||||
learning_rate_scheduler: null
|
||||
learning_rate_scheduler_kwargs: null
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
amp_state_preprocessor: RunningStandardScaler
|
||||
amp_state_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 0.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.5
|
||||
discriminator_loss_scale: 5.0
|
||||
amp_batch_size: 512
|
||||
task_reward_weight: 0.0
|
||||
style_reward_weight: 1.0
|
||||
discriminator_batch_size: 4096
|
||||
discriminator_reward_scale: 2.0
|
||||
discriminator_logit_regularization_scale: 0.05
|
||||
discriminator_gradient_penalty_scale: 5.0
|
||||
discriminator_weight_decay_scale: 1.0e-04
|
||||
# rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "humanoid_amp_run"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 80000
|
||||
environment_info: log
|
||||
@@ -0,0 +1,80 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: False
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
|
||||
agent:
|
||||
class: IPPO
|
||||
rollouts: 16
|
||||
learning_epochs: 8
|
||||
mini_batches: 1
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 3.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cart_double_pendulum_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,82 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: True
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
|
||||
agent:
|
||||
class: MAPPO
|
||||
rollouts: 16
|
||||
learning_epochs: 8
|
||||
mini_batches: 1
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 3.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
shared_state_preprocessor: RunningStandardScaler
|
||||
shared_state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cart_double_pendulum_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,80 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: False
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
|
||||
agent:
|
||||
class: PPO
|
||||
rollouts: 32
|
||||
learning_epochs: 8
|
||||
mini_batches: 8
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 5.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 0.1
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cartpole_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
"""This sub-module contains the functions that are specific to the environment."""
|
||||
|
||||
from isaaclab.envs.mdp import * # noqa: F401, F403
|
||||
|
||||
from .rewards import * # noqa: F401, F403
|
||||
from .terminations import * # noqa: F401, F403
|
||||
from .gripper import * # noqa: F401, F403
|
||||
from .curriculums import * # noqa: F401, F403
|
||||
@@ -0,0 +1,50 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
    import torch
    from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
|
||||
from isaaclab.envs.mdp import modify_term_cfg
|
||||
|
||||
def annealing_std(
|
||||
env: ManagerBasedRLEnv,
|
||||
env_ids: torch.Tensor,
|
||||
current_value: float,
|
||||
start_step: int,
|
||||
end_step: int,
|
||||
start_std: float,
|
||||
end_std: float
|
||||
):
|
||||
"""
|
||||
根据步数线性退火 std 值。
|
||||
|
||||
Args:
|
||||
current_value: 当前的参数值 (系统自动传入)
|
||||
start_step: 开始退火的步数
|
||||
end_step: 结束退火的步数
|
||||
start_std: 初始 std
|
||||
end_std: 最终 std
|
||||
"""
|
||||
current_step = env.common_step_counter
|
||||
|
||||
    # 1. Before the start step -> keep the initial value
    if current_step < start_step:
        # force start_std in case the current value has not been set to it yet
        return start_std
    # 2. Past the end step -> keep the final value
    elif current_step >= end_step:
        return end_std
    # 3. In between -> linear interpolation
    else:
        ratio = (current_step - start_step) / (end_step - start_step)
        new_std = start_std + (end_std - start_std) * ratio
        return new_std
|
||||
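# Wiring sketch (an assumption, mirroring the commented-out CurriculumCfg in the env
# config): annealing_std is meant to be used as the `modify_fn` of mdp.modify_term_cfg,
# addressed at a reward term's `std` parameter. The reward-term name below is illustrative.
#
# from isaaclab.managers import CurriculumTermCfg as CurrTerm
#
# position_std_scheduler = CurrTerm(
#     func=modify_term_cfg,
#     params={
#         "address": "rewards.gripper_handle_position_alignment.params.std",
#         "modify_fn": annealing_std,
#         "modify_params": {"start_step": 0, "end_step": 5_000, "start_std": 0.3, "end_std": 0.05},
#     },
# )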
@@ -0,0 +1,54 @@
|
||||
# Assumed to live in the task's mdp module
|
||||
|
||||
from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
|
||||
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
|
||||
from isaaclab.utils import configclass
|
||||
import torch
|
||||
|
||||
|
||||
class CoupledJointPositionAction(JointPositionAction):
|
||||
def __init__(self, cfg: 'CoupledJointPositionActionCfg', env):
|
||||
super().__init__(cfg, env)
|
||||
|
||||
@property
|
||||
def action_dim(self) -> int:
|
||||
"""强制 ActionManager 认为只需要 1D 输入。"""
|
||||
return 1
|
||||
|
||||
"""
|
||||
这是运行时被实例化的类。它继承自 JointPositionAction。
|
||||
"""
|
||||
def process_actions(self, actions: torch.Tensor):
|
||||
scale = self.cfg.scale
|
||||
offset = self.cfg.offset
|
||||
# store the raw actions
|
||||
self._raw_actions[:] = torch.clamp(actions, -1, 1)
|
||||
# apply the affine transformations
|
||||
target_position_interval = self._raw_actions * scale + offset
|
||||
right_cmd = target_position_interval
|
||||
left_cmd = -target_position_interval
|
||||
# import pdb; pdb.set_trace()
|
||||
self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)
|
||||
# print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}")
|
||||
# clip actions
|
||||
if self.cfg.clip is not None:
|
||||
self._processed_actions = torch.clamp(
|
||||
self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
|
||||
)
|
||||
|
||||
|
||||
@configclass
|
||||
class CoupledJointPositionActionCfg(JointPositionActionCfg):
|
||||
"""
|
||||
配置类。关键在于设置 class_type 指向上面的实现类。
|
||||
"""
|
||||
# Key point: tell the ActionManager to instantiate CoupledJointPositionAction for this config.
|
||||
class_type = CoupledJointPositionAction
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
# This check is fine here: the ActionManager calls __post_init__ during setup.
|
||||
if len(self.joint_names) != 2:
|
||||
raise ValueError("Requires exactly two joint names.")
|
||||
super().__post_init__()
|
||||
|
||||
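# Usage sketch (an assumption, not part of this commit): the coupled 1-D action could
# replace the binary gripper term in ActionsCfg. Joint names follow the ones used
# elsewhere in this commit; the scale/offset values are illustrative.
#
# grippers_position = CoupledJointPositionActionCfg(
#     asset_name="Mindbot",
#     joint_names=["right_hand_joint_left", "right_hand_joint_right"],
#     scale=0.015,   # clamped [-1, 1] action maps to +/-1.5 cm of travel
#     offset=0.015,
# )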
@@ -0,0 +1,881 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import torch
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from isaaclab.assets import Articulation, RigidObject
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
|
||||
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
|
||||
"""Penalize joint position deviation from a target value."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
asset: Articulation = env.scene[asset_cfg.name]
|
||||
# wrap the joint positions to (-pi, pi)
|
||||
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
|
||||
# compute the reward
|
||||
return torch.sum(torch.square(joint_pos - target), dim=1)
|
||||
|
||||
|
||||
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
|
||||
"""Reward the agent for lifting the lid above the minimal height."""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
|
||||
height = lid.data.root_pos_w[:, 2]
|
||||
return torch.where(height > minimal_height, 1.0, 0.0)
|
||||
|
||||
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Reward the agent for reaching the lid using tanh-kernel."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# Coerce std to a Python float so it cannot broadcast into a (3,) vector
|
||||
if not isinstance(std, float):
|
||||
if torch.is_tensor(std):
|
||||
std = std.item()
|
||||
else:
|
||||
std = float(std)
|
||||
|
||||
# Target lid position: (num_envs, 3)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# Gripper position: (num_envs, 3)
|
||||
# body_idx = robot.find_bodies(gripper_body_name)[0]
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
|
||||
# Distance of the gripper to the lid: (num_envs,)
|
||||
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
|
||||
|
||||
return 1 - torch.tanh(distance / std)
|
||||
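# Shape of the tanh kernel above (illustrative values):
#   distance = 0        -> reward = 1 - tanh(0)  = 1.0
#   distance = std      -> reward = 1 - tanh(1) ~= 0.24
#   distance = 3 * std  -> reward = 1 - tanh(3) ~= 0.005
# so std sets the length scale over which the reaching reward decays.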
|
||||
|
||||
def lid_grasped(env: ManagerBasedRLEnv,
|
||||
distance_threshold: float = 0.05,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Reward the agent for grasping the lid (close and lifted)."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# Target lid position: (num_envs, 3)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# Gripper position: (num_envs, 3)
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
|
||||
|
||||
# Distance of the gripper to the lid: (num_envs,)
|
||||
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
|
||||
|
||||
# Check if close and lifted
|
||||
is_close = distance < distance_threshold
|
||||
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
|
||||
|
||||
return torch.where(is_close & is_lifted, 1.0, 0.0)
|
||||
|
||||
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
"""Combined reward for reaching the lid AND lifting it."""
|
||||
# Get reaching reward
|
||||
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
|
||||
# Get lifting reward
|
||||
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
|
||||
# Combine rewards multiplicatively
|
||||
return reach_reward * lift_reward
|
||||
|
||||
|
||||
|
||||
# def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# left_gripper_name: str = "left_hand__l",
|
||||
# right_gripper_name: str = "left_hand_r",
|
||||
# height_offset: float = 0.1,
|
||||
# std: float = 0.1) -> torch.Tensor:
|
||||
# """奖励函数:夹爪相对于lid的位置对齐。
|
||||
|
||||
# Args:
|
||||
# env: 环境实例
|
||||
# lid_cfg: lid的配置
|
||||
# robot_cfg: 机器人的配置
|
||||
# left_gripper_name: 左侧夹爪body名称
|
||||
# right_gripper_name: 右侧夹爪body名称
|
||||
# side_distance: Y方向的期望距离
|
||||
# height_offset: Z方向的期望高度偏移
|
||||
# std: tanh核函数的标准差
|
||||
|
||||
# Returns:
|
||||
# 位置对齐奖励 (num_envs,)
|
||||
# """
|
||||
# lid: RigidObject = env.scene[lid_cfg.name]
|
||||
# robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# # 获取两个夹爪的位置
|
||||
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
|
||||
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
|
||||
|
||||
# # 计算夹爪中心位置
|
||||
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
|
||||
|
||||
# # 计算相对位置(夹爪中心相对于 lid 中心)
|
||||
# rel_pos = gripper_center - lid_pos_w # (num_envs, 3)
|
||||
|
||||
# # X 方向位置:应该对齐(接近 0)
|
||||
# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6
|
||||
# x_reward = 1.0 - torch.tanh(x_dist / std)
|
||||
|
||||
# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance)
|
||||
# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6
|
||||
# # y_error = torch.abs(y_dist - side_distance)
|
||||
# # y_reward = 1.0 - torch.tanh(y_error / std)
|
||||
# y_reward = 1.0 - torch.tanh(y_dist / std)
|
||||
|
||||
# # Z 方向位置:应该在 lid 上方 height_offset 处
|
||||
# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6
|
||||
# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6
|
||||
# z_reward = 1.0 - torch.tanh(z_error / std)
|
||||
|
||||
# # 组合位置奖励
|
||||
# position_reward = x_reward * y_reward * z_reward
|
||||
|
||||
# return position_reward
|
||||
|
||||
|
||||
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
height_offset: float = 0.02,
|
||||
std: float = 0.1) -> torch.Tensor:
|
||||
"""奖励函数:夹爪相对于lid的位置对齐(简化版)。
|
||||
|
||||
计算夹爪中心到目标点(lid上方height_offset处)的距离奖励。
|
||||
|
||||
Args:
|
||||
env: 环境实例
|
||||
lid_cfg: lid的配置
|
||||
robot_cfg: 机器人的配置
|
||||
left_gripper_name: 左侧夹爪body名称
|
||||
right_gripper_name: 右侧夹爪body名称
|
||||
height_offset: Z方向的期望高度偏移(目标点在lid上方)
|
||||
std: tanh核函数的标准差
|
||||
|
||||
Returns:
|
||||
位置对齐奖励 (num_envs,)
|
||||
"""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# 目标点:lid位置 + height_offset(Z方向)
|
||||
target_pos = lid_pos_w.clone()
|
||||
# print(target_pos[0])
|
||||
target_pos[:, 2] += height_offset # 在lid上方height_offset处
|
||||
# print(target_pos[0])
|
||||
# 获取两个夹爪的位置
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
|
||||
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
|
||||
|
||||
# 计算夹爪中心位置
|
||||
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
|
||||
# print(gripper_center[0])
|
||||
# 计算夹爪中心到目标点的3D距离
|
||||
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# 距离奖励:距离越小,奖励越大
|
||||
position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
return position_reward
|
||||
|
||||
|
||||
# ... (保留原有的 import)
|
||||
|
||||
# ==============================================================================
|
||||
# New: direct distance penalty (linear guidance)
|
||||
# ==============================================================================
|
||||
def gripper_lid_distance_penalty(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
height_offset: float = 0.07,
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
计算夹爪中心到目标点的欧式距离。
|
||||
返回的是正的距离值,我们会在 config 里给它设置负权重 (比如 weight=-1.0)。
|
||||
这样:距离越远 -> 惩罚越大 (Total Reward 越低)。
|
||||
"""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
# 1. Target point (lid center + height offset)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
lid_pos_w[:, 2] += height_offset
|
||||
|
||||
# 2. Gripper center
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
left_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
|
||||
right_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
|
||||
gripper_center = (left_pos + right_pos) / 2.0
|
||||
|
||||
# 3. Distance
|
||||
distance = torch.norm(gripper_center - lid_pos_w, dim=1)
|
||||
|
||||
return distance
|
||||
|
||||
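# Wiring sketch (an assumption): this term is intended to be registered with a
# negative weight in RewardsCfg, using the same body names as the defaults above.
#
# reach_penalty = RewTerm(
#     func=mdp.gripper_lid_distance_penalty,
#     params={
#         "lid_cfg": SceneEntityCfg("lid"),
#         "robot_cfg": SceneEntityCfg("Mindbot"),
#         "left_gripper_name": "left_hand__l",
#         "right_gripper_name": "left_hand_r",
#         "height_offset": 0.07,
#     },
#     weight=-1.0,  # distance enters negatively, so farther = more penalty
# )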
# ==============================================================================
|
||||
# Tuned: position alignment (redefines gripper_lid_position_alignment above; this version takes effect)
|
||||
# ==============================================================================
|
||||
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
height_offset: float = 0.02,
|
||||
std: float = 0.1) -> torch.Tensor:
|
||||
"""奖励函数:基于 tanh 核函数的距离奖励 (靠近得越多,分数涨得越快)"""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
target_pos = lid_pos_w.clone()
|
||||
target_pos[:, 2] += height_offset
|
||||
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
|
||||
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
|
||||
|
||||
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
|
||||
|
||||
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
|
||||
|
||||
# 这里保持不变,关键在于我们在 config 里怎么设置 std 和 weight
|
||||
position_reward = 1.0 - torch.tanh(distance / std)
|
||||
|
||||
return position_reward
|
||||
|
||||
|
||||
# def gripper_lid_orientation_alignment(
|
||||
# env,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# gripper_body_name: str = "left_hand_body",
|
||||
# gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z
|
||||
# gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行)
|
||||
# lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# # 1. 获取对象
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 2. 获取姿态
|
||||
# lid_quat_w = lid.data.root_quat_w
|
||||
# body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
# gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
|
||||
|
||||
# # 3. 垂直对齐 (Vertical Alignment)
|
||||
# # 目标:夹爪 +Y 指向 World -Z
|
||||
# grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
|
||||
# # print(grip_fwd_world[0])
|
||||
# world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
|
||||
# dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) #add
|
||||
# # 映射 [-1, 1] -> [0, 1],且全程有梯度
|
||||
# # 即使是朝天 (+1),只要稍微往下转一点,分数就会增加
|
||||
# # rew_vertical = (dot_vertical + 1.0) / 2.0
|
||||
# val_vertical = torch.clamp((dot_vertical + 1.0) / 2.0, 0.0, 1.0)
|
||||
# rew_vertical = torch.pow(val_vertical, 2) + 1e-4
|
||||
# # nan -> 0
|
||||
# rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
|
||||
|
||||
# # 4. 偏航对齐 (Yaw Alignment)
|
||||
# # 目标:夹爪 +Z 平行于 Lid +Y
|
||||
# grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
|
||||
|
||||
# lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
# lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
|
||||
|
||||
# dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
|
||||
# rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
|
||||
# # nan -> 0
|
||||
# rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
|
||||
|
||||
# # 5. 组合
|
||||
# total_reward = rew_vertical * rew_yaw
|
||||
|
||||
# total_reward=torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
|
||||
|
||||
# # debug
|
||||
# if not torch.isfinite(total_reward).all():
|
||||
# print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
|
||||
# print(f"rew_vertical: {rew_vertical}")
|
||||
# print(f"rew_yaw: {rew_yaw}")
|
||||
|
||||
# return total_reward
|
||||
|
||||
|
||||
def gripper_lid_orientation_alignment(
|
||||
env,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body",
|
||||
gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z
|
||||
gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行)
|
||||
lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y
|
||||
max_angle_deg: float = 60.0, # 允许的最大角度偏差(度)
|
||||
) -> torch.Tensor:
|
||||
|
||||
# 1. 获取对象
|
||||
lid = env.scene[lid_cfg.name]
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 2. 获取姿态
|
||||
lid_quat_w = lid.data.root_quat_w
|
||||
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
|
||||
gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
|
||||
|
||||
# 3. 垂直对齐 (Vertical Alignment)
|
||||
# 目标:夹爪前向轴指向 World -Z(向下)
|
||||
grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
|
||||
world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
|
||||
|
||||
# 计算点积(归一化后,点积 = cos(角度))
|
||||
dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
|
||||
dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0)
|
||||
|
||||
# 计算角度(弧度)
|
||||
angle_rad = torch.acos(torch.clamp(dot_vertical, -1.0, 1.0))
|
||||
max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device)
|
||||
|
||||
# 如果角度 <= max_angle_deg,给予奖励
|
||||
# 奖励从角度=0时的1.0平滑衰减到角度=max_angle时的0.0
|
||||
angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
|
||||
rew_vertical = 1.0 - angle_normalized # 角度越小,奖励越大
|
||||
|
||||
# 如果角度超过60度,奖励为0
|
||||
rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical))
|
||||
|
||||
# nan -> 0
|
||||
rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
|
||||
|
||||
# 4. 偏航对齐 (Yaw Alignment)
|
||||
# 目标:夹爪 +Z 平行于 Lid +Y
|
||||
grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
|
||||
|
||||
lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
|
||||
lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
|
||||
|
||||
dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
|
||||
rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
|
||||
# nan -> 0
|
||||
rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
|
||||
|
||||
# 5. 组合
|
||||
total_reward = rew_vertical * rew_yaw
|
||||
|
||||
total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
|
||||
|
||||
# debug
|
||||
if not torch.isfinite(total_reward).all():
|
||||
print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
|
||||
print(f"rew_vertical: {rew_vertical}")
|
||||
print(f"rew_yaw: {rew_yaw}")
|
||||
|
||||
return total_reward
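# --- Editor's summary (reference only, not original code) of the reward above ---
# With theta = acos(clamp(fwd_world . (-z_world), -1, 1)) and theta_max = max_angle_deg in radians:
#   rew_vertical = 1 - theta / theta_max   if theta <= theta_max, else 0
#   rew_yaw      = (width_world . handle_world)^2 + 1e-4
#   total_reward = rew_vertical * rew_yaw
# Example: theta = 30 deg with max_angle_deg = 60 gives rew_vertical = 0.5.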
|
||||
|
||||
|
||||
# def gripper_open_close_reward(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# left_gripper_name: str = "left_hand_body", # 用于计算距离的 body
|
||||
# # 注意:确保 joint_names 只包含那两个夹爪关节
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# close_threshold: float = 0.05, # 5cm 以内视为“近”,需要闭合
|
||||
# target_open_pos: float = 0.0, # 【修正】张开是 0.0
|
||||
# target_close_pos: float = 0.03 # 【修正】闭合是 0.03
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 逻辑很简单:
|
||||
# 1. 如果 距离 < close_threshold:目标关节位置 = 0.03 (闭合)
|
||||
# 2. 如果 距离 >= close_threshold:目标关节位置 = 0.0 (张开)
|
||||
# 3. 返回 -abs(当前关节 - 目标关节),即惩罚误差。
|
||||
# """
|
||||
|
||||
# # 1. 获取对象
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 2. 计算距离 (末端 vs Lid中心)
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
|
||||
# # distance: (num_envs,)
|
||||
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
|
||||
|
||||
# # 3. 动态确定目标关节值
|
||||
# # 如果距离小于阈值,目标设为 close_pos,否则设为 open_pos
|
||||
# # target_val: (num_envs, 1)
|
||||
# # print(distance)
|
||||
# target_val = torch.where(
|
||||
# distance < close_threshold,
|
||||
# torch.tensor(target_close_pos, device=env.device),
|
||||
# torch.tensor(target_open_pos, device=env.device)
|
||||
# ).unsqueeze(1)
|
||||
|
||||
# # 4. 获取当前关节位置
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # joint_pos: (num_envs, 2)
|
||||
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
|
||||
|
||||
# # 取绝对值(防止左指是负数的情况,虽然你的配置里范围看起来都是正的,加上abs更保险)
|
||||
# current_joint_pos_abs = torch.abs(current_joint_pos)
|
||||
|
||||
# # 5. 计算误差
|
||||
# # error: (num_envs,) -> 两个手指误差的平均值
|
||||
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
|
||||
|
||||
# # # 6. 返回负奖励 (惩罚)
|
||||
# # # 误差越大,惩罚越大。完全符合要求时奖励为 0。
|
||||
# # return -error
|
||||
# # =====================================================
|
||||
# # 🚀 核心修复:安全输出
|
||||
# # =====================================================
|
||||
# # 1. 使用 tanh 限制数值范围在 [-1, 0]
|
||||
# reward = -torch.tanh(error / 0.01)
|
||||
|
||||
# # 2. 过滤 NaN!这一步能救命!
|
||||
# # 如果物理引擎算出 NaN,这里会把它变成 0.0,防止训练崩溃
|
||||
# reward = torch.nan_to_num(reward, nan=0.0, posinf=0.0, neginf=-1.0)
|
||||
|
||||
# return reward
|
||||
|
||||
|
||||
# def gripper_open_close_reward(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# left_gripper_name: str = "left_hand_body",
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# close_threshold: float = 0.05,
|
||||
# target_open_pos: float = 0.0,
|
||||
# target_close_pos: float = 0.03,
|
||||
# height_offset: float = 0.06, # 新增:与位置奖励保持一致
|
||||
# ) -> torch.Tensor:
|
||||
# """
|
||||
# 逻辑:
|
||||
# 1. 计算夹爪到【目标抓取点】(Lid上方 height_offset 处) 的距离。
|
||||
# 2. 如果距离 < close_threshold:目标关节位置 = 闭合。
|
||||
# 3. 否则:目标关节位置 = 张开。
|
||||
# """
|
||||
|
||||
# # 1. 获取对象
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
|
||||
# # 2. 计算目标抓取点位置 (Lid位置 + Z轴偏移)
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
# lid_pos_w[:, 2] += height_offset # 修正为抓取点位置
|
||||
|
||||
# # 获取夹爪位置
|
||||
# body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
|
||||
# # 3. 计算距离 (夹爪 vs 目标抓取点)
|
||||
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
|
||||
|
||||
# # 4. 动态确定目标关节值
|
||||
# # 靠近抓取点 -> 闭合;远离 -> 张开
|
||||
# target_val = torch.where(
|
||||
# distance < close_threshold,
|
||||
# torch.tensor(target_close_pos, device=env.device),
|
||||
# torch.tensor(target_open_pos, device=env.device)
|
||||
# ).unsqueeze(1)
|
||||
|
||||
# # 5. 获取当前关节位置
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
|
||||
# current_joint_pos_abs = torch.abs(current_joint_pos)
|
||||
|
||||
# # 6. 计算误差并返回奖励
|
||||
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
|
||||
|
||||
# # 使用 tanh 限制数值范围
|
||||
# reward = 1.0 - torch.tanh(error / 0.01) # 误差越小奖励越大
|
||||
|
||||
# return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
def _is_grasped(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg,
|
||||
robot_cfg: SceneEntityCfg,
|
||||
left_gripper_name: str,
|
||||
right_gripper_name: str,
|
||||
joint_names: list,
|
||||
height_offset: float,
|
||||
grasp_radius: float,
|
||||
target_close_pos: float
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
内部辅助函数:判定是否成功抓取。
|
||||
条件:
|
||||
1. 夹爪中心在把手目标点附近 (Sphere Check)
|
||||
2. 夹爪处于闭合发力状态 (Joint Effort Check)
|
||||
3. (关键) 夹爪Z轴高度合适,不能压在盖子上面 (Z-Axis Check)
|
||||
"""
|
||||
# 1. 获取对象
|
||||
lid = env.scene[lid_cfg.name]
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 2. 目标点位置 (把手中心)
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
lid_pos_w[:, 2] += height_offset
|
||||
|
||||
# 3. 夹爪位置
|
||||
l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
gripper_center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# 4. 条件一:距离判定 (在把手球范围内)
|
||||
dist_to_handle = torch.norm(gripper_center - lid_pos_w, dim=-1)
|
||||
is_near = dist_to_handle < grasp_radius
|
||||
|
||||
# 5. 条件二:Z轴高度判定 (防止压在盖子上)
|
||||
# 夹爪中心必须在把手目标点附近,允许上下微小浮动,但不能太高
|
||||
# 假设 height_offset 是把手几何中心,那么夹爪不应该比它高太多
|
||||
z_diff = gripper_center[:, 2] - lid_pos_w[:, 2]
|
||||
is_z_valid = torch.abs(z_diff) < 0.03 # 3cm 容差
|
||||
|
||||
# 6. 条件三:闭合判定 (正在尝试闭合)
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
# 获取关节位置 (绝对值)
|
||||
joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
|
||||
# === 修改闭合判定 ===
|
||||
|
||||
# 1. 关节位置判定 (Effort Check)
|
||||
# 只要用力了就行,去掉上限判定,防止夹太紧被误判
|
||||
# 只要两个手指关节都有明显位移(> 0.005)就视为在发力闭合
|
||||
is_effort_closing = torch.all(joint_pos > 0.005, dim=1)
|
||||
|
||||
# 2. 几何排斥判定 (Geometry Check)
|
||||
# 如果真的夹住了东西,左右指尖的距离应该被把手撑开,不会太小
|
||||
# 获取指尖位置
|
||||
dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
|
||||
|
||||
# 假设把手厚度是 1cm (0.01m)
|
||||
# 如果指尖距离 < 0.005,说明两个手指已经碰到一起了(空夹),没夹住东西
|
||||
# 如果指尖距离 >= 0.005,说明中间有东西挡着
|
||||
is_not_empty = dist_fingertips > 0.005
|
||||
|
||||
# 综合判定:
|
||||
# 1. 在把手附近 (is_near)
|
||||
# 2. 高度合适 (is_z_valid)
|
||||
# 3. 在用力闭合 (is_effort_closing)
|
||||
# 4. 指尖没贴死 (is_not_empty) -> 这就证明夹住东西了!
|
||||
return is_near & is_z_valid & is_effort_closing & is_not_empty
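# --- Editor's note: the grasp check above is the AND of four heuristic gates ---
# is_near (gripper center within grasp_radius of the handle point), is_z_valid (|dz| < 3 cm),
# is_effort_closing (both finger joints displaced by more than 0.005 in joint units), and
# is_not_empty (fingertip gap > 0.005 m, i.e. the fingers did not close on air).
# These thresholds are tuned to this gripper/lid geometry and would need re-tuning for other assets.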
|
||||
|
||||
|
||||
def gripper_close_when_near(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
target_close_pos: float = 0.03,
|
||||
height_offset: float = 0.02,
|
||||
grasp_radius: float = 0.05
|
||||
) -> torch.Tensor:
|
||||
|
||||
# 1. 判定基础抓取条件是否满足
|
||||
is_grasped = _is_grasped(
|
||||
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
# 2. 计算夹紧程度 (Clamping Intensity)
|
||||
robot = env.scene[robot_cfg.name]
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
# 获取当前关节位置绝对值 (num_envs, num_joints)
|
||||
current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# 计算两个指关节的平均闭合深度
|
||||
mean_joint_pos = torch.mean(current_joint_pos, dim=1)
|
||||
|
||||
# 计算奖励系数:当前位置 / 目标闭合位置
|
||||
# 这样当关节越接近 0.03 时,奖励越接近 1.0
|
||||
# 强制让 Agent 产生“压满”动作的冲动
|
||||
clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
|
||||
|
||||
# 3. 只有在满足抓取判定时,才发放带有权重的夹紧奖励
|
||||
# 如果没对准或者没夹到,奖励依然是 0
|
||||
return torch.where(is_grasped, clamping_factor, 0.0)
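# --- Editor's example (sketch, not original code): behaviour of the clamping reward ---
# With target_close_pos = 0.03, a mean finger displacement of 0.015 gives
# clamping_factor = 0.015 / 0.03 = 0.5, and 0.03 or more saturates at 1.0;
# the factor is only paid out while _is_grasped() holds, otherwise the term is 0.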
|
||||
|
||||
# def gripper_close_when_near(
|
||||
# env: ManagerBasedRLEnv,
|
||||
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
|
||||
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
|
||||
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
|
||||
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# target_close_pos: float = 0.03,
|
||||
# height_offset: float = 0.03,
|
||||
# grasp_radius: float = 0.02 # 球面半径 2cm
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# # 1. 获取位置
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# robot = env.scene[robot_cfg.name]
|
||||
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
|
||||
# lid_pos_w[:, 2] += height_offset # 把手中心
|
||||
|
||||
# # 获取左右指尖位置
|
||||
# l_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
# r_ids, _ = robot.find_bodies([right_gripper_name])
|
||||
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
|
||||
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
|
||||
|
||||
# # 计算夹爪中心
|
||||
# pos_center = (pos_L + pos_R) / 2.0
|
||||
|
||||
# # 2. 距离判定 (Is Inside Sphere?)
|
||||
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
|
||||
# is_in_sphere = (dist_center < grasp_radius).float()
|
||||
|
||||
# # 3. "在中间"判定 (Is Between?)
|
||||
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
|
||||
# vec_LR = pos_R - pos_L # 左指指向右指
|
||||
# vec_LO = lid_pos_w - pos_L # 左指指向物体
|
||||
|
||||
# # 计算投影比例 t
|
||||
# # P_proj = P_L + t * (P_R - P_L)
|
||||
# # t = (vec_LO . vec_LR) / |vec_LR|^2
|
||||
# # 如果 0 < t < 1,说明投影在两个指尖之间
|
||||
|
||||
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
|
||||
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
|
||||
# t = dot / len_sq
|
||||
|
||||
# is_between = (t > 0.0) & (t < 1.0)
|
||||
# is_between = is_between.float()
|
||||
|
||||
# # 4. 闭合判定
|
||||
# joint_ids, _ = robot.find_joints(joint_names)
|
||||
# # 注意:如果是 Binary Action,可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
|
||||
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
|
||||
# # 只有当非常接近闭合目标时才给分
|
||||
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
|
||||
|
||||
# # 5. 最终奖励
|
||||
# # 只有三者同时满足才给 1.0 分
|
||||
# reward = is_in_sphere * is_between * is_closing
|
||||
|
||||
# return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
# def lid_is_lifted(
|
||||
# env:ManagerBasedRLEnv,
|
||||
# minimal_height:float,
|
||||
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
|
||||
# ) -> torch.Tensor:
|
||||
|
||||
# lid = env.scene[lid_cfg.name]
|
||||
# lid_height = lid.data.root_pos_w[:, 2]
|
||||
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
|
||||
# reward=torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
# return reward
|
||||
|
||||
|
||||
def lid_is_lifted(
|
||||
env: ManagerBasedRLEnv,
|
||||
minimal_height: float = 0.05, # 相对提升阈值
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
height_offset: float = 0.03,
|
||||
grasp_radius: float = 0.1,
|
||||
target_close_pos: float = 0.03,
|
||||
) -> torch.Tensor:
|
||||
|
||||
is_grasped = _is_grasped(
|
||||
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
lid = env.scene[lid_cfg.name]
|
||||
|
||||
# 1. 获取高度
|
||||
current_height = lid.data.root_pos_w[:, 2]
|
||||
# 自动获取初始高度
|
||||
initial_height = lid.data.default_root_state[:, 2]
|
||||
|
||||
# 2. 计算提升量
|
||||
lift_height = torch.clamp(current_height - initial_height, min=0.0)
|
||||
|
||||
# 3. 速度检查 (防撞飞)
|
||||
# 如果速度 > 1.0 m/s,视为被撞飞,不给分
|
||||
lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
|
||||
is_stable = lid_vel < 1.0
|
||||
|
||||
# 4. 计算奖励
|
||||
# 基础分:高度越高分越高
|
||||
shaping_reward = lift_height * 2.0
|
||||
# 成功分:超过阈值
|
||||
success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
|
||||
|
||||
# 组合
|
||||
# 必须 is_grasped AND is_stable
|
||||
reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
|
||||
|
||||
return torch.nan_to_num(reward, nan=0.0)
|
||||
|
||||
|
||||
|
||||
def lid_lift_success_reward(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand_body",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
# 关键参数
|
||||
settled_height: float = 0.75, # 盖子落在超声仪上的稳定高度 (需根据仿真实际情况微调)
|
||||
target_lift_height: float = 0.05, # 目标提升高度 (5cm)
|
||||
grasp_dist_threshold: float = 0.05, # 抓取判定距离
|
||||
closed_pos: float = 0.03 # 夹爪闭合阈值
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
提升奖励:只有在【夹稳】的前提下,将盖子【相对】提升达到目标高度才给高分。
|
||||
"""
|
||||
# 1. 获取数据
|
||||
lid = env.scene[lid_cfg.name]
|
||||
robot = env.scene[robot_cfg.name]
|
||||
|
||||
# 盖子实时高度
|
||||
lid_z = lid.data.root_pos_w[:, 2]
|
||||
lid_pos = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# 夹爪位置
|
||||
body_ids, _ = robot.find_bodies([left_gripper_name])
|
||||
gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
|
||||
|
||||
# 2. 【条件一:是否夹稳 (Is Grasped?)】
|
||||
# 距离检查
|
||||
distance = torch.norm(gripper_pos - lid_pos, dim=-1)
|
||||
is_close = distance < grasp_dist_threshold
|
||||
|
||||
# 闭合检查
|
||||
joint_ids, _ = robot.find_joints(joint_names)
|
||||
joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
|
||||
# 假设 > 80% 的闭合度算抓紧了
|
||||
is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)
|
||||
|
||||
is_grasped = is_close & is_closed
|
||||
|
||||
# 3. 【条件二:计算相对提升 (Relative Lift)】
|
||||
# 当前高度 - 初始稳定高度
|
||||
current_lift = lid_z - settled_height
|
||||
|
||||
# 4. 计算奖励
|
||||
lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
|
||||
|
||||
# 基础提升分 (Shaping Reward)
|
||||
lift_reward = lift_progress
|
||||
|
||||
# 成功大奖 (Success Bonus)
|
||||
# 如果提升超过目标高度 (比如 > 95% 的 5cm),给额外大奖
|
||||
success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
|
||||
|
||||
# 组合:只有在 is_grasped 为 True 时才发放提升奖励
|
||||
total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))
|
||||
|
||||
# 5. 安全输出
|
||||
return torch.nan_to_num(total_reward, nan=0.0)
|
||||
|
||||
|
||||
def lid_lift_progress_reward(
|
||||
env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l",
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
initial_height: float = 0.8,
|
||||
target_height_lift: float = 0.15,
|
||||
height_offset: float = 0.02, # 必须与抓取奖励保持一致
|
||||
grasp_radius: float = 0.1, # 提升时可以稍微放宽一点半径,防止抖动丢分
|
||||
target_close_pos: float = 0.03,
|
||||
std: float = 0.05 # 标准差
|
||||
) -> torch.Tensor:
|
||||
|
||||
# 1. 判定是否夹住
|
||||
is_grasped = _is_grasped(
|
||||
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
|
||||
joint_names, height_offset, grasp_radius, target_close_pos
|
||||
)
|
||||
|
||||
# 2. 计算高度
|
||||
lid = env.scene[lid_cfg.name]
|
||||
current_height = lid.data.root_pos_w[:, 2]
|
||||
lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
|
||||
# print(current_height)
|
||||
# print(lift_height)
|
||||
# 3. 计算奖励
|
||||
# 只有在 is_grasped 为 True 时,才发放高度奖励
|
||||
# 这样彻底杜绝了 "砸飞/撞飞" 拿分的情况
|
||||
progress = torch.tanh(lift_height / std)
|
||||
|
||||
reward = torch.where(is_grasped, progress, 0.0)
|
||||
|
||||
return reward
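# --- Editor's example (sketch, not original code): scale of the lift-progress term ---
# progress = tanh(lift_height / std); with the defaults std = 0.05 and target_height_lift = 0.15,
# a 5 cm lift gives tanh(1) ~ 0.76 and the 15 cm clamp limit gives tanh(3) ~ 0.995, so most of the
# signal arrives within the first few centimetres of lift, and only while is_grasped holds.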
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
from __future__ import annotations
|
||||
import torch
|
||||
from typing import TYPE_CHECKING
|
||||
from isaaclab.assets import Articulation, RigidObject
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from isaaclab.envs import ManagerBasedRLEnv
|
||||
|
||||
def lid_dropped(env: ManagerBasedRLEnv,
|
||||
minimum_height: float = -0.05,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
return lid.data.root_pos_w[:, 2] < minimum_height
|
||||
|
||||
def lid_successfully_grasped(env: ManagerBasedRLEnv,
|
||||
distance_threshold: float = 0.03,
|
||||
height_threshold: float = 0.2,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
body_idx = robot.find_bodies(gripper_body_name)[0][0]
gripper_pos_w = robot.data.body_pos_w[:, body_idx, :]
|
||||
distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
|
||||
is_close = distance < distance_threshold
|
||||
is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
|
||||
return is_close & is_lifted
|
||||
|
||||
def gripper_at_lid_side(env: ManagerBasedRLEnv,
|
||||
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
|
||||
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
|
||||
left_gripper_name: str = "left_hand__l", # 改为两个下划线
|
||||
right_gripper_name: str = "left_hand_r",
|
||||
side_distance: float = 0.05,
|
||||
side_tolerance: float = 0.01,
|
||||
alignment_tolerance: float = 0.02,
|
||||
height_offset: float = 0.1,
|
||||
height_tolerance: float = 0.02) -> torch.Tensor:
|
||||
"""Terminate when gripper center is positioned on the side of the lid at specified height.
|
||||
|
||||
坐标系说明:
|
||||
- X 方向:两个夹爪朝中心合并的方向
|
||||
- Y 方向:夹爪间空隙的方向,和 lid 的把手方向一致
|
||||
- Z 方向:高度方向
|
||||
"""
|
||||
lid: RigidObject = env.scene[lid_cfg.name]
|
||||
robot: Articulation = env.scene[robot_cfg.name]
|
||||
|
||||
lid_pos_w = lid.data.root_pos_w[:, :3]
|
||||
|
||||
# 获取两个夹爪的位置
|
||||
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
|
||||
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
|
||||
|
||||
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
|
||||
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
|
||||
|
||||
# 计算夹爪中心位置
|
||||
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
|
||||
rel_pos = gripper_center - lid_pos_w
|
||||
|
||||
# Y 方向:应该在 lid 的 Y 方向两侧(距离 side_distance)
|
||||
y_dist = torch.abs(rel_pos[:, 1])
|
||||
y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
|
||||
|
||||
# X 方向:应该对齐(接近 0)
|
||||
x_dist = torch.abs(rel_pos[:, 0])
|
||||
x_ok = x_dist < alignment_tolerance
|
||||
|
||||
# Z 方向:应该在 lid 上方 height_offset 处
|
||||
z_error = torch.abs(rel_pos[:, 2] - height_offset)
|
||||
z_ok = z_error < height_tolerance
|
||||
|
||||
# 所有条件都要满足
|
||||
return x_ok & y_ok & z_ok
|
||||
|
||||
|
||||
def base_height_failure(env: ManagerBasedRLEnv,
|
||||
minimum_height: float | None = None,
|
||||
maximum_height: float | None = None,
|
||||
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
|
||||
"""Terminate when the robot's base height is outside the specified range."""
|
||||
# extract the used quantities (to enable type-hinting)
|
||||
asset: Articulation = env.scene[asset_cfg.name]
|
||||
root_pos_z = asset.data.root_pos_w[:, 2]
|
||||
|
||||
# check if height is outside the range
|
||||
out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool)
|
||||
if minimum_height is not None:
|
||||
out_of_bounds |= root_pos_z < minimum_height
|
||||
if maximum_height is not None:
|
||||
out_of_bounds |= root_pos_z > maximum_height
|
||||
|
||||
return out_of_bounds
|
||||
|
||||
@@ -0,0 +1,578 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import math
|
||||
|
||||
import torch
|
||||
from isaaclab.actuators import ImplicitActuatorCfg
|
||||
import isaaclab.sim as sim_utils
|
||||
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
|
||||
from isaaclab.envs import ManagerBasedRLEnvCfg
|
||||
from isaaclab.managers import EventTermCfg as EventTerm
|
||||
from isaaclab.managers import ObservationGroupCfg as ObsGroup
|
||||
from isaaclab.managers import ObservationTermCfg as ObsTerm
|
||||
from isaaclab.managers import RewardTermCfg as RewTerm
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
from isaaclab.managers import TerminationTermCfg as DoneTerm
|
||||
from isaaclab.managers import CurriculumTermCfg as CurrTerm
|
||||
from isaaclab.scene import InteractiveSceneCfg
|
||||
from isaaclab.utils import configclass
|
||||
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
|
||||
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
|
||||
from isaaclab.sensors import CameraCfg
|
||||
|
||||
from . import mdp
|
||||
|
||||
##
|
||||
# Pre-defined configs
|
||||
##
|
||||
|
||||
from mindbot.robot.mindbot import MINDBOT_CFG
|
||||
|
||||
# ====== 其他物体配置 ======
|
||||
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
|
||||
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
|
||||
|
||||
|
||||
TABLE_CFG=RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Table",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.95, -0.3, 0.005],
|
||||
rot=[0.7071, 0.0, 0.0, 0.7071],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
|
||||
rigid_props=RigidBodyPropertiesCfg(
|
||||
rigid_body_enabled= True,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
disable_gravity=False,
|
||||
),
|
||||
collision_props=CollisionPropertiesCfg(
|
||||
collision_enabled=True,
|
||||
contact_offset=0.0005,#original 0.02
|
||||
rest_offset=0
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
LID_CFG = RigidObjectCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Lid",
|
||||
init_state=RigidObjectCfg.InitialStateCfg(
|
||||
pos=[0.2, 0.65, 0.9],
|
||||
rot=[1.0, 0.0, 0.0, 0.0],
|
||||
lin_vel=[0.0, 0.0, 0.0],
|
||||
ang_vel=[0.0, 0.0, 0.0],
|
||||
),
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
|
||||
# scale=(0.2, 0.2, 0.2),
|
||||
rigid_props=RigidBodyPropertiesCfg(
|
||||
rigid_body_enabled= True,
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
max_angular_velocity=1000.0,
|
||||
max_linear_velocity=1000.0,
|
||||
max_depenetration_velocity=0.5,#original 5.0
|
||||
linear_damping=5.0, #original 0.5
|
||||
angular_damping=5.0, #original 0.5
|
||||
disable_gravity=False,
|
||||
),
|
||||
collision_props=CollisionPropertiesCfg(
|
||||
collision_enabled=True,
|
||||
contact_offset=0.0005,#original 0.02
|
||||
rest_offset=0
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
ROOM_CFG = AssetBaseCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Room",
|
||||
spawn=UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
|
||||
),
|
||||
)
|
||||
|
||||
# ROOM_CFG = RigidObjectCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Room",
|
||||
# init_state=RigidObjectCfg.InitialStateCfg(
|
||||
# pos=[0.0, 0.0, 0.0],
|
||||
# rot=[1.0, 0.0, 0.0, 0.0],
|
||||
# lin_vel=[0.0, 0.0, 0.0],
|
||||
# ang_vel=[0.0, 0.0, 0.0],
|
||||
# ),
|
||||
# spawn=UsdFileCfg(
|
||||
# usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
|
||||
# # scale=(0.2, 0.2, 0.2),
|
||||
# rigid_props=RigidBodyPropertiesCfg(
|
||||
# rigid_body_enabled= True,
|
||||
# solver_position_iteration_count=32,
|
||||
# solver_velocity_iteration_count=16,
|
||||
# max_angular_velocity=1000.0,
|
||||
# max_linear_velocity=1000.0,
|
||||
# max_depenetration_velocity=0.5,#original 5.0
|
||||
# linear_damping=5.0, #original 0.5
|
||||
# angular_damping=5.0, #original 0.5
|
||||
# disable_gravity=False,
|
||||
# ),
|
||||
# collision_props=CollisionPropertiesCfg(
|
||||
# collision_enabled=True,
|
||||
# contact_offset=0.0005,#original 0.02
|
||||
# rest_offset=0
|
||||
# ),
|
||||
# ),
|
||||
# )
|
||||
|
||||
|
||||
|
||||
ULTRASOUND_CFG = ArticulationCfg(
|
||||
spawn=sim_utils.UsdFileCfg(
|
||||
usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
|
||||
rigid_props=sim_utils.RigidBodyPropertiesCfg(
|
||||
disable_gravity=False,
|
||||
max_depenetration_velocity=1.0
|
||||
),
|
||||
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
|
||||
enabled_self_collisions=False,#
|
||||
solver_position_iteration_count=32,
|
||||
solver_velocity_iteration_count=16,
|
||||
stabilization_threshold=1e-6,
|
||||
# fix_root_link=True,
|
||||
),
|
||||
),
|
||||
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)),
|
||||
# actuators={}
|
||||
actuators={
|
||||
"passive_damper": ImplicitActuatorCfg(
|
||||
# ".*" 表示匹配该USD文件内的所有关节(无论是轮子、屏幕转轴还是其他)
|
||||
joint_names_expr=[".*"],
|
||||
|
||||
stiffness=0.0,
|
||||
damping=1000.0,
|
||||
effort_limit_sim=10000.0,
|
||||
velocity_limit_sim=100.0,
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
##
|
||||
# Scene definition
|
||||
##
|
||||
|
||||
|
||||
@configclass
|
||||
class MindbotSceneCfg(InteractiveSceneCfg):
|
||||
"""Configuration for a cart-pole scene."""
|
||||
|
||||
# ground plane
|
||||
ground = AssetBaseCfg(
|
||||
prim_path="/World/ground",
|
||||
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
|
||||
)
|
||||
|
||||
# robot
|
||||
Room: AssetBaseCfg = ROOM_CFG.replace(prim_path="{ENV_REGEX_NS}/Room")
|
||||
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
|
||||
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
|
||||
ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
|
||||
lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
|
||||
# lights
|
||||
dome_light = AssetBaseCfg(
|
||||
prim_path="/World/DomeLight",
|
||||
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
|
||||
)
|
||||
# head_camera=CameraCfg(
|
||||
# prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
# update_period=1/120,
|
||||
# height=480,
|
||||
# width=640,
|
||||
# data_type=["rgb"],
|
||||
# spawn=None,
|
||||
# )
|
||||
left_hand_camera=CameraCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
|
||||
update_period=1/120,
|
||||
height=480,
|
||||
width=640,
|
||||
data_types=["rgb"],
|
||||
spawn=None,
|
||||
)
|
||||
right_hand_camera=CameraCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
|
||||
update_period=1/120,
|
||||
height=480,
|
||||
width=640,
|
||||
data_types=["rgb"],
|
||||
spawn=None,
|
||||
)
|
||||
head_camera=CameraCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
|
||||
update_period=1/120,
|
||||
height=480,
|
||||
width=640,
|
||||
data_types=["rgb"],
|
||||
spawn=None,
|
||||
)
|
||||
chest_camera=CameraCfg(
|
||||
prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
|
||||
update_period=1/120,
|
||||
height=480,
|
||||
width=640,
|
||||
data_types=["rgb"],
|
||||
spawn=None,
|
||||
)
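# --- Editor's note (assumption about sensor timing): with sim.dt = 1/120 set below in
# MindbotEnvCfg.__post_init__, update_period = 1/120 requests a new frame from every camera
# each physics step; raising the period (e.g. to 1/30) is a simple lever if rendering four
# RGB cameras per env becomes the bottleneck.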
|
||||
##
|
||||
# MDP settings
|
||||
##
|
||||
|
||||
def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
|
||||
asset = env.scene[asset_cfg.name]
|
||||
body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
|
||||
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
|
||||
return pos_w
|
||||
|
||||
|
||||
@configclass
|
||||
class ActionsCfg:
|
||||
"""Action specifications for the MDP."""
|
||||
|
||||
# 使用任务空间控制器:策略输出末端执行器位置+姿态增量(6维)
|
||||
left_arm_ee = DifferentialInverseKinematicsActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["l_joint[1-6]"], # 左臂的6个关节
|
||||
body_name="left_hand_body", # 末端执行器body名称
|
||||
controller=DifferentialIKControllerCfg(
|
||||
command_type="pose", # 控制位置+姿态
|
||||
use_relative_mode=True, # 相对模式:动作是增量
|
||||
ik_method="dls", # Damped Least Squares方法
|
||||
),
|
||||
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
|
||||
)
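# --- Editor's note (sketch, not original code): with use_relative_mode=True the 6-D action is
# interpreted as (dx, dy, dz, droll, dpitch, dyaw) increments on the end-effector pose. If the
# policy output is roughly in [-1, 1], the scale above corresponds to at most ~1 cm of position
# and ~0.025 rad of orientation change per control step (about 0.3 m/s at a 30 Hz control rate).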
|
||||
|
||||
# right_arm_fixed = mdp.JointPositionActionCfg(
|
||||
# asset_name="Mindbot",
|
||||
# joint_names=["r_joint[1-6]"], # 对应 l_joint1 到 l_joint6
|
||||
|
||||
# # 1. 缩放设为 0:这让 RL 策略无法控制它,它不会动
|
||||
# scale=0.0,
|
||||
|
||||
# # 2. 偏移设为你的目标角度(弧度):这告诉电机“永远停在这里”
|
||||
# # 对应 (135, -70, -90, 90, 90, -70)
|
||||
# offset={
|
||||
# "r_joint1": 2.3562,
|
||||
# "r_joint2": -1.2217,
|
||||
# "r_joint3": -1.5708,
|
||||
# "r_joint4": 1.5708,
|
||||
# "r_joint5": 1.5708,
|
||||
# "r_joint6": -1.2217,
|
||||
# },
|
||||
# )
|
||||
|
||||
grippers_position = mdp.BinaryJointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["left_hand_joint_left", "left_hand_joint_right"],
|
||||
|
||||
# 修正:使用字典格式
|
||||
# open_command_expr={"关节名正则": 值}
|
||||
open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
|
||||
|
||||
# close_command_expr={"关节名正则": 值}
|
||||
close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
|
||||
)
|
||||
|
||||
|
||||
trunk_position = mdp.JointPositionActionCfg(
|
||||
asset_name="Mindbot",
|
||||
joint_names=["PrismaticJoint"],
|
||||
scale=0.00,
|
||||
offset=0.3,
|
||||
clip=None
|
||||
)
|
||||
|
||||
|
||||
@configclass
|
||||
class ObservationsCfg:
|
||||
"""Observation specifications for the MDP."""
|
||||
|
||||
@configclass
|
||||
class PolicyCfg(ObsGroup):
|
||||
"""Observations for policy group."""
|
||||
|
||||
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
|
||||
|
||||
left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
|
||||
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
|
||||
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
|
||||
ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
self.enable_corruption = False
|
||||
self.concatenate_terms = True
|
||||
|
||||
# observation groups
|
||||
policy: PolicyCfg = PolicyCfg()
|
||||
|
||||
|
||||
@configclass
|
||||
class EventCfg:
|
||||
"""Configuration for events."""
|
||||
|
||||
# 重置所有关节到 init_state(无偏移)must be
|
||||
reset_mindbot_all_joints = EventTerm(
|
||||
func=mdp.reset_joints_by_offset,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot"),
|
||||
"position_range": (0.0, 0.0),
|
||||
"velocity_range": (0.0, 0.0),
|
||||
},
|
||||
)
|
||||
# 2. 底座安装误差 (建议缩小到 2cm)
|
||||
reset_mindbot_base = EventTerm(
|
||||
func=mdp.reset_root_state_uniform,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot"),
|
||||
"pose_range": {
|
||||
"x": (-0.05, 0.05),
|
||||
"y": (-0.05, 0.05),
|
||||
"z": (0.74, 0.75),
|
||||
"yaw": (-0.1, 0.1),
|
||||
},
|
||||
"velocity_range": {"x": (0.0, 0.0), "y": (0.0, 0.0)},
|
||||
},
|
||||
)
|
||||
# 1. 机械臂关节微小偏移 (0.01 弧度约 0.6 度,很合适)
|
||||
reset_mindbot_left_arm = EventTerm(
|
||||
func=mdp.reset_joints_by_offset,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
|
||||
"position_range": (-0.01, 0.01),
|
||||
"velocity_range": (0.0, 0.0),
|
||||
},
|
||||
)
|
||||
|
||||
reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
|
||||
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
|
||||
reset_ultrasound = EventTerm(
|
||||
func=mdp.reset_root_state_uniform,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("ultrasound"),
|
||||
"pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)},
|
||||
"velocity_range": {"x": (0.0, 0.0)}
|
||||
}
|
||||
)
|
||||
reset_lid = EventTerm(
|
||||
func=mdp.reset_root_state_uniform,
|
||||
mode="reset",
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("lid"),
|
||||
"pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)},
|
||||
"velocity_range": {"x": (0.0, 0.0)}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@configclass
|
||||
class RewardsCfg:
|
||||
"""Reward terms for the MDP."""
|
||||
|
||||
gripper_lid_orientation_alignment = RewTerm(
|
||||
func=mdp.gripper_lid_orientation_alignment,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"gripper_body_name": "left_hand_body",
|
||||
"gripper_forward_axis": (0.0, 0.0, 1.0),
|
||||
"gripper_width_axis": (0.0, 1.0, 0.0),
|
||||
"lid_handle_axis": (0.0, 1.0, 0.0),
|
||||
"max_angle_deg": 85.0, # 允许60度内的偏差
|
||||
},
|
||||
weight=5.0 #original 5.0
|
||||
)
|
||||
|
||||
# stage 2
|
||||
# 位置对齐奖励:鼓励夹爪位于 lid 两侧,且 Z 方向在 lid 上方 0.1m
|
||||
gripper_lid_position_alignment = RewTerm(
|
||||
func=mdp.gripper_lid_position_alignment,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"height_offset": 0.07, # Z方向:lid 上方 0.1m
|
||||
"std": 0.3, # 位置对齐的容忍度
|
||||
},
|
||||
weight=3.0 #original 3.0
|
||||
)
|
||||
approach_lid_penalty = RewTerm(
|
||||
func=mdp.gripper_lid_distance_penalty,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"height_offset": 0.07,
|
||||
},
|
||||
# weight 为负数表示惩罚。
|
||||
# 假设距离 0.5m,weight=-1.0,惩罚就是 0.5 * (-1.0) = -0.5 分。
|
||||
weight=-1.0
|
||||
)
|
||||
# 【新增】精细对齐 (引导进入 2cm 圈)
|
||||
gripper_lid_fine_alignment = RewTerm(
|
||||
func=mdp.gripper_lid_position_alignment,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"height_offset": 0.07, # 【注意】这个高度必须非常准,和 interaction 判定高度一致
|
||||
"std": 0.05, # 非常陡峭的梯度,只在 5-10cm 内有效
|
||||
},
|
||||
weight=10.0 # 高权重
|
||||
)
|
||||
|
||||
gripper_close_interaction = RewTerm(
|
||||
func=mdp.gripper_close_when_near,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"robot_cfg": SceneEntityCfg("Mindbot"),
|
||||
"left_gripper_name": "left_hand__l",
|
||||
"right_gripper_name": "left_hand_r",
|
||||
"joint_names": ["left_hand_joint_left", "left_hand_joint_right"],
|
||||
"target_close_pos": 0.03,
|
||||
"height_offset": 0.04,
|
||||
"grasp_radius": 0.03,
|
||||
},
|
||||
weight=10.0
|
||||
)
|
||||
|
||||
lid_lifted_reward = RewTerm(
|
||||
func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
|
||||
},
|
||||
weight=10.0 # 给一个足够大的权重
|
||||
)
|
||||
|
||||
lid_lifting_reward = RewTerm(
|
||||
func=mdp.lid_lift_progress_reward,
|
||||
params={
|
||||
"lid_cfg": SceneEntityCfg("lid"),
|
||||
"initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
|
||||
"target_height_lift": 0.2,
|
||||
"height_offset": 0.07, # 与其他奖励保持一致
|
||||
"std": 0.1
|
||||
},
|
||||
# 权重设大一点,告诉它这是最终目标
|
||||
weight=10.0
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
||||
@configclass
|
||||
class TerminationsCfg:
|
||||
"""Termination terms for the MDP."""
|
||||
|
||||
time_out = DoneTerm(func=mdp.time_out, time_out=True)
|
||||
|
||||
mindbot_fly_away = DoneTerm(
|
||||
func=mdp.base_height_failure,
|
||||
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
|
||||
)
|
||||
|
||||
lid_fly_away = DoneTerm(
|
||||
func=mdp.base_height_failure,
|
||||
params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
|
||||
)
|
||||
|
||||
# 新增:盖子掉落判定
|
||||
# 如果盖子掉回桌面(高度接近初始高度),且已经运行了一段时间(避开刚开始重置的瞬间)
|
||||
lid_dropped = DoneTerm(
|
||||
func=mdp.base_height_failure, # 复用高度判定函数
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("lid"),
|
||||
"minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
|
||||
##
|
||||
# Environment configuration
|
||||
##
|
||||
|
||||
@configclass
|
||||
class CurriculumCfg:
|
||||
pass
|
||||
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
|
||||
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
|
||||
|
||||
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
|
||||
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
|
||||
|
||||
# position_std_scheduler = CurrTerm(
|
||||
# func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类
|
||||
# params={
|
||||
# # 路径:rewards管理器 -> 你的奖励项名 -> params字典 -> std键
|
||||
# "address": "rewards.gripper_lid_position_alignment.params.std",
|
||||
|
||||
# # 修改逻辑:使用我们刚才写的函数
|
||||
# "modify_fn": mdp.annealing_std,
|
||||
|
||||
# # 传给函数的参数
|
||||
# "modify_params": {
|
||||
# "start_step": 00, # 约 600 轮
|
||||
# "end_step": 5_000, # 约 1200 轮
|
||||
# "start_std": 0.3,
|
||||
# "end_std": 0.05,
|
||||
# }
|
||||
# }
|
||||
# )
|
||||
|
||||
@configclass
|
||||
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
|
||||
# Scene settings
|
||||
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=5.0)
|
||||
# Basic settings
|
||||
observations: ObservationsCfg = ObservationsCfg()
|
||||
actions: ActionsCfg = ActionsCfg()
|
||||
events: EventCfg = EventCfg()
|
||||
# MDP settings
|
||||
rewards: RewardsCfg = RewardsCfg()
|
||||
terminations: TerminationsCfg = TerminationsCfg()
|
||||
# curriculum: CurriculumCfg = CurriculumCfg()
|
||||
|
||||
# Post initialization
|
||||
def __post_init__(self) -> None:
|
||||
"""Post initialization."""
|
||||
# general settings
|
||||
self.decimation = 4 # original 2;配合 sim.dt = 1/120,控制频率为 120/4 = 30Hz
|
||||
self.episode_length_s = 10
|
||||
# viewer settings
|
||||
self.viewer.eye = (0, 2.5, 2.6)#(3.5, 0.0, 3.2)
|
||||
# simulation settings
|
||||
self.sim.dt = 1 / 120 #original 1 / 60
|
||||
self.sim.render_interval = self.decimation
|
||||
# # 1. 基础堆内存
|
||||
self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256MB
|
||||
self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
|
||||
self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
|
||||
self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024 # 约 1677 万个接触点
|
||||
self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024 # 约 105 万个 patch
|
||||
|
||||
# # 5. 聚合对容量 (针对复杂的 Articulation)
|
||||
self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
|
||||
self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
|
||||
self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024
|
||||
282
test.py
Normal file
@@ -0,0 +1,282 @@
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
|
||||
import imageio
|
||||
import numpy as np
|
||||
import h5py
|
||||
import torch
|
||||
|
||||
# 先引入 AppLauncher,并尽早实例化 SimulationApp,避免 pxr 未加载
|
||||
from isaaclab.app import AppLauncher
|
||||
import cli_args # isort: skip
|
||||
|
||||
# CLI
|
||||
parser = argparse.ArgumentParser(description="Play an RL agent with multi-cam recording.")
|
||||
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
|
||||
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
|
||||
parser.add_argument("--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations.")
|
||||
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
|
||||
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
|
||||
parser.add_argument("--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point.")
|
||||
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
|
||||
parser.add_argument("--use_pretrained_checkpoint", action="store_true", help="Use the pre-trained checkpoint from Nucleus.")
|
||||
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
|
||||
parser.add_argument("--max_steps", type=int, default=None, help="最大步数,达到后提前退出")
|
||||
cli_args.add_rsl_rl_args(parser)
|
||||
AppLauncher.add_app_launcher_args(parser)
|
||||
args_cli, hydra_args = parser.parse_known_args()
|
||||
if args_cli.video:
|
||||
args_cli.enable_cameras = True
|
||||
# 将 hydra 剩余参数传递
|
||||
sys.argv = [sys.argv[0]] + hydra_args
|
||||
|
||||
# ==== 先实例化 SimulationApp ====
|
||||
app_launcher = AppLauncher(args_cli)
|
||||
simulation_app = app_launcher.app
|
||||
|
||||
# ==== 之后再 import 依赖 isaac/pxr 的模块 ====
|
||||
import gymnasium as gym
|
||||
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
|
||||
from isaaclab.envs import DirectMARLEnv, DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg, multi_agent_to_single_agent
|
||||
from isaaclab.utils.assets import retrieve_file_path
|
||||
from isaaclab.utils.dict import print_dict
|
||||
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
|
||||
from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
|
||||
|
||||
import isaaclab_tasks # noqa: F401
|
||||
from isaaclab_tasks.utils import get_checkpoint_path
|
||||
from isaaclab_tasks.utils.hydra import hydra_task_config
|
||||
import mindbot.tasks # noqa: F401
|
||||
|
||||
CAM_NAMES = ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand", "cam_top", "cam_side"]
|
||||
|
||||
|
||||
@hydra_task_config(args_cli.task, args_cli.agent)
|
||||
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
|
||||
task_name = args_cli.task.split(":")[-1]
|
||||
train_task_name = task_name.replace("-Play", "")
|
||||
|
||||
agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
|
||||
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
|
||||
env_cfg.seed = agent_cfg.seed
|
||||
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
|
||||
|
||||
log_root_path = os.path.abspath(os.path.join("logs", "rsl_rl", agent_cfg.experiment_name))
|
||||
print(f"[INFO] Loading experiment from directory: {log_root_path}")
|
||||
if args_cli.use_pretrained_checkpoint:
|
||||
resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
|
||||
if not resume_path:
|
||||
print("[INFO] No pre-trained checkpoint for this task.")
|
||||
return
|
||||
elif args_cli.checkpoint:
|
||||
resume_path = retrieve_file_path(args_cli.checkpoint)
|
||||
else:
|
||||
resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
|
||||
|
||||
log_dir = os.path.dirname(resume_path)
|
||||
env_cfg.log_dir = log_dir
|
||||
|
||||
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
|
||||
if isinstance(env.unwrapped, DirectMARLEnv):
|
||||
env = multi_agent_to_single_agent(env)
|
||||
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
|
||||
|
||||
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
|
||||
if agent_cfg.class_name == "OnPolicyRunner":
|
||||
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
|
||||
elif agent_cfg.class_name == "DistillationRunner":
|
||||
runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
|
||||
else:
|
||||
raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
|
||||
runner.load(resume_path)
|
||||
|
||||
policy = runner.get_inference_policy(device=env.unwrapped.device)
|
||||
try:
|
||||
policy_nn = runner.alg.policy
|
||||
except AttributeError:
|
||||
policy_nn = runner.alg.actor_critic
|
||||
|
||||
# 导出模型
|
||||
export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
|
||||
normalizer = getattr(policy_nn, "actor_obs_normalizer", getattr(policy_nn, "student_obs_normalizer", None))
|
||||
export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
|
||||
export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
|
||||
|
||||
# dt = env.unwrapped.step_dt
|
||||
|
||||
# # 录制缓冲
|
||||
# cam_buffers = {n: [] for n in CAM_NAMES}
|
||||
# joint_log, joint_vel_log, action_log, ts_log = [], [], [], []
|
||||
# t0 = time.time()
|
||||
|
||||
# obs = env.get_observations()
|
||||
# timestep = 0
|
||||
# step_count = 0
|
||||
# try:
|
||||
# while simulation_app.is_running():
|
||||
# start_time = time.time()
|
||||
# with torch.inference_mode():
|
||||
# actions = policy(obs)
|
||||
# obs, _, dones, _ = env.step(actions)
|
||||
# policy_nn.reset(dones)
|
||||
|
||||
# # 相机帧(取 env0,如需全部 env 去掉 [0])
|
||||
# for name in CAM_NAMES:
|
||||
# if name not in env.unwrapped.scene.sensors:
|
||||
# continue
|
||||
# cam = env.unwrapped.scene.sensors[name]
|
||||
# rgba = cam.data.output.get("rgba", cam.data.output.get("rgb"))
|
||||
# if rgba is None:
|
||||
# continue
|
||||
# frame = rgba[0].cpu().numpy()
|
||||
# if frame.shape[-1] == 4:
|
||||
# frame = frame[..., :3]
|
||||
# cam_buffers[name].append(frame)
|
||||
|
||||
# # 关节 / 速度 / 动作
|
||||
# robot = env.unwrapped.scene["Mindbot"]
|
||||
# joint_log.append(robot.data.joint_pos.cpu().numpy())
|
||||
# joint_vel_log.append(robot.data.joint_vel.cpu().numpy())
|
||||
# action_log.append(actions.cpu().numpy())
|
||||
# ts_log.append(time.time() - t0)
|
||||
|
||||
# step_count += 1
|
||||
# if args_cli.max_steps and step_count >= args_cli.max_steps:
|
||||
# break
|
||||
|
||||
# if args_cli.video:
|
||||
# timestep += 1
|
||||
# if timestep == args_cli.video_length:
|
||||
# break
|
||||
|
||||
# sleep_time = dt - (time.time() - start_time)
|
||||
# if args_cli.real_time and sleep_time > 0:
|
||||
# time.sleep(sleep_time)
|
||||
# finally:
|
||||
# # 保存 HDF5
|
||||
# h5_path = os.path.join(log_dir, "rollout_multi_cam.h5")
|
||||
# with h5py.File(h5_path, "w") as f:
|
||||
# f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip")
|
||||
# f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip")
|
||||
# f.create_dataset("actions", data=np.stack(action_log), compression="gzip")
|
||||
# f.create_dataset("timestamps", data=np.array(ts_log))
|
||||
# for name, frames in cam_buffers.items():
|
||||
# if not frames:
|
||||
# continue
|
||||
# dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip")
|
||||
# if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]:
|
||||
# fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05
|
||||
# else:
|
||||
# fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295
|
||||
# dset.attrs["fx"] = fx
|
||||
# dset.attrs["fy"] = fy
|
||||
# dset.attrs["cx"] = cx
|
||||
# dset.attrs["cy"] = cy
|
||||
# dset.attrs["focal_length"] = 1.93
|
||||
# print(f"[INFO] Saved HDF5 to {h5_path}")
|
||||
|
||||
# # 可选:单路 MP4
|
||||
# head_frames = cam_buffers["cam_head"]
|
||||
# if head_frames:
|
||||
# fps = int(round(1.0 / dt))
|
||||
# video_path = os.path.join(log_dir, "cam_head.mp4")
|
||||
# imageio.mimsave(video_path, head_frames, fps=fps)
|
||||
# print(f"[INFO] Saved video to {video_path}")
|
||||
|
||||
# env.close()
|
||||
dt = env.unwrapped.step_dt
|
||||
|
||||
# 录制缓冲
|
||||
cam_buffers = {n: [] for n in CAM_NAMES}
|
||||
joint_log, joint_vel_log, action_log, ts_log = [], [], [], []
|
||||
t0 = time.time()
|
||||
|
||||
obs = env.get_observations()
|
||||
timestep = 0
|
||||
while simulation_app.is_running():
|
||||
start_time = time.time()
|
||||
with torch.inference_mode():
|
||||
actions = policy(obs)
|
||||
obs, _, dones, _ = env.step(actions)
|
||||
policy_nn.reset(dones)
|
||||
|
||||
# 相机帧(取 env0,如需全部 env 去掉 [0])
|
||||
for name in CAM_NAMES:
|
||||
if name not in env.unwrapped.scene.sensors:
|
||||
continue
|
||||
cam = env.unwrapped.scene.sensors[name]
|
||||
rgba = cam.data.output.get("rgba", cam.data.output.get("rgb"))
|
||||
if rgba is None:
|
||||
continue
|
||||
frame = rgba[0].cpu().numpy()
|
||||
if frame.shape[-1] == 4:
|
||||
frame = frame[..., :3]
|
||||
cam_buffers[name].append(frame)
|
||||
|
||||
# 关节 / 速度 / 动作
|
||||
robot = env.unwrapped.scene["Mindbot"]
|
||||
joint_log.append(robot.data.joint_pos.cpu().numpy())
|
||||
joint_vel_log.append(robot.data.joint_vel.cpu().numpy())
|
||||
action_log.append(actions.cpu().numpy())
|
||||
ts_log.append(time.time() - t0)
|
||||
|
||||
if args_cli.video:
|
||||
timestep += 1
|
||||
if timestep == args_cli.video_length:
|
||||
break
|
||||
|
||||
sleep_time = dt - (time.time() - start_time)
|
||||
if args_cli.real_time and sleep_time > 0:
|
||||
time.sleep(sleep_time)
|
||||
|
||||

    # Save HDF5
    h5_path = os.path.join(log_dir, "rollout_multi_cam.h5")
    with h5py.File(h5_path, "w") as f:
        f.create_dataset("joint_pos", data=np.stack(joint_log), compression="gzip")
        f.create_dataset("joint_vel", data=np.stack(joint_vel_log), compression="gzip")
        f.create_dataset("actions", data=np.stack(action_log), compression="gzip")
        f.create_dataset("timestamps", data=np.array(ts_log))
        for name, frames in cam_buffers.items():
            if not frames:
                continue
            dset = f.create_dataset(f"cams/{name}/rgb", data=np.stack(frames), compression="gzip")
            # Intrinsics: set these to match your camera configuration
            if name in ["cam_head", "cam_chest", "cam_left_hand", "cam_right_hand"]:
                fx, fy, cx, cy = 911.77, 911.5, 624.07, 364.05
            else:
                fx, fy, cx, cy = 458.7488, 458.8663, 323.3297, 240.6295
            dset.attrs["fx"] = fx
            dset.attrs["fy"] = fy
            dset.attrs["cx"] = cx
            dset.attrs["cy"] = cy
            dset.attrs["focal_length"] = 1.93
    print(f"[INFO] Saved HDF5 to {h5_path}")

    # Optional: single-camera MP4
    head_frames = cam_buffers["cam_head"]
    if head_frames:
        fps = int(round(1.0 / dt))
        video_path = os.path.join(log_dir, "cam_head.mp4")
        imageio.mimsave(video_path, head_frames, fps=fps)
        print(f"[INFO] Saved video to {video_path}")
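
    # A sketch (not in the original commit) extending the same pattern to write one MP4
    # per recorded camera instead of only cam_head:
    #
    #     fps = int(round(1.0 / dt))
    #     for name, frames in cam_buffers.items():
    #         if frames:
    #             imageio.mimsave(os.path.join(log_dir, f"{name}.mp4"), frames, fps=fps)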

    env.close()


if __name__ == "__main__":
    main()
    simulation_app.close()


# Quick HDF5 inspection snippet (commented out): lists every dataset with its shape and dtype.
# import h5py
#
# h5_path = r"C:\Users\PC\workpalce\mindbot\logs\rsl_rl\mindbot_grasp\2026-01-15_11-51-00\rollout_multi_cam.h5"
#
# with h5py.File(h5_path, "r") as f:
#     def walk(name, obj):
#         if isinstance(obj, h5py.Dataset):
#             print(f"{name}: shape={obj.shape}, dtype={obj.dtype}")
#         else:
#             print(f"{name}/")
#     f.visititems(walk)
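
# A follow-on sketch (not part of the original script): rebuild the 3x3 pinhole
# intrinsics matrix K from the fx/fy/cx/cy attributes stored on each
# "cams/<name>/rgb" dataset, reusing the example h5_path above.
#
# import numpy as np
#
# with h5py.File(h5_path, "r") as f:
#     rgb = f["cams/cam_head/rgb"]          # (T, H, W, 3) frames
#     K = np.array([
#         [rgb.attrs["fx"], 0.0, rgb.attrs["cx"]],
#         [0.0, rgb.attrs["fy"], rgb.attrs["cy"]],
#         [0.0, 0.0, 1.0],
#     ])
#     print("cam_head frames:", rgb.shape)
#     print("intrinsics K:\n", K)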

73	test_sence.py	Normal file
@@ -0,0 +1,73 @@
from isaaclab.app import AppLauncher
import argparse

# -----------------------------------------------------------------------------
# 1. Launch Isaac Sim
# -----------------------------------------------------------------------------
parser = argparse.ArgumentParser()
AppLauncher.add_app_launcher_args(parser)
args = parser.parse_args()

app_launcher = AppLauncher(args)
simulation_app = app_launcher.app

# -----------------------------------------------------------------------------
# 2. The SimulationContext must be created first (PhysX is initialized here)
# -----------------------------------------------------------------------------
import isaaclab.sim as sim_utils

sim_cfg = sim_utils.SimulationCfg(
    dt=1 / 120,
    device=args.device,
)
sim = sim_utils.SimulationContext(sim_cfg)

# (Optional) set the viewer camera
sim.set_camera_view(
    eye=[3.0, 0.0, 2.0],
    target=[0.0, 0.0, 0.5],
)

# -----------------------------------------------------------------------------
# 3. Then play the timeline (this step is critical)
# -----------------------------------------------------------------------------
import omni.timeline

timeline = omni.timeline.get_timeline_interface()
timeline.play()

# -----------------------------------------------------------------------------
# 4. Only now is it safe to create the Env
# -----------------------------------------------------------------------------
import torch
from isaaclab.envs import ManagerBasedRLEnv
from mindbot.tasks.manager_based.pull.pull_env_cfg import PullEnvCfg


def main():
    env_cfg = PullEnvCfg()
    env_cfg.scene.num_envs = 1

    env = ManagerBasedRLEnv(cfg=env_cfg)

    obs, _ = env.reset()
    print("[OK] Env reset")

    for step in range(100000):
        actions = torch.zeros(
            env.num_envs,
            env.action_manager.total_action_dim,
            device=env.device,
        )
        obs, rew, done, trunc, info = env.step(actions)

        if step % 100 == 0:
            root_pos = env.scene["Mindbot"].data.root_pos_w[0]
            print(f"step {step}, root z = {root_pos[2].item():.3f}")

    print("[DONE]")


if __name__ == "__main__":
    main()
    simulation_app.close()
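
# Hedged usage note (not part of the commit): AppLauncher.add_app_launcher_args()
# exposes the standard Isaac Lab launcher flags, so this smoke test is typically run
# with something like:
#
#     python test_sence.py --headless --device cuda:0
#
# The exact flag set depends on the installed Isaac Lab version.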