4090 workstation code

2026-01-28 20:16:58 +08:00
parent 1f7053a306
commit 2a201d5bab
98 changed files with 9887 additions and 1063 deletions

View File

@@ -2,11 +2,11 @@
// See http://go.microsoft.com/fwlink/?LinkId=827846
// for the documentation about the extensions.json format
"recommendations": [
"ms-vscode.cpptools",
"ms-python.python",
"ms-python.vscode-pylance",
"ms-python.vscode-pylance",
"ban.spellright",
"ms-iot.vscode-ros",
"ms-python.black-formatter",
"ms-python.flake8",
"ExecutableBookProject.myst-highlight",
]
}

.vscode/tasks.json vendored
View File

@@ -1,23 +1,29 @@
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "2.0.0",
"tasks": [
{
// setup python env
"label": "setup_python_env",
"type": "shell",
"linux": {
"command": "${input:isaac_path}/python.sh ${workspaceFolder}/.vscode/tools/setup_vscode.py --isaac_path ${input:isaac_path}"
"command": "${workspaceFolder}/isaaclab.sh -p ${workspaceFolder}/.vscode/tools/setup_vscode.py"
},
"windows": {
"command": "${input:isaac_path}/python.bat ${workspaceFolder}/.vscode/tools/setup_vscode.py --isaac_path ${input:isaac_path}"
"command": "${workspaceFolder}/isaaclab.bat -p ${workspaceFolder}/.vscode/tools/setup_vscode.py"
}
},
{
// run formatter
"label": "run_formatter",
"type": "shell",
"linux": {
"command": "${workspaceFolder}/isaaclab.sh --format"
},
"windows": {
"command": "${workspaceFolder}/isaaclab.bat --format"
}
}
],
"inputs": [
{
"id": "isaac_path",
"description": "Absolute path to the current Isaac Sim installation. If you installed IsaacSim from pip, the import of it failed. Please make sure you run the task with the correct python environment. As fallback, you can directly execute the python script by running: ``python.sh <path-to-your-project>/.vscode/tools/setup_vscode.py``",
"default": "${HOME}/isaacsim",
"type": "promptString"
},
]
}
}

View File

@@ -4,822 +4,54 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
// For standalone script execution
{
"name": "Python: Current File",
"type": "debugpy",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
"console": "integratedTerminal"
},
{
"name": "Python: Play Template-Mindbot-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
"name": "Python: Attach (windows-x86_64/linux-x86_64)",
"type": "python",
"request": "attach",
"port": 3000,
"host": "localhost"
},
{
"name": "Python: Train Template-Mindbot-v0 with skrl (AMP)",
"type": "debugpy",
"name": "Python: Train Environment",
"type": "python",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"args" : ["--task", "Isaac-Reach-Franka-v0", "--headless"],
"program": "${workspaceFolder}/scripts/reinforcement_learning/rsl_rl/train.py",
"console": "integratedTerminal"
},
{
"name": "Python: Play Template-Mindbot-v0 with skrl (AMP)",
"type": "debugpy",
"name": "Python: Play Environment",
"type": "python",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"args" : ["--task", "Isaac-Reach-Franka-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/reinforcement_learning/rsl_rl/play.py",
"console": "integratedTerminal"
},
{
"name": "Python: Train Template-Mindbot-v0 with skrl (IPPO)",
"type": "debugpy",
"name": "Python: SinglePytest",
"type": "python",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"module": "pytest",
"args": [
"${file}"
],
"console": "integratedTerminal"
},
{
"name": "Python: Train Template-Mindbot-v0 with skrl (MAPPO)",
"type": "debugpy",
"name": "Python: ALL Pytest",
"type": "python",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"module": "pytest",
"args": ["source/isaaclab/test"],
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
},
// For script execution inside a Docker
{
"name": "Docker: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
}
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
"justMyCode": false
}
]
}
}

View File

@@ -1,4 +1,9 @@
{
"files.exclude": {
"**/.mypy_cache": true,
"**/__pycache__": true,
"**/*.egg-info": true
},
"files.associations": {
"*.tpp": "cpp",
"*.kit": "toml",
@@ -40,7 +45,6 @@
"teleoperation",
"xform",
"numpy",
"tensordict",
"flatcache",
"physx",
"dpad",
@@ -51,7 +55,9 @@
"arange",
"discretization",
"trimesh",
"uninstanceable"
"uninstanceable",
"coeff",
"prestartup"
],
// This enables python language server. Seems to work slightly better than jedi:
"python.languageServer": "Pylance",
@@ -70,7 +76,7 @@
// Python environment path
// note: the default interpreter is overridden when user selects a workspace interpreter
// in the status bar. For example, the virtual environment python interpreter
"python.defaultInterpreterPath": "",
"python.defaultInterpreterPath": "${workspaceFolder}/_isaac_sim/python.sh",
// ROS distribution
"ros.distro": "noetic",
// Language specific settings
@@ -81,6 +87,6 @@
"editor.tabSize": 2
},
// Python extra paths
// Note: this is filled up when vscode is set up for the first time
// Note: this is filled up when "./isaaclab.sh -i" is run
"python.analysis.extraPaths": []
}

View File

@@ -12,34 +12,21 @@ This is necessary because Isaac Sim 2022.2.1 onwards does not add the necessary
when the "setup_python_env.sh" is run as part of the vs-code launch configuration.
"""
import argparse
import os
import pathlib
import platform
import re
import sys
import os
import pathlib
PROJECT_DIR = pathlib.Path(__file__).parents[2]
"""Path to the the project directory."""
ISAACLAB_DIR = pathlib.Path(__file__).parents[2]
"""Path to the Isaac Lab directory."""
try:
import isaacsim # noqa: F401
isaacsim_dir = os.environ.get("ISAAC_PATH", "")
except (ModuleNotFoundError, ImportError):
# Create a parser to get the isaac-sim path
parser = argparse.ArgumentParser(description="Setup the VSCode settings for the project.")
parser.add_argument("--isaac_path", type=str, help="The absolute path to the Isaac Sim installation.")
args = parser.parse_args()
# parse the isaac-sim directory
isaacsim_dir = args.isaac_path
# check if the isaac-sim directory is provided
if not os.path.exists(isaacsim_dir):
raise FileNotFoundError(
f"Could not find the isaac-sim directory: {isaacsim_dir}. Please provide the correct path to the Isaac Sim"
" installation."
)
isaacsim_dir = os.path.join(ISAACLAB_DIR, "_isaac_sim")
except EOFError:
print("Unable to trigger EULA acceptance. This is likely due to the script being run in a non-interactive shell.")
print("Please run the script in an interactive shell to accept the EULA.")
@@ -49,11 +36,11 @@ except EOFError:
# check if the isaac-sim directory exists
if not os.path.exists(isaacsim_dir):
raise FileNotFoundError(
f"Could not find the isaac-sim directory: {isaacsim_dir}. There are two possible reasons for this:\n\t1. The"
" Isaac Sim directory does not exist as provided CLI path.\n\t2. The script couldn't import the 'isaacsim'"
" package. This could be due to the 'isaacsim' package not being installed in the Python"
" environment.\n\nPlease make sure that the Isaac Sim directory exists or that the 'isaacsim' package is"
" installed."
f"Could not find the isaac-sim directory: {isaacsim_dir}. There are two possible reasons for this:"
f"\n\t1. The Isaac Sim directory does not exist as a symlink at: {os.path.join(ISAACLAB_DIR, '_isaac_sim')}"
"\n\t2. The script could not import the 'isaacsim' package. This could be due to the 'isaacsim' package not "
"being installed in the Python environment.\n"
"\nPlease make sure that the Isaac Sim directory exists or that the 'isaacsim' package is installed."
)
ISAACSIM_DIR = isaacsim_dir
@@ -98,7 +85,7 @@ def overwrite_python_analysis_extra_paths(isaaclab_settings: str) -> str:
path_names = [path_name for path_name in path_names if len(path_name) > 0]
# change the path names to be relative to the Isaac Lab directory
rel_path = os.path.relpath(ISAACSIM_DIR, PROJECT_DIR)
rel_path = os.path.relpath(ISAACSIM_DIR, ISAACLAB_DIR)
path_names = ['"${workspaceFolder}/' + rel_path + "/" + path_name + '"' for path_name in path_names]
else:
path_names = []
@@ -111,7 +98,7 @@ def overwrite_python_analysis_extra_paths(isaaclab_settings: str) -> str:
)
# add the path names that are in the Isaac Lab extensions directory
isaaclab_extensions = os.listdir(os.path.join(PROJECT_DIR, "source"))
isaaclab_extensions = os.listdir(os.path.join(ISAACLAB_DIR, "source"))
path_names.extend(['"${workspaceFolder}/source/' + ext + '"' for ext in isaaclab_extensions])
# combine them into a single string
@@ -144,17 +131,15 @@ def overwrite_default_python_interpreter(isaaclab_settings: str) -> str:
The settings string with overwritten default python interpreter.
"""
# read executable name
python_exe = os.path.normpath(sys.executable)
# replace with Isaac Sim's python.sh or python.bat scripts to make sure python with correct
# source paths is set as default
if f"kit{os.sep}python{os.sep}bin{os.sep}python" in python_exe:
# Check if the OS is Windows or Linux to use appropriate shell file
if platform.system() == "Windows":
python_exe = python_exe.replace(f"kit{os.sep}python{os.sep}bin{os.sep}python3", "python.bat")
else:
python_exe = python_exe.replace(f"kit{os.sep}python{os.sep}bin{os.sep}python3", "python.sh")
python_exe = sys.executable.replace("\\", "/")
# We make an exception for replacing the default interpreter if the
# path (/kit/python/bin/python3) indicates that we are using a local/container
# installation of IsaacSim. We will preserve the calling script as the default, python.sh.
# We want to use python.sh because it modifies LD_LIBRARY_PATH and PYTHONPATH
# (among other envars) that we need for all of our dependencies to be accessible.
if "kit/python/bin/python3" in python_exe:
return isaaclab_settings
# replace the default python interpreter in the Isaac Lab settings file with the path to the
# python interpreter in the Isaac Lab directory
isaaclab_settings = re.sub(
@@ -169,7 +154,7 @@ def overwrite_default_python_interpreter(isaaclab_settings: str) -> str:
def main():
# Isaac Lab template settings
isaaclab_vscode_template_filename = os.path.join(PROJECT_DIR, ".vscode", "tools", "settings.template.json")
isaaclab_vscode_template_filename = os.path.join(ISAACLAB_DIR, ".vscode", "tools", "settings.template.json")
# make sure the Isaac Lab template settings file exists
if not os.path.exists(isaaclab_vscode_template_filename):
raise FileNotFoundError(
@@ -195,13 +180,13 @@ def main():
isaaclab_settings = header_message + isaaclab_settings
# write the Isaac Lab settings file
isaaclab_vscode_filename = os.path.join(PROJECT_DIR, ".vscode", "settings.json")
isaaclab_vscode_filename = os.path.join(ISAACLAB_DIR, ".vscode", "settings.json")
with open(isaaclab_vscode_filename, "w") as f:
f.write(isaaclab_settings)
# copy the launch.json file if it doesn't exist
isaaclab_vscode_launch_filename = os.path.join(PROJECT_DIR, ".vscode", "launch.json")
isaaclab_vscode_template_launch_filename = os.path.join(PROJECT_DIR, ".vscode", "tools", "launch.template.json")
isaaclab_vscode_launch_filename = os.path.join(ISAACLAB_DIR, ".vscode", "launch.json")
isaaclab_vscode_template_launch_filename = os.path.join(ISAACLAB_DIR, ".vscode", "tools", "launch.template.json")
if not os.path.exists(isaaclab_vscode_launch_filename):
# read template launch settings
with open(isaaclab_vscode_template_launch_filename) as f:

View File

@@ -184,6 +184,22 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
# wrap around environment for rsl-rl
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
# # Add a pdb breakpoint to print left_arm_ee
# import pdb
# original_step = env.step
# def step_with_debug(action):
# result = original_step(action)
# # Access the underlying environment's action_manager
# unwrapped_env = env.unwrapped
# if hasattr(unwrapped_env, 'action_manager'):
# left_arm_ee_term = unwrapped_env.action_manager.get_term("left_arm_ee")
# if left_arm_ee_term is not None:
# print(f"left_arm_ee raw_actions: {left_arm_ee_term.raw_actions}")
# print(f"left_arm_ee processed_actions: {left_arm_ee_term.processed_actions}")
# pdb.set_trace() # set a breakpoint here
# return result
# env.step = step_with_debug
# create runner from rsl-rl
if agent_cfg.class_name == "OnPolicyRunner":
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)

View File

@@ -0,0 +1,14 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""
Python module serving as a project/extension template.
"""
# Register Gym environments.
from .tasks import *
# Register UI extensions.
from .ui_extension_example import *

View File

@@ -0,0 +1,109 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Configuration for a simple Cartpole robot."""
import isaaclab.sim as sim_utils
from isaaclab.actuators import ImplicitActuatorCfg
from isaaclab.assets import ArticulationCfg
from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
##
# Configuration
##
MINDBOT_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
# 1. UPDATE THE USD PATH to your local file
usd_path="/home/ubuntu/50T/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0,
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=True,
solver_position_iteration_count=8,
solver_velocity_iteration_count=4,
stabilization_threshold=1e-6, # ignore small penetrations
),
),
init_state=ArticulationCfg.InitialStateCfg(
# 2. DEFINE THE INITIAL POSITIONS FOR ALL 12 JOINTS
# All are set to 0.0 here, but you should adjust them for a suitable "home" position.
joint_pos={
# Left arm joints
"l_joint1": 0.0,
# "l_joint2": -1.5708,
"l_joint2": 0.0,
"l_joint3": 0.0,
"l_joint4": 0.0,
"l_joint5": 0.0,
"l_joint6": 0.0,
# Right arm joints
"r_joint1": 0.0,
"r_joint2": -1.5708,
"r_joint3": 0.0,
"r_joint4": 0.0,
"r_joint5": 0.0,
"r_joint6": 0.0,
# left wheel
"left_b_revolute_Joint": 0.0,
"left_f_revolute_Joint": 0.0,
# right wheel
"right_b_revolute_Joint": 0.0,
"right_f_revolute_Joint": 0.0,
# left gripper
# Note: for a fully open initial state, set left=0.0 and right=0.0.
# The current right=0.01 means a slightly closed state.
"left_hand_joint_left": 0.0, # range: (-0.03, 0.0); 0.0 = open, -0.03 = closed
"left_hand_joint_right": 0.01, # range: (0.0, 0.03); 0.0 = open, 0.03 = closed
# right gripper
"right_hand_joint_left": 0.0,
"right_hand_joint_right": 0.01,
# trunk
"PrismaticJoint": 0.3,
# head
"head_revoluteJoint": 0.5236
},
# The initial (x, y, z) position of the robot's base in the world
pos=(0, 0, 0.05),
),
actuators={
# 3. DEFINE ACTUATOR GROUPS FOR THE ARMS
# Group for the 6 left arm joints using a regular expression
"left_arm": ImplicitActuatorCfg(
joint_names_expr=["l_joint[1-6]"], # This matches l_joint1, l_joint2, ..., l_joint6
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
velocity_limit_sim=2.0, # Note: Tune this based on your robot's specs
stiffness=100000.0, # Note: Tune this for desired control performance
damping=10000.0, # Note: Tune this for desired control performance
),
# Group for the 6 right arm joints using a regular expression
"right_arm": ImplicitActuatorCfg(
joint_names_expr=["r_joint[1-6]"], # This matches r_joint1, r_joint2, ..., r_joint6
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs
stiffness=100000.0, # Note: Tune this for desired control performance
damping=10000.0, # Note: Tune this for desired control performance
),
"head": ImplicitActuatorCfg(joint_names_expr=["head_revoluteJoint"], stiffness=10000.0, damping=1000.0),
"trunk": ImplicitActuatorCfg(
joint_names_expr=["PrismaticJoint"],
stiffness=2000.0, # changed from 10000: stronger position holding
damping=200.0, # changed from 1000: resists oscillation
effort_limit_sim=1000.0,
velocity_limit_sim=0.5,
),
"wheels": ImplicitActuatorCfg(
joint_names_expr=[".*_revolute_Joint"],
stiffness=0.0,
damping=100.0,
effort_limit_sim=200.0, # <<<<<< new: torque limit, enough to drive the wheels
velocity_limit_sim=50.0, # <<<<<< new: velocity limit, prevents excessive speed
),
"grippers": ImplicitActuatorCfg(joint_names_expr=[".*_hand_joint.*"], stiffness=1000.0, damping=100.0),
},
)
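
For orientation: this articulation config is consumed the same way CARTPOLE_CFG is in mindbot_env_cfg.py further down, by replacing the prim path so one robot is spawned per cloned environment. A minimal sketch follows; the import path for MINDBOT_CFG is an assumption, since this commit does not show the package layout.

from isaaclab.assets import ArticulationCfg
from mindbot_assets.mindbot_cfg import MINDBOT_CFG  # hypothetical module path; adjust to the actual package
# spawn one Mindbot per cloned environment, mirroring CARTPOLE_CFG.replace(...) used below
robot_cfg: ArticulationCfg = MINDBOT_CFG.replace(prim_path="/World/envs/env_.*/Robot")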

View File

@@ -0,0 +1,17 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Package containing task implementations for the extension."""
##
# Register Gym environments.
##
from isaaclab_tasks.utils import import_packages
# The blacklist is used to prevent importing configs from sub-packages
_BLACKLIST_PKGS = ["utils", ".mdp"]
# Import all configs in this package
import_packages(__name__, _BLACKLIST_PKGS)

View File

@@ -0,0 +1,6 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym # noqa: F401

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Template-Mindbot-Direct-v0",
entry_point=f"{__name__}.mindbot_env:MindbotEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
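
Once this module is imported, the task id registered above can be created like any other Isaac Lab environment. A rough sketch under two assumptions not shown in this commit: the Isaac Sim app is launched through AppLauncher first, and parse_env_cfg from isaaclab_tasks.utils is used to resolve the env_cfg_entry_point.

import gymnasium as gym
from isaaclab.app import AppLauncher

# the simulation app must be running before importing task packages or creating envs
simulation_app = AppLauncher(headless=True).app

from isaaclab_tasks.utils import parse_env_cfg  # assumed helper for resolving the env cfg entry point
import mindbot.tasks  # hypothetical package name; importing it triggers the gym.register call above

env_cfg = parse_env_cfg("Template-Mindbot-Direct-v0", num_envs=32)
env = gym.make("Template-Mindbot-Direct-v0", cfg=env_cfg)
obs, _ = env.reset()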

View File

@@ -0,0 +1,135 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import math
import torch
from collections.abc import Sequence
import isaaclab.sim as sim_utils
from isaaclab.assets import Articulation
from isaaclab.envs import DirectRLEnv
from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
from isaaclab.utils.math import sample_uniform
from .mindbot_env_cfg import MindbotEnvCfg
class MindbotEnv(DirectRLEnv):
cfg: MindbotEnvCfg
def __init__(self, cfg: MindbotEnvCfg, render_mode: str | None = None, **kwargs):
super().__init__(cfg, render_mode, **kwargs)
self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name)
self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name)
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
def _setup_scene(self):
self.robot = Articulation(self.cfg.robot_cfg)
# add ground plane
spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
# clone and replicate
self.scene.clone_environments(copy_from_source=False)
# we need to explicitly filter collisions for CPU simulation
if self.device == "cpu":
self.scene.filter_collisions(global_prim_paths=[])
# add articulation to scene
self.scene.articulations["robot"] = self.robot
# add lights
light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
light_cfg.func("/World/Light", light_cfg)
def _pre_physics_step(self, actions: torch.Tensor) -> None:
self.actions = actions.clone()
def _apply_action(self) -> None:
self.robot.set_joint_effort_target(self.actions * self.cfg.action_scale, joint_ids=self._cart_dof_idx)
def _get_observations(self) -> dict:
obs = torch.cat(
(
self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
self.joint_pos[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
),
dim=-1,
)
observations = {"policy": obs}
return observations
def _get_rewards(self) -> torch.Tensor:
total_reward = compute_rewards(
self.cfg.rew_scale_alive,
self.cfg.rew_scale_terminated,
self.cfg.rew_scale_pole_pos,
self.cfg.rew_scale_cart_vel,
self.cfg.rew_scale_pole_vel,
self.joint_pos[:, self._pole_dof_idx[0]],
self.joint_vel[:, self._pole_dof_idx[0]],
self.joint_pos[:, self._cart_dof_idx[0]],
self.joint_vel[:, self._cart_dof_idx[0]],
self.reset_terminated,
)
return total_reward
def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
time_out = self.episode_length_buf >= self.max_episode_length - 1
out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
return out_of_bounds, time_out
def _reset_idx(self, env_ids: Sequence[int] | None):
if env_ids is None:
env_ids = self.robot._ALL_INDICES
super()._reset_idx(env_ids)
joint_pos = self.robot.data.default_joint_pos[env_ids]
joint_pos[:, self._pole_dof_idx] += sample_uniform(
self.cfg.initial_pole_angle_range[0] * math.pi,
self.cfg.initial_pole_angle_range[1] * math.pi,
joint_pos[:, self._pole_dof_idx].shape,
joint_pos.device,
)
joint_vel = self.robot.data.default_joint_vel[env_ids]
default_root_state = self.robot.data.default_root_state[env_ids]
default_root_state[:, :3] += self.scene.env_origins[env_ids]
self.joint_pos[env_ids] = joint_pos
self.joint_vel[env_ids] = joint_vel
self.robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids)
self.robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
self.robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids)
@torch.jit.script
def compute_rewards(
rew_scale_alive: float,
rew_scale_terminated: float,
rew_scale_pole_pos: float,
rew_scale_cart_vel: float,
rew_scale_pole_vel: float,
pole_pos: torch.Tensor,
pole_vel: torch.Tensor,
cart_pos: torch.Tensor,
cart_vel: torch.Tensor,
reset_terminated: torch.Tensor,
):
rew_alive = rew_scale_alive * (1.0 - reset_terminated.float())
rew_termination = rew_scale_terminated * reset_terminated.float()
rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1)
rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1)
rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1)
total_reward = rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel
return total_reward

View File

@@ -0,0 +1,48 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab_assets.robots.cartpole import CARTPOLE_CFG
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import DirectRLEnvCfg
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.sim import SimulationCfg
from isaaclab.utils import configclass
@configclass
class MindbotEnvCfg(DirectRLEnvCfg):
# env
decimation = 2
episode_length_s = 5.0
# - spaces definition
action_space = 1
observation_space = 4
state_space = 0
# simulation
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
# robot(s)
robot_cfg: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="/World/envs/env_.*/Robot")
# scene
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
# custom parameters/scales
# - controllable joint
cart_dof_name = "slider_to_cart"
pole_dof_name = "cart_to_pole"
# - action scale
action_scale = 100.0 # [N]
# - reward scales
rew_scale_alive = 1.0
rew_scale_terminated = -2.0
rew_scale_pole_pos = -1.0
rew_scale_cart_vel = -0.01
rew_scale_pole_vel = -0.005
# - reset states/conditions
initial_pole_angle_range = [-0.25, 0.25] # pole angle sample range on reset [rad]
max_cart_pos = 3.0 # reset if cart exceeds this position [m]

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Template-Mindbot-Marl-Direct-v0",
entry_point=f"{__name__}.mindbot_marl_env:MindbotMarlEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_marl_env_cfg:MindbotMarlEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,38 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 150
save_interval = 50
experiment_name = "cartpole_direct"
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
actor_hidden_dims=[32, 32],
critic_hidden_dims=[32, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
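  # with task_reward_weight 0.0 and style_reward_weight 1.0 the policy is trained purely on the
  # discriminator (style) reward computed against the reference motion dataset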
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,184 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import math
import torch
from collections.abc import Sequence
import isaaclab.sim as sim_utils
from isaaclab.assets import Articulation
from isaaclab.envs import DirectMARLEnv
from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
from isaaclab.utils.math import sample_uniform
from .mindbot_marl_env_cfg import MindbotMarlEnvCfg
class MindbotMarlEnv(DirectMARLEnv):
cfg: MindbotMarlEnvCfg
def __init__(self, cfg: MindbotMarlEnvCfg, render_mode: str | None = None, **kwargs):
super().__init__(cfg, render_mode, **kwargs)
self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name)
self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name)
self._pendulum_dof_idx, _ = self.robot.find_joints(self.cfg.pendulum_dof_name)
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
def _setup_scene(self):
self.robot = Articulation(self.cfg.robot_cfg)
# add ground plane
spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
# clone and replicate
self.scene.clone_environments(copy_from_source=False)
# we need to explicitly filter collisions for CPU simulation
if self.device == "cpu":
self.scene.filter_collisions(global_prim_paths=[])
# add articulation to scene
self.scene.articulations["robot"] = self.robot
# add lights
light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
light_cfg.func("/World/Light", light_cfg)
def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None:
self.actions = actions
def _apply_action(self) -> None:
self.robot.set_joint_effort_target(
self.actions["cart"] * self.cfg.cart_action_scale, joint_ids=self._cart_dof_idx
)
self.robot.set_joint_effort_target(
self.actions["pendulum"] * self.cfg.pendulum_action_scale, joint_ids=self._pendulum_dof_idx
)
def _get_observations(self) -> dict[str, torch.Tensor]:
pole_joint_pos = normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1))
pendulum_joint_pos = normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]].unsqueeze(dim=1))
observations = {
"cart": torch.cat(
(
self.joint_pos[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
pole_joint_pos,
self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
),
dim=-1,
),
"pendulum": torch.cat(
(
pole_joint_pos + pendulum_joint_pos,
pendulum_joint_pos,
self.joint_vel[:, self._pendulum_dof_idx[0]].unsqueeze(dim=1),
),
dim=-1,
),
}
return observations
def _get_rewards(self) -> dict[str, torch.Tensor]:
total_reward = compute_rewards(
self.cfg.rew_scale_alive,
self.cfg.rew_scale_terminated,
self.cfg.rew_scale_cart_pos,
self.cfg.rew_scale_cart_vel,
self.cfg.rew_scale_pole_pos,
self.cfg.rew_scale_pole_vel,
self.cfg.rew_scale_pendulum_pos,
self.cfg.rew_scale_pendulum_vel,
self.joint_pos[:, self._cart_dof_idx[0]],
self.joint_vel[:, self._cart_dof_idx[0]],
normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]]),
self.joint_vel[:, self._pole_dof_idx[0]],
normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]]),
self.joint_vel[:, self._pendulum_dof_idx[0]],
math.prod(self.terminated_dict.values()),
)
return total_reward
def _get_dones(self) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
time_out = self.episode_length_buf >= self.max_episode_length - 1
out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
terminated = {agent: out_of_bounds for agent in self.cfg.possible_agents}
time_outs = {agent: time_out for agent in self.cfg.possible_agents}
return terminated, time_outs
def _reset_idx(self, env_ids: Sequence[int] | None):
if env_ids is None:
env_ids = self.robot._ALL_INDICES
super()._reset_idx(env_ids)
joint_pos = self.robot.data.default_joint_pos[env_ids]
joint_pos[:, self._pole_dof_idx] += sample_uniform(
self.cfg.initial_pole_angle_range[0] * math.pi,
self.cfg.initial_pole_angle_range[1] * math.pi,
joint_pos[:, self._pole_dof_idx].shape,
joint_pos.device,
)
joint_pos[:, self._pendulum_dof_idx] += sample_uniform(
self.cfg.initial_pendulum_angle_range[0] * math.pi,
self.cfg.initial_pendulum_angle_range[1] * math.pi,
joint_pos[:, self._pendulum_dof_idx].shape,
joint_pos.device,
)
joint_vel = self.robot.data.default_joint_vel[env_ids]
default_root_state = self.robot.data.default_root_state[env_ids]
default_root_state[:, :3] += self.scene.env_origins[env_ids]
self.joint_pos[env_ids] = joint_pos
self.joint_vel[env_ids] = joint_vel
self.robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids)
self.robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
self.robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids)
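# helper below: wraps an angle into [-pi, pi), e.g. 1.5 * pi maps to -0.5 * pi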
@torch.jit.script
def normalize_angle(angle):
return (angle + math.pi) % (2 * math.pi) - math.pi
@torch.jit.script
def compute_rewards(
rew_scale_alive: float,
rew_scale_terminated: float,
rew_scale_cart_pos: float,
rew_scale_cart_vel: float,
rew_scale_pole_pos: float,
rew_scale_pole_vel: float,
rew_scale_pendulum_pos: float,
rew_scale_pendulum_vel: float,
cart_pos: torch.Tensor,
cart_vel: torch.Tensor,
pole_pos: torch.Tensor,
pole_vel: torch.Tensor,
pendulum_pos: torch.Tensor,
pendulum_vel: torch.Tensor,
reset_terminated: torch.Tensor,
):
rew_alive = rew_scale_alive * (1.0 - reset_terminated.float())
rew_termination = rew_scale_terminated * reset_terminated.float()
rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1)
rew_pendulum_pos = rew_scale_pendulum_pos * torch.sum(
torch.square(pole_pos + pendulum_pos).unsqueeze(dim=1), dim=-1
)
rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1)
rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1)
rew_pendulum_vel = rew_scale_pendulum_vel * torch.sum(torch.abs(pendulum_vel).unsqueeze(dim=1), dim=-1)
total_reward = {
"cart": rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel,
"pendulum": rew_alive + rew_termination + rew_pendulum_pos + rew_pendulum_vel,
}
return total_reward

View File

@@ -0,0 +1,55 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab_assets.robots.cart_double_pendulum import CART_DOUBLE_PENDULUM_CFG
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import DirectMARLEnvCfg
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.sim import SimulationCfg
from isaaclab.utils import configclass
@configclass
class MindbotMarlEnvCfg(DirectMARLEnvCfg):
# env
decimation = 2
episode_length_s = 5.0
# multi-agent specification and spaces definition
possible_agents = ["cart", "pendulum"]
action_spaces = {"cart": 1, "pendulum": 1}
observation_spaces = {"cart": 4, "pendulum": 3}
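    # the sizes above match the tensors assembled in _get_observations():
    # cart -> [cart pos, cart vel, pole pos, pole vel]; pendulum -> [pole + pendulum pos, pendulum pos, pendulum vel]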
state_space = -1
# simulation
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
# robot(s)
robot_cfg: ArticulationCfg = CART_DOUBLE_PENDULUM_CFG.replace(prim_path="/World/envs/env_.*/Robot")
# scene
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
# custom parameters/scales
# - controllable joint
cart_dof_name = "slider_to_cart"
pole_dof_name = "cart_to_pole"
pendulum_dof_name = "pole_to_pendulum"
# - action scale
cart_action_scale = 100.0 # [N]
pendulum_action_scale = 50.0 # [Nm]
# - reward scales
rew_scale_alive = 1.0
rew_scale_terminated = -2.0
rew_scale_cart_pos = 0
rew_scale_cart_vel = -0.01
rew_scale_pole_pos = -1.0
rew_scale_pole_vel = -0.01
rew_scale_pendulum_pos = -1.0
rew_scale_pendulum_vel = -0.01
# - reset states/conditions
initial_pendulum_angle_range = [-0.25, 0.25] # pendulum angle sample range on reset [rad]
initial_pole_angle_range = [-0.25, 0.25] # pole angle sample range on reset [rad]
max_cart_pos = 3.0 # reset if cart exceeds this position [m]

View File

@@ -0,0 +1,6 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym # noqa: F401

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,38 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 150
save_interval = 50
experiment_name = "cartpole_direct"
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
actor_hidden_dims=[32, 32],
critic_hidden_dims=[32, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -8,3 +8,4 @@
from isaaclab.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403

View File

@@ -0,0 +1,249 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
"""Reward the agent for lifting the lid above the minimal height."""
lid: RigidObject = env.scene[lid_cfg.name]
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
height = lid.data.root_pos_w[:, 2]
return torch.where(height > minimal_height, 1.0, 0.0)
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for reaching the lid using tanh-kernel."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
    # cast std to a plain float so it cannot end up as a (3,) vector
if not isinstance(std, float):
if torch.is_tensor(std):
std = std.item()
else:
std = float(std)
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
# body_idx = robot.find_bodies(gripper_body_name)[0]
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# import pdb; pdb.set_trace()
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
return 1 - torch.tanh(distance / std)
def lid_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for grasping the lid (close and lifted)."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
# Check if close and lifted
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
return torch.where(is_close & is_lifted, 1.0, 0.0)
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Combined reward for reaching the lid AND lifting it."""
# Get reaching reward
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
# Get lifting reward
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
# Combine rewards multiplicatively
return reach_reward * lift_reward
def gripper_lid_side_alignment_simple(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
side_distance: float = 0.05,
height_offset: float = 0.1,
                                      std: float = 0.1) -> torch.Tensor:  # a larger std makes the reward more permissive
"""Reward for positioning gripper center on the side of lid at specified height.
    Frame convention:
    - X: direction along which the two gripper fingers close toward the center
    - Y: direction of the gap between the fingers, aligned with the lid handle (the axis we align to)
    - Z: height direction

    Goals:
    1. the gripper center sits on either side of the lid along Y (at side_distance)
    2. the gripper center is aligned with the lid along X (X offset close to 0)
    3. the gripper center is height_offset above the lid
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
    # get the positions of both gripper fingers
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]  # (num_envs, 3)
    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]  # (num_envs, 3)
    # gripper center position
    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0  # (num_envs, 3)
    # relative position of the gripper center w.r.t. the lid center
    rel_pos = gripper_center - lid_pos_w  # (num_envs, 3)
    # Y: the center should sit on either side of the lid at side_distance
    # (either the left or the right side is accepted)
    y_dist = torch.abs(rel_pos[:, 1])
    # y_reward = 1.0 - torch.tanh(y_dist / std)
    y_error = torch.abs(y_dist - side_distance)
    y_reward = 1.0 - torch.tanh(y_error / std)
    # X: should be aligned with the lid (close to 0)
    x_dist = torch.abs(rel_pos[:, 0])
    x_reward = 1.0 - torch.tanh(x_dist / std)
    # x_error = torch.abs(x_dist - side_distance)
    # x_reward = 1.0 - torch.tanh(x_error / std)
    # Z: should be height_offset above the lid
    z_error = torch.abs(rel_pos[:, 2] - height_offset)
    z_reward = 1.0 - torch.tanh(z_error / std)
    # combine the rewards: all conditions must hold
reward = x_reward * y_reward * z_reward
return reward
def gripper_lid_orientation_alignment(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body",
std: float = 0.1) -> torch.Tensor:
"""Reward for aligning gripper orientation with lid orientation.
    Goal: align the gripper frame with the lid frame:
    - gripper X axis parallel to the lid X axis
    - gripper Y axis parallel to the lid Y axis
    - gripper Z axis parallel to the lid Z axis
    The quaternion error magnitude is used to measure how well they are aligned.
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
    # lid orientation (world frame)
    lid_quat_w = lid.data.root_quat_w  # (num_envs, 4) - (w, x, y, z)
    # gripper orientation (world frame)
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]  # (num_envs, 4)
    # orientation error (angular error in radians)
    orientation_error = quat_error_magnitude(gripper_quat_w, lid_quat_w)  # (num_envs,)
    # map the error to a reward with a tanh kernel
    # std controls sensitivity: a smaller std requires tighter alignment
reward = 1.0 - torch.tanh(orientation_error / std)
return reward
def gripper_perpendicular_to_lid(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body",
std: float = 0.2) -> torch.Tensor:
"""Reward for aligning gripper perpendicular to lid surface.
    Goal: make the gripper Y axis (the grasp direction) perpendicular to the lid surface.
    Gripper frame convention:
    - X axis: direction along which the two fingers close toward the center (red arrow, horizontal, pointing left)
    - Y axis: direction of the gap between the fingers, i.e. the grasp/opening direction (green arrow)
    - Z axis: perpendicular to the gripper "palm", pointing up (blue arrow)
    We want the gripper Y axis perpendicular to the lid surface, i.e. parallel to the lid normal.
    If the lid lies flat, the normal is the world Z axis, so the gripper Y axis should be parallel to Z.
"""
robot: Articulation = env.scene[robot_cfg.name]
    # gripper orientation (world frame)
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]  # (num_envs, 4)
    # gripper local Y axis (grasp direction)
    gripper_y_local = torch.tensor([0.0, 1.0, 0.0], device=gripper_quat_w.device)  # (3,)
    gripper_y_local = gripper_y_local.unsqueeze(0).repeat(gripper_quat_w.shape[0], 1)  # (num_envs, 3)
    # rotate the gripper Y axis into the world frame
    gripper_y_world = quat_apply(gripper_quat_w, gripper_y_local)  # (num_envs, 3)
    # lid surface normal (assuming the lid lies flat, so the normal points up)
    lid_normal_world = torch.tensor([0.0, 0.0, 1.0], device=gripper_y_world.device)  # (3,)
    lid_normal_world = lid_normal_world.unsqueeze(0).repeat(gripper_y_world.shape[0], 1)  # (num_envs, 3)
    # angle between the two vectors
    dot_product = torch.sum(gripper_y_world * lid_normal_world, dim=1)  # (num_envs,)
    dot_product = torch.clamp(dot_product, -1.0, 1.0)
    angle = torch.acos(dot_product)  # (num_envs,) - in [0, pi]
    # we want the angle close to 0 (gripper Y parallel to the lid normal, i.e. perpendicular to the surface)
    # or close to pi (gripper Y pointing down, which is also perpendicular to the surface)
    # so take the smaller of the two angles
    angle_to_perpendicular = torch.min(angle, torch.pi - angle)  # (num_envs,)
    # map the angular error to a reward with a tanh kernel
reward = 1.0 - torch.tanh(angle_to_perpendicular / std)
return reward

View File

@@ -0,0 +1,77 @@
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def lid_dropped(env: ManagerBasedRLEnv,
minimum_height: float = -0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
return lid.data.root_pos_w[:, 2] < minimum_height
def lid_successfully_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.03,
height_threshold: float = 0.2,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
return is_close & is_lifted
def gripper_at_lid_side(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                        left_gripper_name: str = "left_hand__l",  # note the double underscore
right_gripper_name: str = "left_hand_r",
side_distance: float = 0.05,
side_tolerance: float = 0.01,
alignment_tolerance: float = 0.02,
height_offset: float = 0.1,
height_tolerance: float = 0.02) -> torch.Tensor:
"""Terminate when gripper center is positioned on the side of the lid at specified height.
    Frame convention:
    - X: direction along which the two gripper fingers close toward the center
    - Y: direction of the gap between the fingers, aligned with the lid handle
    - Z: height direction
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
    # get the positions of both gripper fingers
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
    # gripper center position
    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
    rel_pos = gripper_center - lid_pos_w
    # Y: the center should sit on either side of the lid at side_distance
    y_dist = torch.abs(rel_pos[:, 1])
    y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
    # X: should be aligned with the lid (close to 0)
    x_dist = torch.abs(rel_pos[:, 0])
    x_ok = x_dist < alignment_tolerance
    # Z: should be height_offset above the lid
    z_error = torch.abs(rel_pos[:, 2] - height_offset)
    z_ok = z_error < height_tolerance
    # all conditions must hold
return x_ok & y_ok & z_ok

View File

@@ -0,0 +1,355 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
import torch
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from . import mdp
##
# Pre-defined configs
##
from mindbot.robot.mindbot import MINDBOT_CFG
# ====== Additional object configurations ======
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
# Testing with DexCube
# DexCube_CFG = RigidObjectCfg(
# prim_path="{ENV_REGEX_NS}/DexCube",
# init_state=RigidObjectCfg.InitialStateCfg(
# pos=[0.1, 0.8, 0.85],
# rot=[1.0, 0.0, 0.0, 0.0],
# lin_vel=[0.0, 0.0, 0.0],
# ang_vel=[0.0, 0.0, 0.0],
# ),
# spawn=UsdFileCfg(
# usd_path="/home/ubuntu/50T/maic_usd_assets/DexCube/dex_cube_instanceable.usd",
# scale=(0.2, 0.2, 0.2),
# rigid_props=RigidBodyPropertiesCfg(
# solver_position_iteration_count=8,
# solver_velocity_iteration_count=1,
# max_angular_velocity=1000.0,
# max_linear_velocity=1000.0,
# max_depenetration_velocity=5.0,
# disable_gravity=False,
# ),
# collision_props=CollisionPropertiesCfg(
# collision_enabled=True,
# contact_offset=0.01,
# rest_offset=0.0,
# torsional_patch_radius=0.0,
# ),
# ),
# )
LID_CFG = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Lid",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.25, 0.65, 0.85],
rot=[1.0, 0.0, 0.0, 0.0],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
# scale=(0.2, 0.2, 0.2),
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=8,
solver_velocity_iteration_count=1,
max_angular_velocity=1000.0,
max_linear_velocity=1000.0,
max_depenetration_velocity=5.0,
disable_gravity=False,
),
# collision_props=CollisionPropertiesCfg(
# collision_enabled=True,
# contact_offset=0.01,
# rest_offset=0.0,
# torsional_patch_radius=0.0,
# ),
),
)
ULTRASOUND_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(disable_gravity=False, max_depenetration_velocity=1.0),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=True,
solver_position_iteration_count=8,
solver_velocity_iteration_count=4,
stabilization_threshold=1e-6,
),
),
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.05)),
actuators={}
)
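# note: the usd_path values above are absolute paths on the local workstation
# (/home/ubuntu/50T/...); they have to be adapted when running on another machine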
##
# Scene definition
##
@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
"""Configuration for a cart-pole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
# DexCube: RigidObjectCfg = DexCube_CFG.replace(prim_path="{ENV_REGEX_NS}/DexCube")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
##
# MDP settings
##
def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
return pos_w
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
    # task-space controller: the policy outputs end-effector position + orientation deltas (6-dim)
    left_arm_ee = DifferentialInverseKinematicsActionCfg(
        asset_name="Mindbot",
        joint_names=["l_joint[1-6]"],  # the 6 left-arm joints
        body_name="left_hand_body",  # end-effector body name
        controller=DifferentialIKControllerCfg(
            command_type="pose",  # control position + orientation
            use_relative_mode=True,  # relative mode: actions are increments
            ik_method="dls",  # damped least squares
        ),
        # 6-dim action: (dx, dy, dz, droll, dpitch, dyaw)
        # the scale can be specified per dimension or as a single value applied to all dimensions
        scale=(0.05, 0.05, 0.05, 0.1, 0.1, 0.1),  # position deltas (3) + orientation deltas (3, in radians)
        # or simply: scale=0.05  # applied to all 6 dimensions
)
grippers_position = mdp.JointPositionActionCfg(
asset_name="Mindbot",
joint_names=["left_hand_joint_.*"],
scale=0.5,
clip={
"left_hand_joint_left": (-0.03, 0.0),
"left_hand_joint_right": (0.0, 0.03),
},
)
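    # the resulting per-environment action is 8-dimensional, assuming the regex above matches
    # only the two left-hand gripper joints listed in `clip`: 6 task-space deltas for the IK
    # action plus 2 gripper joint-position targets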
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
# dexcube_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("DexCube")})
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
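        # with concatenate_terms=True these terms are flattened into a single vector; its size
        # depends on the Mindbot joint count (the 22 joints listed in MINDBOT_CFG would give
        # 22 + 3 + 4 + 3 + 3 + 3 = 38 dimensions)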
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class EventCfg:
"""Configuration for events."""
# # reset
# reset_mindbot_root = EventTerm(
# func=mdp.reset_root_state_uniform,
# mode="reset",
# params={
# "asset_cfg": SceneEntityCfg("Mindbot"),
# "pose_range": {"x": (0.0, 0.0)},
# "velocity_range": {"x": (0.0, 0.0)},
# },
# )
# reset_mindbot_joints = EventTerm(
# func=mdp.reset_joints_by_offset,
# mode="reset",
# params={"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
# "position_range": (-0.1, 0.1), "velocity_range": (-0.05, 0.05)},
# )
    # reset all joints to their init_state values (no offset)
reset_mindbot_all_joints = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
    # add random offsets only to the left-arm joints
reset_mindbot_left_arm = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
"position_range": (-0.1, 0.1),
"velocity_range": (-0.05, 0.05),
},
)
# reset_dexcube = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
# params={"asset_cfg": SceneEntityCfg("DexCube"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
reset_ultrasound = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
params={"asset_cfg": SceneEntityCfg("ultrasound"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
reset_lid = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
params={"asset_cfg": SceneEntityCfg("lid"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
# reaching_dexcube = RewTerm(func=mdp.gripper_lid_distance,
# params={"std": 0.3, "lid_cfg": SceneEntityCfg("lid"),
# "robot_cfg": SceneEntityCfg("Mindbot"), "gripper_body_name": "left_hand_body"},
# weight=1.0)
    # encourage the gripper to sit on either side of the lid, 0.1 m above it along Z
    gripper_side_alignment = RewTerm(func=mdp.gripper_lid_side_alignment_simple,
                                     params={"lid_cfg": SceneEntityCfg("lid"),
                                             "robot_cfg": SceneEntityCfg("Mindbot"),
                                             "left_gripper_name": "left_hand__l",
                                             "right_gripper_name": "left_hand_r",
                                             "side_distance": 0.05,  # Y: 5 cm from the lid center
                                             "height_offset": 0.1,  # Z: 0.1 m above the lid
                                             "std": 0.3},  # a larger std makes the reward more permissive
weight=1.0)
action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.001)
joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.0001,
params={"asset_cfg": SceneEntityCfg("Mindbot")})
# gripper_orientation_alignment = RewTerm(
# func=mdp.gripper_lid_orientation_alignment,
    #     weight=1.0,  # adjust the weight as needed
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "gripper_body_name": "left_hand__l",
# "std": 0.3, # 姿态对齐的容忍度(弧度),可以调整
# },
# )
gripper_perpendicular = RewTerm(
func=mdp.gripper_perpendicular_to_lid,
        weight=1.0,  # adjust the weight as needed
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"gripper_body_name": "left_hand_body",
"std": 0.2, # 角度容忍度弧度约11.5度,可以调整
},
)
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
dexcube_dropping = DoneTerm(func=mdp.lid_dropped,
params={"minimum_height": -0.05, "lid_cfg": SceneEntityCfg("lid")})
    # terminate when the gripper sits at the side positions of the lid, 0.1 m above it along Z
gripper_at_side = DoneTerm(func=mdp.gripper_at_lid_side,
params={"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l", # 改为两个下划线
"right_gripper_name": "left_hand_r",
"side_distance": 0.05,
"side_tolerance": 0.01,
"alignment_tolerance": 0.02,
"height_offset": 0.1,
"height_tolerance": 0.02})
##
# Environment configuration
##
@configclass
class CurriculumCfg:
action_rate = CurrTerm(func=mdp.modify_reward_weight,
params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
joint_vel = CurrTerm(func=mdp.modify_reward_weight,
params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
@configclass
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=3.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
        self.decimation = 4  # increased from 2 to 4: control rate drops from 25 Hz to 12.5 Hz
self.episode_length_s = 10
# viewer settings
self.viewer.eye = (3.5, 0.0, 3.2)
# simulation settings
self.sim.dt = 1 / 50
self.sim.render_interval = self.decimation

View File

@@ -0,0 +1,46 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import omni.ext
import omni.ui
# Functions and vars are available to other extensions as usual in python: `example.python_ext.some_public_function(x)`
def some_public_function(x: int):
print("[mindbot] some_public_function was called with x: ", x)
return x**x
# Any class derived from `omni.ext.IExt` in top level module (defined in `python.modules` of `extension.toml`) will be
# instantiated when extension gets enabled and `on_startup(ext_id)` will be called. Later when extension gets disabled
# on_shutdown() is called.
class ExampleExtension(omni.ext.IExt):
# ext_id is current extension id. It can be used with extension manager to query additional information, like where
# this extension is located on filesystem.
def on_startup(self, ext_id):
print("[mindbot] startup")
self._count = 0
self._window = omni.ui.Window("My Window", width=300, height=300)
with self._window.frame:
with omni.ui.VStack():
label = omni.ui.Label("")
def on_click():
self._count += 1
label.text = f"count: {self._count}"
def on_reset():
self._count = 0
label.text = "empty"
on_reset()
with omni.ui.HStack():
omni.ui.Button("Add", clicked_fn=on_click)
omni.ui.Button("Reset", clicked_fn=on_reset)
def on_shutdown(self):
print("[mindbot] shutdown")

View File

@@ -0,0 +1,123 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Configuration for a simple Cartpole robot."""
import isaaclab.sim as sim_utils
from isaaclab.actuators import ImplicitActuatorCfg
from isaaclab.assets import ArticulationCfg
from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
##
# Configuration
##
MINDBOT_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
# 1. UPDATE THE USD PATH to your local file
usd_path="/home/ubuntu/50T/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot_cd.usd",
# usd_path="/home/ubuntu/50T/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0,
linear_damping=0.5,
angular_damping=0.5,
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
fix_root_link=True,
enabled_self_collisions=False,
solver_position_iteration_count=16,
solver_velocity_iteration_count=8,
            stabilization_threshold=1e-6,  # ignore small penetrations
),
collision_props=sim_utils.CollisionPropertiesCfg(
collision_enabled=True,
            contact_offset=0.0005,  # try increasing from the default toward 0.01 or 0.02
            rest_offset=0,  # keep slightly smaller than contact_offset
),
),
init_state=ArticulationCfg.InitialStateCfg(
# 2. DEFINE THE INITIAL POSITIONS FOR ALL 12 JOINTS
# All are set to 0.0 here, but you should adjust them for a suitable "home" position.
joint_pos={
            # left arm
"l_joint1": -0.524,
"l_joint2": -1.047,
"l_joint3": -1.047,
"l_joint4": -1.571,
"l_joint5": 1.571,
"l_joint6": 0,
            # right arm
"r_joint1": -0.524,
"r_joint2": -1.047,
"r_joint3": -1.047,
"r_joint4": -1.571,
"r_joint5": 1.571,
"r_joint6": 0,
# left wheel
"left_b_revolute_Joint": 0.0,
"left_f_revolute_Joint": 0.0,
# right wheel
"right_b_revolute_Joint": 0.0,
"right_f_revolute_Joint": 0.0,
# left gripper
            # note: both joints are set to 0.0 here, i.e. the gripper starts fully open;
            # setting right=0.01 would correspond to a slightly closed state
            "left_hand_joint_left": 0.0,  # range: (-0.03, 0.0); 0.0 = open, -0.03 = closed
            "left_hand_joint_right": 0.0,  # range: (0.0, 0.03); 0.0 = open, 0.03 = closed
# right gripper
"right_hand_joint_left": 0.0,
"right_hand_joint_right": 0.0,
# trunk
"PrismaticJoint": 0.35,
# head
"head_revoluteJoint": 0.5236
},
# The initial (x, y, z) position of the robot's base in the world
pos=(0, 0, 0.05),
),
actuators={
# 3. DEFINE ACTUATOR GROUPS FOR THE ARMS
# Group for the 6 left arm joints using a regular expression
"left_arm": ImplicitActuatorCfg(
joint_names_expr=["l_joint[1-6]"], # This matches l_joint1, l_joint2, ..., l_joint6
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs
stiffness=0.0, #100000.0, # Note: Tune this for desired control performance
damping=1000.0, #10000.0, # Note: Tune this for desired control performance
),
# Group for the 6 right arm joints using a regular expression
"right_arm": ImplicitActuatorCfg(
joint_names_expr=["r_joint[1-6]"], # This matches r_joint1, r_joint2, ..., r_joint6
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs
            stiffness=10000.0,  # 100000.0,  # Note: Tune this for desired control performance
damping=1000.0, #10000.0, # Note: Tune this for desired control performance
),
"head": ImplicitActuatorCfg(joint_names_expr=["head_revoluteJoint"], stiffness=10000.0, damping=1000.0),
"trunk": ImplicitActuatorCfg(
joint_names_expr=["PrismaticJoint"],
            stiffness=40000.0,  # increased from 10000: holds the trunk position more firmly
            damping=4000.0,  # increased from 1000: damps oscillations
effort_limit_sim=10000.0,
velocity_limit_sim=0.2,
),
"wheels": ImplicitActuatorCfg(
joint_names_expr=[".*_revolute_Joint"],
stiffness=0.0,
damping=100.0,
            effort_limit_sim=200.0,  # newly added: torque limit, enough to drive the wheels
            velocity_limit_sim=50.0,  # newly added: velocity limit to keep the wheels from spinning too fast
),
"grippers": ImplicitActuatorCfg(
joint_names_expr=[".*_hand_joint.*"],
stiffness=10000.0,
damping=1000.0,
            effort_limit_sim=50.0,  # default is 10, which is too small for a firm grip, so it is raised to 50
),
},
)

View File

@@ -10,6 +10,8 @@
##
from isaaclab_tasks.utils import import_packages
from . import manager_based
from . import direct
# The blacklist is used to prevent importing configs from sub-packages
_BLACKLIST_PKGS = ["utils", ".mdp"]

View File

@@ -4,3 +4,6 @@
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym # noqa: F401
from . import pullUltrasoundLidUp
from . import pullUltrasoundLid2Table
from . import openOven

View File

@@ -1,26 +0,0 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)

View File

@@ -1,180 +0,0 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from . import mdp
##
# Pre-defined configs
##
from isaaclab_assets.robots.cartpole import CARTPOLE_CFG # isort:skip
##
# Scene definition
##
@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
"""Configuration for a cart-pole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
robot: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
##
# MDP settings
##
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
joint_effort = mdp.JointEffortActionCfg(asset_name="robot", joint_names=["slider_to_cart"], scale=100.0)
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
# observation terms (order preserved)
joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel)
joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel)
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class EventCfg:
"""Configuration for events."""
# reset
reset_cart_position = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]),
"position_range": (-1.0, 1.0),
"velocity_range": (-0.5, 0.5),
},
)
reset_pole_position = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]),
"position_range": (-0.25 * math.pi, 0.25 * math.pi),
"velocity_range": (-0.25 * math.pi, 0.25 * math.pi),
},
)
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
# (1) Constant running reward
alive = RewTerm(func=mdp.is_alive, weight=1.0)
# (2) Failure penalty
terminating = RewTerm(func=mdp.is_terminated, weight=-2.0)
# (3) Primary task: keep pole upright
pole_pos = RewTerm(
func=mdp.joint_pos_target_l2,
weight=-1.0,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]), "target": 0.0},
)
# (4) Shaping tasks: lower cart velocity
cart_vel = RewTerm(
func=mdp.joint_vel_l1,
weight=-0.01,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"])},
)
# (5) Shaping tasks: lower pole angular velocity
pole_vel = RewTerm(
func=mdp.joint_vel_l1,
weight=-0.005,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"])},
)
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
# (1) Time out
time_out = DoneTerm(func=mdp.time_out, time_out=True)
# (2) Cart out of bounds
cart_out_of_bounds = DoneTerm(
func=mdp.joint_pos_out_of_manual_limit,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]), "bounds": (-3.0, 3.0)},
)
##
# Environment configuration
##
@configclass
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=4096, env_spacing=4.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
self.decimation = 2
self.episode_length_s = 5
# viewer settings
self.viewer.eye = (8.0, 0.0, 5.0)
# simulation settings
self.sim.dt = 1 / 120
self.sim.render_interval = self.decimation

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Open-Oven-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.open_oven_env_cfg:OpenOvenEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
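A minimal usage sketch of the registration above. parse_env_cfg is assumed to be importable from isaaclab_tasks.utils as in the standard Isaac Lab workflow, and the simulation app must already be running (AppLauncher), which is what the repository's train/play scripts handle:

import gymnasium as gym
from isaaclab_tasks.utils import parse_env_cfg

# resolve the env_cfg_entry_point registered above and build the environment
env_cfg = parse_env_cfg("Open-Oven-v0", num_envs=32)
env = gym.make("Open-Oven-v0", cfg=env_cfg)
obs, _ = env.reset()

The rl_games / rsl_rl / skrl / sb3 entry points registered above are what the corresponding train scripts look up when invoked with --task Open-Oven-v0.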

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,71 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 48 # slightly more steps per rollout
max_iterations = 5000 # more iterations so the policy has enough time to learn
save_interval = 100
experiment_name = "open_oven" # renamed from the cartpole template to match the task
policy = RslRlPpoActorCriticCfg(
init_noise_std=0.7,
actor_obs_normalization=True, # enable observation normalization
critic_obs_normalization=True, # enable observation normalization
# larger network capacity: a three-layer MLP with enough width
actor_hidden_dims=[128, 64, 32],
critic_hidden_dims=[128, 64, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005, # keep a moderate level of exploration
num_learning_epochs=5,
num_mini_batches=8,
learning_rate=3e-4, # lower this if training becomes unstable with the larger network
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
# @configclass
# class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
# num_steps_per_env = 16
# max_iterations = 150
# save_interval = 50
# experiment_name = "cartpole_direct"
# policy = RslRlPpoActorCriticCfg(
# init_noise_std=1.0,
# actor_obs_normalization=False,
# critic_obs_normalization=False,
# actor_hidden_dims=[32, 32],
# critic_hidden_dims=[32, 32],
# activation="elu",
# )
# algorithm = RslRlPpoAlgorithmCfg(
# value_loss_coef=1.0,
# use_clipped_value_loss=True,
# clip_param=0.2,
# entropy_coef=0.005,
# num_learning_epochs=5,
# num_mini_batches=4,
# learning_rate=1.0e-3,
# schedule="adaptive",
# gamma=0.99,
# lam=0.95,
# desired_kl=0.01,
# max_grad_norm=1.0,
# )

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,13 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the environment."""
from isaaclab.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403
from .gripper import * # noqa: F401, F403
from .curriculums import * # noqa: F401, F403

View File

@@ -0,0 +1,50 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
from isaaclab.envs.mdp import modify_term_cfg
def annealing_std(
env: ManagerBasedRLEnv,
env_ids: torch.Tensor,
current_value: float,
start_step: int,
end_step: int,
start_std: float,
end_std: float
):
"""
Linearly anneal the std value according to the global step count.
Args:
current_value: the current parameter value (passed in automatically by the curriculum manager)
start_step: step at which annealing starts
end_step: step at which annealing ends
start_std: initial std
end_std: final std
"""
current_step = env.common_step_counter
# 1. before the start step -> keep the initial value
if current_step < start_step:
# if the value has not been set to start_std yet, force it; otherwise leave it alone
return start_std
# 2. past the end step -> hold the final value
elif current_step >= end_step:
return end_std
# 3. in between -> linear interpolation
else:
ratio = (current_step - start_step) / (end_step - start_step)
new_std = start_std + (end_std - start_std) * ratio
return new_std
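As a quick sanity check of the schedule, a standalone re-implementation of the same piecewise-linear rule (illustrative only, not tied to the env):

def linear_anneal(step: int, start_step: int, end_step: int, start_std: float, end_std: float) -> float:
    # same piecewise-linear rule as annealing_std above
    if step < start_step:
        return start_std
    if step >= end_step:
        return end_std
    ratio = (step - start_step) / (end_step - start_step)
    return start_std + (end_std - start_std) * ratio

assert abs(linear_anneal(5000, 0, 10000, 0.7, 0.1) - 0.4) < 1e-9  # halfway between 0.7 and 0.1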

View File

@@ -0,0 +1,54 @@
# Assumed to live in your task's mdp module
from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
from isaaclab.utils import configclass
import torch
class CoupledJointPositionAction(JointPositionAction):
"""Runtime implementation class for a mirrored two-finger gripper; inherits from JointPositionAction."""
def __init__(self, cfg: 'CoupledJointPositionActionCfg', env):
super().__init__(cfg, env)
@property
def action_dim(self) -> int:
"""Force the ActionManager to expect a single 1-D input for this term."""
return 1
def process_actions(self, actions: torch.Tensor):
scale = self.cfg.scale
offset = self.cfg.offset
# store the raw actions
self._raw_actions[:] = torch.clamp(actions, -1, 1)
# apply the affine transformations
target_position_interval = self._raw_actions * scale + offset
right_cmd = target_position_interval
left_cmd = -target_position_interval
# import pdb; pdb.set_trace()
self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)
# print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}")
# clip actions
if self.cfg.clip is not None:
self._processed_actions = torch.clamp(
self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
)
@configclass
class CoupledJointPositionActionCfg(JointPositionActionCfg):
"""
配置类。关键在于设置 class_type 指向上面的实现类。
"""
# !!! 关键点 !!!
# 告诉 ActionManager: "当你看到这个配置时,请实例化 CoupledJointPositionAction 这个类"
class_type = CoupledJointPositionAction
def __post_init__(self) -> None:
# 这里的检查逻辑是没问题的,因为 ActionManager 会在初始化时调用它
if len(self.joint_names) != 2:
raise ValueError("Requires exactly two joint names.")
super().__post_init__()
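A hypothetical usage sketch inside an actions config (the joint names follow the left-gripper naming used elsewhere in this commit; the scale/offset values are placeholders, not tuned settings):

from isaaclab.utils import configclass

@configclass
class LeftGripperActionsCfg:
    # a single scalar action in [-1, 1] drives both fingers symmetrically (mirrored signs)
    left_gripper = CoupledJointPositionActionCfg(
        asset_name="Mindbot",
        joint_names=["left_hand_joint_left", "left_hand_joint_right"],
        scale=0.03,   # placeholder: maps the scalar action to about +/-3 cm of finger travel
        offset=0.0,
    )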

View File

@@ -0,0 +1,758 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
"""Reward the agent for lifting the lid above the minimal height."""
lid: RigidObject = env.scene[lid_cfg.name]
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
height = lid.data.root_pos_w[:, 2]
return torch.where(height > minimal_height, 1.0, 0.0)
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for reaching the lid using tanh-kernel."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# coerce std to a Python float so it cannot broadcast into a (3,) vector
if not isinstance(std, float):
if torch.is_tensor(std):
std = std.item()
else:
std = float(std)
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
# body_idx = robot.find_bodies(gripper_body_name)[0]
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
return 1 - torch.tanh(distance / std)
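For intuition, the tanh kernel above decays smoothly with distance; a quick numeric check with std=0.3 (the value used later in the RewardsCfg):

import torch

std = 0.3
for d in (0.0, 0.3, 0.9):
    r = 1.0 - torch.tanh(torch.tensor(d) / std)
    print(f"distance={d:.1f} m -> reward={r.item():.3f}")
# distance=0.0 m -> reward=1.000
# distance=0.3 m -> reward=0.238
# distance=0.9 m -> reward=0.005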
def lid_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for grasping the lid (close and lifted)."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
body_idx = robot.find_bodies(gripper_body_name)[0]
gripper_pos_w = robot.data.body_pos_w[:, body_idx, :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
# Check if close and lifted
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
return torch.where(is_close & is_lifted, 1.0, 0.0)
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Combined reward for reaching the lid AND lifting it."""
# Get reaching reward
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
# Get lifting reward
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
# Combine rewards multiplicatively
return reach_reward * lift_reward
def gripper_handle_orientation_alignment(
env,
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
handle_name: str = "Equipment_BB_13_C",
gripper_body_name: str = "right_hand_body",
gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # direction the gripper fingertips point (local Z)
gripper_width_axis: tuple = (0.0, 1.0, 0.0), # finger open/close axis (local Y)
lid_handle_axis: tuple = (0.0, 0.0, 1.0),
max_angle_deg: float = 60.0,
# the handle_local_axis parameter was removed; using the world-frame Z axis directly is more robust
) -> torch.Tensor:
"""
Revised version:
1. Avoids vanishing gradients: (dot+1)/2 would replace a hard clamp so every angle still provides a gradient.
2. Fixes the axis error: the handle is assumed to be vertical (world Z), which corrects the grasp orientation.
"""
# 1. gather data
# dryingbox = env.scene[dryingbox_cfg.name]  (no longer needed for the axis computation, saves work)
robot = env.scene[robot_cfg.name]
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
box=env.scene[dryingbox_cfg.name]
hd_ids,_=box.find_bodies([handle_name],preserve_order=True)
handle_quat_w=box.data.body_quat_w[:,hd_ids[0],:]
# --- Constraint A: pointing ---
# goal: the gripper Z axis should point along world +X
grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
target_fwd = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
dot_fwd = torch.sum(grip_fwd_world * target_fwd, dim=1)
dot_fwd=torch.clamp(dot_fwd,-1.0,1.0)
angle_rad = torch.acos(torch.clamp(dot_fwd, -1.0, 1.0))
max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device)
angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
rew_forward = 1.0 - angle_normalized # the smaller the angle, the larger the reward
# zero reward once the angle exceeds max_angle_deg (60 degrees by default)
rew_forward = torch.where(angle_rad <= max_angle_rad, rew_forward, torch.zeros_like(rew_forward))
# nan -> 0
rew_forward = torch.where(torch.isnan(rew_forward), torch.zeros_like(rew_forward), rew_forward)
# [Key change 1] a softened reward would provide a global gradient
# range [0, 1]: 1.0 is perfect alignment, 0.5 is perpendicular, 0.0 is pointing directly away
# even a gripper pointing at the ground (dot=0) would then score 0.5 and learn that lifting up scores higher
# rew_forward = (dot_fwd + 1.0) / 2.0
# for a steeper shape, square it: rew_forward = rew_forward * rew_forward
# --- Constraint B: grasp alignment ---
# goal: the finger open/close axis (Y) should be perpendicular to the handle axis (Z)
# so that the fingers open and close horizontally
# (note: rew_yaw below actually peaks when the two axes are parallel, since it rewards |dot| -> 1)
grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
grip_width_world = normalize(quat_apply(gripper_quat_w, grip_width_local))
# [Key change 2] force the assumption that the handle is vertical (world Z)
# unless the oven can tip over onto the floor, this is the most robust choice and avoids local-frame bookkeeping
handle_axis_local = torch.tensor([0.0, 0.0, 1.0], device=env.device).repeat(env.num_envs, 1)
handle_axis_world=quat_apply(handle_quat_w,handle_axis_local)
dot_yaw=torch.sum(grip_width_world*handle_axis_world,dim=1)
rew_yaw=torch.square(torch.abs(dot_yaw))+1e-4
# nan -> 0
rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
# 5. combine the two terms
total_reward = rew_forward * rew_yaw
total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
# debug
if not torch.isfinite(total_reward).all():
print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
print(f"rew_vertical: {dot_fwd}")
print(f"rew_yaw: {rew_yaw}")
return total_reward
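Illustrative arithmetic for the two terms combined above (the angles are made up for the example): with max_angle_deg=60, a forward axis 30 degrees off world +X scores rew_forward = 1 - 30/60 = 0.5, and a finger axis about 20 degrees off the handle axis scores rew_yaw = cos(20 deg)^2, roughly 0.88, so the product is about 0.44:

import math

rew_forward = 1.0 - min(math.radians(30.0) / math.radians(60.0), 1.0)  # 0.5
rew_yaw = abs(math.cos(math.radians(20.0))) ** 2                       # ~0.883
print(rew_forward * rew_yaw)                                           # ~0.44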
# In rewards.py, _is_grasped is modified to take the dryingbox asset and a handle_name
def _is_grasped(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg, # renamed to dryingbox_cfg
robot_cfg: SceneEntityCfg,
handle_name: str, # new handle_name parameter
left_gripper_name: str,
right_gripper_name: str,
joint_names: list,
height_offset: float,
grasp_radius: float,
target_close_pos: float
) -> torch.Tensor:
"""
内部辅助函数:判定是否成功抓取。
条件:
1. 夹爪中心在把手目标点附近 (Sphere Check)
2. 夹爪处于闭合发力状态 (Joint Effort Check)
3. (关键) 夹爪X轴距离合适不能压在把手上 (X-Axis Check)
"""
# 1. 获取对象
dryingbox = env.scene[dryingbox_cfg.name] # 改为 dryingbox
robot = env.scene[robot_cfg.name]
# 2. 目标点位置 (把手中心)
handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone()
handle_pos_w[:, 1] += height_offset # X方向偏移
handle_pos_w[:, 0] += 0.01
# handle_pos_w[:, 2] -= 0.04
# 3. 夹爪位置
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
gripper_center = (pos_L + pos_R) / 2.0
# 4. condition one: distance check (within the grasp sphere around the handle)
dist_to_handle = torch.norm(gripper_center - handle_pos_w, dim=-1)
if env.common_step_counter % 500 == 0:
print(f"Env 0 Dist to Handle: {dist_to_handle[0].item():.4f}m")
is_near = dist_to_handle < grasp_radius
# 5. condition two: X-axis offset check (prevents pressing down on the handle)
x_diff = gripper_center[:, 0] - handle_pos_w[:, 0]
is_x_valid = torch.abs(x_diff) < 0.03 # 3 cm tolerance
# 6. condition three: closing check (the fingers are actively closing)
joint_ids, _ = robot.find_joints(joint_names)
joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
is_effort_closing = torch.all(joint_pos > 0.005, dim=1)
# geometric exclusion check: the fingertips must not be pinched together on nothing
dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
is_not_empty = dist_fingertips > 0.005
# debug printout once dist_to_handle has been computed
if env.common_step_counter % 500 == 0 and dist_to_handle[0] < 0.15:
delta = gripper_center[0] - handle_pos_w[0]
print(f"close_dbg env0: handle={handle_pos_w[0].cpu().numpy()}, "
f"center={gripper_center[0].cpu().numpy()}, "
f"delta={delta.cpu().numpy()}, "
f"dist={dist_to_handle[0].item():.4f}, "
f"fingertip_dist={dist_fingertips[0].item():.4f}, "
f"joint_pos={joint_pos[0].cpu().numpy()}")
# combined verdict
return is_near & is_x_valid & is_effort_closing & is_not_empty
# gripper_close_when_near, updated to use the helper above
def gripper_close_when_near(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), # renamed to dryingbox_cfg
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
handle_name: str = "Equipment_BB_13_C", # new handle_name parameter
left_gripper_name: str = "right_hand_l", # updated default
right_gripper_name: str = "right_hand__r", # updated default
joint_names: list = ["right_hand_joint_left", "right_hand_joint_right"], # updated defaults
target_close_pos: float = 0.03,
height_offset: float = 0.005, # updated default
grasp_radius: float = 0.03 # updated default
) -> torch.Tensor:
# 1. check whether the basic grasp conditions are met
is_grasped = _is_grasped(
env, dryingbox_cfg, robot_cfg, handle_name, # pass handle_name through
left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
# 2. compute the clamping intensity
robot = env.scene[robot_cfg.name]
joint_ids, _ = robot.find_joints(joint_names)
current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
mean_joint_pos = torch.mean(current_joint_pos, dim=1)
clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
# 3. only pay the weighted clamping reward when the grasp check passes
return torch.where(is_grasped, clamping_factor, 0.0)
# def gripper_close_when_near(
# env: ManagerBasedRLEnv,
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# handle_name: str = "Equipment_BB_13_C",
# left_gripper_name: str = "right_hand_l",
# right_gripper_name: str = "right_hand__r",
# joint_names: list = ["right_hand_joint_left", "right_hand_joint_right"], # kept for interface parity, not strictly required
# # key parameters
# handle_thickness: float = 0.0388, # handle thickness ~3.88 cm
# thickness_tol: float = 0.008, # tolerance ~0.8 cm, controls the reward/penalty slope
# grasp_radius: float = 0.04, # grasp-check radius, slightly larger than the handle thickness
# height_offset: float = 0.00, # adjust if a vertical offset is needed
# target_close_pos: float = 0.03, # kept for joint/clamping compatibility
# ) -> torch.Tensor:
# dryingbox = env.scene[dryingbox_cfg.name]
# robot = env.scene[robot_cfg.name]
# # target position (handle center, small offsets allowed)
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone()
# handle_pos[:, 1] += height_offset
# # finger positions and their midpoint
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# center = (pos_L + pos_R) / 2.0
# # distance to the handle (only give credit when close)
# dist = torch.norm(center - handle_pos, dim=-1)
# is_near = dist < grasp_radius
# # actual distance between the two fingers
# finger_dist = torch.norm(pos_L - pos_R, dim=-1)
# # reward: the closer the finger gap is to the handle thickness, the better; smooth falloff
# error = torch.abs(finger_dist - handle_thickness)
# close_reward = 1.0 - torch.tanh(error / thickness_tol)
# # avoid NaN
# close_reward = torch.nan_to_num(close_reward, nan=0.0)
# return torch.where(is_near, close_reward, torch.zeros_like(close_reward))
def lid_is_lifted(
env: ManagerBasedRLEnv,
minimal_height: float = 0.05, # relative lift threshold
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
height_offset: float = 0.03,
grasp_radius: float = 0.1,
target_close_pos: float = 0.03,
) -> torch.Tensor:
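# NOTE: this definition shadows the simpler lid_is_lifted above, and _is_grasped now expects a
# handle_name argument before the gripper names; this call (and the one in lid_lift_progress_reward
# below) still uses the old signature and must be updated before the term is enabled.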
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
lid = env.scene[lid_cfg.name]
# 1. read the height
current_height = lid.data.root_pos_w[:, 2]
# take the initial height from the default root state
initial_height = lid.data.default_root_state[:, 2]
# 2. compute the lift amount
lift_height = torch.clamp(current_height - initial_height, min=0.0)
# 3. velocity check (guards against the lid being knocked flying)
# if the speed exceeds 1.0 m/s the lid is considered knocked away and gets no credit
lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
is_stable = lid_vel < 1.0
# 4. compute the reward
# shaping term: the higher the lift, the higher the score
shaping_reward = lift_height * 2.0
# success bonus: lift exceeds the threshold
success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
# combine
# both is_grasped AND is_stable must hold
reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
return torch.nan_to_num(reward, nan=0.0)
def lid_lift_success_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand_body",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# key parameters
settled_height: float = 0.75, # height at which the lid rests on the ultrasound unit (fine-tune against the actual simulation)
target_lift_height: float = 0.05, # target lift height (5 cm)
grasp_dist_threshold: float = 0.05, # grasp-check distance
closed_pos: float = 0.03 # gripper closed threshold
) -> torch.Tensor:
"""
Lift reward: a high score is paid only when the lid is held securely and lifted by the target amount relative to its resting height.
"""
# 1. gather data
lid = env.scene[lid_cfg.name]
robot = env.scene[robot_cfg.name]
# current lid height
lid_z = lid.data.root_pos_w[:, 2]
lid_pos = lid.data.root_pos_w[:, :3]
# gripper position
body_ids, _ = robot.find_bodies([left_gripper_name])
gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
# 2. condition one: is the lid grasped?
# distance check
distance = torch.norm(gripper_pos - lid_pos, dim=-1)
is_close = distance < grasp_dist_threshold
# closure check
joint_ids, _ = robot.find_joints(joint_names)
joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
# treat > 80% closure as a firm grip
is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)
is_grasped = is_close & is_closed
# 3. condition two: relative lift
# current height minus the initial resting height
current_lift = lid_z - settled_height
# 4. compute the reward
lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
# shaping term
lift_reward = lift_progress
# success bonus
# an extra bonus once the lift exceeds the target height (e.g. > 95% of the 5 cm)
success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
# combine: the lift reward is only paid while is_grasped is True
total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))
# 5. safe output
return torch.nan_to_num(total_reward, nan=0.0)
def lid_lift_progress_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
initial_height: float = 0.8,
target_height_lift: float = 0.15,
height_offset: float = 0.02, # must stay consistent with the grasp reward
grasp_radius: float = 0.1, # the radius can be relaxed a little during lifting so jitter does not drop the reward
target_close_pos: float = 0.03,
std: float = 0.05 # smoothing scale
) -> torch.Tensor:
# 1. check whether the lid is grasped
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
# 2. compute the height
lid = env.scene[lid_cfg.name]
current_height = lid.data.root_pos_w[:, 2]
lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
# print(current_height)
# print(lift_height)
# 3. compute the reward
# the height reward is only paid while is_grasped is True,
# which completely rules out scoring by knocking the lid into the air
progress = torch.tanh(lift_height / std)
reward = torch.where(is_grasped, progress, 0.0)
return reward
def debug_robot_physics(
env: ManagerBasedRLEnv,
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "right_hand_body",
distance_threshold: float = 0.1, # print when the distance drops below this value
print_interval: int = 10 # print every N steps
) -> None:
"""
Debug helper: prints the robot's velocities and related physics state.
"""
# use a function attribute to throttle the print frequency
if not hasattr(debug_robot_physics, "step_count"):
debug_robot_physics.step_count = 0
debug_robot_physics.step_count += 1
if debug_robot_physics.step_count % print_interval != 0:
return
robot = env.scene[robot_cfg.name]
# 1. root-link linear and angular velocities of the robot
root_lin_vel = robot.data.root_lin_vel_w # (num_envs, 3)
root_ang_vel = robot.data.root_ang_vel_w # (num_envs, 3)
root_lin_vel_magnitude = torch.norm(root_lin_vel, dim=1) # (num_envs,)
root_ang_vel_magnitude = torch.norm(root_ang_vel, dim=1) # (num_envs,)
# 2. velocities of the gripper body
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_lin_vel = robot.data.body_lin_vel_w[:, body_ids[0], :] # (num_envs, 3)
gripper_ang_vel = robot.data.body_ang_vel_w[:, body_ids[0], :] # (num_envs, 3)
gripper_lin_vel_mag = torch.norm(gripper_lin_vel, dim=1)
gripper_ang_vel_mag = torch.norm(gripper_ang_vel, dim=1)
# 3. joint velocities (these can affect collisions)
joint_vel = robot.data.joint_vel # (num_envs, num_joints)
max_joint_vel = torch.max(torch.abs(joint_vel), dim=1)[0] # (num_envs,)
# 4. flag abnormally high velocities (a possible source of collision problems)
high_vel_mask = (gripper_lin_vel_mag > 0.5) | (root_lin_vel_magnitude > 0.3)
# if torch.any(high_vel_mask):
# env_ids = torch.where(high_vel_mask)[0].cpu().tolist()
# for env_id in env_ids[:3]: # only print the first 3 environments
# print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:")
# print(f" root linear velocity: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)")
# print(f" root angular velocity: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)")
# print(f" gripper linear velocity: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)")
# print(f" gripper angular velocity: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)")
# print(f" max joint velocity: {max_joint_vel[env_id].item():.4f} rad/s")
def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
handle_name: str = "Equipment_BB_13_C",
left_gripper_name: str = "right_hand_l",
right_gripper_name: str = "right_hand__r",
height_offset: float = 0.03,
std: float = 0.1) -> torch.Tensor:
dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
target_pos = handle_pos_w.clone()
# target_pos[:, 1] += height_offset
# target_pos[:, 0] += 0.01
# target_pos[:, 2] -= 0.04
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
# ===== optional debug printing =====
# print physics info when the gripper gets close to the handle (distance < 0.15 m)
# close_mask = distance < 0.03
# if torch.any(close_mask):
# debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5)
# # ========================
position_reward = 1.0 - torch.tanh(distance / std) + 1e-6
# debug block to insert after the distance computation
# if env.common_step_counter % 500 == 0:
# print("pos_align dbg env0: "
# f"handle={handle_pos_w[0].cpu().numpy()}, "
# f"target={target_pos[0].cpu().numpy()}, "
# f"gripL={left_gripper_pos[0].cpu().numpy()}, "
# f"gripR={right_gripper_pos[0].cpu().numpy()}, "
# f"center={gripper_center[0].cpu().numpy()}, "
# f"dist={distance[0].item():.4f}")
return position_reward
def gripper_open_when_far(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg, # kept for interface consistency, no longer used
robot_cfg: SceneEntityCfg,
handle_name: str, # kept for interface consistency, no longer used
left_gripper_name: str,
right_gripper_name: str,
joint_names: list, # no longer used
dist_threshold: float = 0.5, # no longer used
target_open_dist: float = 0.06, # distance between the two fingers when fully open (metres)
) -> torch.Tensor:
"""
Rewards keeping the gripper fully open:
the reward depends only on the actual distance between the two fingers,
and is largest when that distance approaches target_open_dist (0.06 m).
"""
# 1. get the robot
robot = env.scene[robot_cfg.name]
# 2. world positions of the left and right fingers
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# 3. actual distance between the two fingers
finger_dist = torch.norm(pos_L - pos_R, dim=-1)
# 4. reward: the closer the distance is to the fully-open distance, the higher the reward
error = torch.abs(finger_dist - target_open_dist)
reward = 1.0 - torch.tanh(error / 0.01) # 0.01 m = 1 cm smoothing scale
return reward
def gripper_open_penalty_when_far(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg,
robot_cfg: SceneEntityCfg,
handle_name: str,
left_gripper_name: str,
right_gripper_name: str,
joint_names: list,
dist_threshold: float = 0.03, # 3 cm grasp protection zone
max_penalty_dist: float = 0.55, # 0.55 m penalty activation boundary
) -> torch.Tensor:
"""
Penalty term using a sigmoid gate:
1. within 0.55 m of the handle the penalty switches on quickly and stays high (close to 1.0);
2. within 0.03 m of the handle the penalty is forced to zero so the gripper may close to grasp;
3. the smaller the finger gap, the larger the penalty.
"""
robot = env.scene[robot_cfg.name]
dryingbox = env.scene[dryingbox_cfg.name]
# 1. fetch positions
handle_ids, _ = dryingbox.find_bodies([handle_name])
handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# 2. compute distances
gripper_center = (pos_L + pos_R) / 2.0
distance = torch.norm(gripper_center - handle_pos, dim=1)
finger_dist = torch.norm(pos_L - pos_R, dim=-1)
# 3. distance activation gate (sigmoid)
# when distance < 0.55, (0.55 - dist) is positive and the sigmoid approaches 1.0
# 50.0 is the steepness; the larger it is, the more switch-like the transition at 0.55 m
dist_gate = torch.sigmoid((max_penalty_dist - distance) * 50.0)
# 4. grasp-zone protection (deadzone)
# within 3 cm, is_not_near becomes 0 and the penalty is fully withdrawn
is_not_near = (distance > dist_threshold).float()
# 5. closedness (0 = fully open, 1 = fully closed)
# assumes the fingers span at most 0.06 m when fully open
max_open = 0.06
closedness = torch.clamp(1.0 - (finger_dist / max_open), 0.0, 1.0)
# 6. combine and clamp
# the value is only large when 0.03 < distance < 0.55 and the fingers are closed
penalty_magnitude = dist_gate * is_not_near * closedness
# return a negative value as the penalty (range [-1, 0])
return -torch.clamp(penalty_magnitude, 0.0, 1.0)
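A quick numeric check of the sigmoid distance gate described above (steepness 50, boundary 0.55 m); the sample distances are illustrative only:

import math

for dist in (0.60, 0.55, 0.50, 0.10):
    gate = 1.0 / (1.0 + math.exp(-(0.55 - dist) * 50.0))
    print(f"dist={dist:.2f} m -> gate={gate:.3f}")
# dist=0.60 m -> gate=0.076
# dist=0.55 m -> gate=0.500
# dist=0.50 m -> gate=0.924
# dist=0.10 m -> gate=1.000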
# def gripper_open_penalty_when_far(
# env: ManagerBasedRLEnv,
# dryingbox_cfg: SceneEntityCfg,
# robot_cfg: SceneEntityCfg,
# handle_name: str,
# left_gripper_name: str,
# right_gripper_name: str,
# joint_names: list,
# dist_threshold: float = 0.03, # 3 cm threshold
# ) -> torch.Tensor:
# """
# Penalty term: more than 3 cm away from the handle, the agent loses points if the gripper is not open.
# This does not encourage staying far away; it only enforces "must be open" while far.
# """
# robot = env.scene[robot_cfg.name]
# dryingbox = env.scene[dryingbox_cfg.name]
# # 1. handle and gripper-center positions
# handle_ids, _ = dryingbox.find_bodies([handle_name])
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# gripper_center = (robot.data.body_pos_w[:, l_ids[0], :3] + robot.data.body_pos_w[:, r_ids[0], :3]) / 2.0
# # 2. current distance
# distance = torch.norm(gripper_center - handle_pos, dim=1)
# # 3. gripper "closedness" (deviation from open)
# # assumes joint position 0.0 is fully open; larger abs(pos) means more closed
# joint_ids, _ = robot.find_joints(joint_names)
# joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
# mean_closedness = torch.mean(joint_pos_abs, dim=1)
# # 4. penalty logic
# # if distance > 3 cm (is_far): reward = -mean_closedness (the more closed, the larger the deduction)
# # if distance <= 3 cm: reward = 0 (no deduction, closing to earn the grasp reward is allowed)
# if env.common_step_counter % 100 == 0:
# print("gripper_closed_penalty distance: ", distance[0])
# is_far = distance > dist_threshold
# penalty = -mean_closedness
# # so the best the agent can do while far is 0 (fully open); there is no way to farm reward by staying away
# return torch.where(is_far, penalty, torch.zeros_like(penalty))
# def gripper_open_until_near(
# env: ManagerBasedRLEnv,
# dryingbox_cfg: SceneEntityCfg,
# robot_cfg: SceneEntityCfg,
# handle_name: str,
# left_gripper_name: str,
# right_gripper_name: str,
# joint_names: list,
# dist_threshold: float = 0.03, # 3 cm threshold
# ) -> torch.Tensor:
# """Reward keeping the gripper open while far from the target; no constraint once close."""
# robot = env.scene[robot_cfg.name]
# dryingbox = env.scene[dryingbox_cfg.name]
# # 1. distance from the gripper center to the handle
# handle_ids, _ = dryingbox.find_bodies([handle_name])
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# gripper_center = (robot.data.body_pos_w[:, l_ids[0], :3] + robot.data.body_pos_w[:, r_ids[0], :3]) / 2.0
# distance = torch.norm(gripper_center - handle_pos, dim=1)
# # 2. current gripper joint positions (0 is assumed fully open)
# joint_ids, _ = robot.find_joints(joint_names)
# # deviation of the current joint position from 0 (open)
# joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
# mean_error = torch.mean(joint_pos_abs, dim=1)
# # 3. reward logic
# # distance > 3 cm: the closer the joints are to 0 (open), the higher the reward (max 1.0)
# # distance <= 3 cm: reward is 0, so closing is not interfered with
# is_far = distance > dist_threshold
# open_reward = 1.0 - torch.tanh(mean_error / 0.01) # smooth falloff within 1 cm
# return torch.where(is_far, open_reward, torch.zeros_like(open_reward))

File diff suppressed because it is too large

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def lid_dropped(env: ManagerBasedRLEnv,
minimum_height: float = -0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
return lid.data.root_pos_w[:, 2] < minimum_height
def lid_successfully_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.03,
height_threshold: float = 0.2,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
body_idx = robot.find_bodies(gripper_body_name)[0]
gripper_pos_w = robot.data.body_pos_w[:, body_idx, :]
distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
return is_close & is_lifted
def gripper_at_lid_side(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l", # 改为两个下划线
right_gripper_name: str = "left_hand_r",
side_distance: float = 0.05,
side_tolerance: float = 0.01,
alignment_tolerance: float = 0.02,
height_offset: float = 0.1,
height_tolerance: float = 0.02) -> torch.Tensor:
"""Terminate when gripper center is positioned on the side of the lid at specified height.
Frame conventions:
- X: the direction along which the two fingers close toward each other
- Y: the direction of the gap between the fingers, aligned with the lid handle
- Z: the height direction
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
# positions of the two gripper fingers
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
# gripper center position
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
rel_pos = gripper_center - lid_pos_w
# Y: should be on either side of the lid, at a distance of side_distance
y_dist = torch.abs(rel_pos[:, 1])
y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
# X: should be aligned (close to 0)
x_dist = torch.abs(rel_pos[:, 0])
x_ok = x_dist < alignment_tolerance
# Z: should sit height_offset above the lid
z_error = torch.abs(rel_pos[:, 2] - height_offset)
z_ok = z_error < height_tolerance
# all conditions must hold
return x_ok & y_ok & z_ok
def base_height_failure(env: ManagerBasedRLEnv,
minimum_height: float | None = None,
maximum_height: float | None = None,
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Terminate when the robot's base height is outside the specified range."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
root_pos_z = asset.data.root_pos_w[:, 2]
# check if height is outside the range
out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool)
if minimum_height is not None:
out_of_bounds |= root_pos_z < minimum_height
if maximum_height is not None:
out_of_bounds |= root_pos_z > maximum_height
return out_of_bounds

View File

@@ -0,0 +1,531 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
import torch
from isaaclab.actuators import ImplicitActuatorCfg
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
from . import mdp
##
# Pre-defined configs
##
from mindbot.robot.mindbot import MINDBOT_CFG
# ====== Additional object configurations ======
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
TABLE_CFG=RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Table",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.95, -0.3, 0.005],
rot=[0.7071, 0.0, 0.0, 0.7071],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/table/Table_C.usd",
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
DRYINGBOX_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/assets/Collected_Equipment_BB_0B/Equipment/Equipment_BB_13.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,#
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
stabilization_threshold=1e-6,
# fix_root_link=True,
),
),
init_state=ArticulationCfg.InitialStateCfg(
pos=[0.95, -0.45, 0.9],
rot=[-0.7071, 0.0, 0.0, 0.7071]
),
# actuators={}
actuators={
"passive_damper": ImplicitActuatorCfg(
# ".*" 表示匹配该USD文件内的所有关节无论是轮子、屏幕转轴还是其他
joint_names_expr=["RevoluteJoint"],
stiffness=10000.0,
damping=1000.0,
effort_limit_sim=10000.0,
velocity_limit_sim=100.0,
),
}
)
LID_CFG = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Lid",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.2, 0.65, 0.9],
rot=[1.0, 0.0, 0.0, 0.0],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
# scale=(0.2, 0.2, 0.2),
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
max_angular_velocity=1000.0,
max_linear_velocity=1000.0,
max_depenetration_velocity=0.5,#original 5.0
linear_damping=5.0, #original 0.5
angular_damping=5.0, #original 0.5
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
ULTRASOUND_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,#
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
stabilization_threshold=1e-6,
# fix_root_link=True,
),
),
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)),
# actuators={}
actuators={
"passive_damper": ImplicitActuatorCfg(
# ".*" 表示匹配该USD文件内的所有关节无论是轮子、屏幕转轴还是其他
joint_names_expr=[".*"],
stiffness=0.0,
damping=1000.0,
effort_limit_sim=10000.0,
velocity_limit_sim=100.0,
),
}
)
##
# Scene definition
##
@configclass
class OpenOvenSceneCfg(InteractiveSceneCfg):
"""Configuration for a cart-pole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
dryingbox: ArticulationCfg = DRYINGBOX_CFG.replace(prim_path="{ENV_REGEX_NS}/Dryingbox")
# ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
# lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
##
# MDP settings
##
def right_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(["right_hand_body"], preserve_order=True)
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
return pos_w
def get_body_pos_rel(env, asset_cfg: SceneEntityCfg) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(asset_cfg.body_names, preserve_order=True)
return asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
# new: generic helper to fetch a body's world-frame orientation
def get_body_quat_w(env, asset_cfg: SceneEntityCfg) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(asset_cfg.body_names, preserve_order=True)
return asset.data.body_quat_w[:, body_ids[0]]
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
# task-space controller: the policy outputs a 6-D end-effector pose increment (position + orientation)
right_arm_ee = DifferentialInverseKinematicsActionCfg(
asset_name="Mindbot",
joint_names=["r_joint[1-6]"], # the six right-arm joints
body_name="right_hand_body", # end-effector body name
controller=DifferentialIKControllerCfg(
command_type="pose", # command position + orientation
use_relative_mode=True, # relative mode: actions are increments
ik_method="dls", # damped least squares
),
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
)
# left_arm_fixed = mdp.JointPositionActionCfg(
# asset_name="Mindbot",
# joint_names=["l_joint[1-6]"], # 对应 l_joint1 到 l_joint6
# # 1. 缩放设为 0这让 RL 策略无法控制它,它不会动
# scale=0.0,
# offset={
# "l_joint1": 2.3562,
# "l_joint2": -1.2217,
# "l_joint3": -1.5708,
# "l_joint4": 1.5708,
# "l_joint5": 1.5708,
# "l_joint6": -1.2217,
# },
# )
grippers_position = mdp.BinaryJointPositionActionCfg(
asset_name="Mindbot",
joint_names=["right_hand_joint_left", "right_hand_joint_right"],
# fix: use the dictionary format
# open_command_expr={"joint-name regex": value}
open_command_expr={"right_hand_joint_left": 0.0, "right_hand_joint_right": 0.0},
# close_command_expr={"joint-name regex": value}
close_command_expr={"right_hand_joint_left": -0.03, "right_hand_joint_right": 0.03},
)
trunk_position = mdp.JointPositionActionCfg(
asset_name="Mindbot",
joint_names=["PrismaticJoint"],
scale=0.00,
offset=0.25,
clip=None
)
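For scale intuition (assuming, as is typical for relative-mode pose IK, that the 6-D action is a position delta followed by a rotation delta): a unit action on one channel moves the end-effector target by the corresponding scale entry per control step. A tiny illustration with a made-up action sample:

# illustrative only: per-step end-effector target increments for one action sample
action = [1.0, 0.0, 0.0, 0.0, 0.0, 0.5]
scale = (0.01, 0.01, 0.01, 0.025, 0.025, 0.025)
delta = [a * s for a, s in zip(action, scale)]
print(delta)  # [0.01, 0.0, 0.0, 0.0, 0.0, 0.0125] -> metres for the first three entries, radians for the last three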
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
right_gripper_pos = ObsTerm(func=right_gripper_pos_w,
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["right_hand_body"])})
dryingbox_handle_pos = ObsTerm(
func=get_body_pos_rel,
params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])}
)
dryingbox_handle_quat = ObsTerm(
func=get_body_quat_w,
params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])}
)
# lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
# ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class EventCfg:
"""Configuration for events."""
# reset all joints to their init_state (no offset)
reset_mindbot_all_joints = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
    # Reset only the right-arm joints (offset ranges are currently zero, i.e. no randomization).
reset_mindbot_right_arm = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["r_joint[1-6]"]),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
    reset_table = EventTerm(
        func=mdp.reset_root_state_uniform,
        mode="reset",
        params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}},
    )
    reset_dryingbox = EventTerm(
        func=mdp.reset_root_state_uniform,
        mode="reset",
        params={"asset_cfg": SceneEntityCfg("dryingbox"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}},
    )
# reset_ultrasound = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
# params={"asset_cfg": SceneEntityCfg("ultrasound"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
# reset_lid = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
# params={"asset_cfg": SceneEntityCfg("lid"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
gripper_handle_orientation_alignment = RewTerm(
func=mdp.gripper_handle_orientation_alignment,
params={
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"handle_name": "Equipment_BB_13_C",
"gripper_body_name": "right_hand_body",
"gripper_forward_axis": (0.0, 0.0, 1.0),
"gripper_width_axis": (0.0, 1.0, 0.0),
"lid_handle_axis": (0.0, 0.0, 1.0),
            # handle_axis was removed because the new reward function no longer uses it
            "max_angle_deg": 30.0,  # keep this at 30.0 or smaller (85.0 is far too loose for a pointing constraint)
        },
        weight=4.0  # keep around 4.0-5.0 so the orientation constraint provides enough guidance
)
    # stage 2
    # Position-alignment reward: encourage the gripper fingers to straddle the handle,
    # with the configured Z offset above it.
gripper_handle_position_alignment = RewTerm(
func=mdp.gripper_handle_position_alignment,
params={
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"handle_name":"Equipment_BB_13_C",
"left_gripper_name": "right_hand_l",
"right_gripper_name": "right_hand__r",
"height_offset": 0.00, # Z方向lid 上方 0.1m
"std": 0.3, # 位置对齐的容忍度0.3 ###0.1
},
weight=3.0
)
    # Forced-open-at-distance reward / penalty for closing the gripper while far away
    # (both disabled below).
# gripper_closed_penalty = RewTerm(
# func=mdp.gripper_open_penalty_when_far,
# params={
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "handle_name": "Equipment_BB_13_C",
# "left_gripper_name": "right_hand_l",
# "right_gripper_name": "right_hand__r",
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
# "dist_threshold": 0.02, # 3cm 以外不允许闭合
# "max_penalty_dist":0.055 #0.055
# },
# weight=1.0 # 提高权重让扣分很痛8.0
# )
# # 【新增】精细对齐 (引导进入 2cm 圈)
# gripper_handle_fine_alignment = RewTerm(
# func=mdp.gripper_handle_position_alignment,
# params={
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "handle_name":"Equipment_BB_13_C",
# "left_gripper_name": "right_hand_l",
# "right_gripper_name": "right_hand__r",
# "height_offset": 0.00, # Z方向lid 上方 0.1m
# "std": 0.03, # 位置对齐的容忍度0.05
# },
# weight=5.0 # 高权重
# )
# gripper_close_interaction = RewTerm(
# func=mdp.gripper_close_when_near,
# params={
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "handle_name":"Equipment_BB_13_C",
# "left_gripper_name": "right_hand_l",
# "right_gripper_name": "right_hand__r",
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
# "target_close_pos": 0.03,
# "height_offset": 0.00,#0.07
# "grasp_radius": 0.04,#original 0.03
# },
# weight=10.0
# )
# lid_lifted_reward = RewTerm(
# func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
# },
# weight=10.0 # 给一个足够大的权重
# )
# lid_lifting_reward = RewTerm(
# func=mdp.lid_lift_progress_reward,
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
# "target_height_lift": 0.25,
# "height_offset": 0.07, # 与其他奖励保持一致
# "std": 0.1
# },
# # 权重设大一点,告诉它这是最终目标
# weight=10.0
# )
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
mindbot_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
)
# lid_fly_away = DoneTerm(
# func=mdp.base_height_failure,
# params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
# )
# # 新增:盖子掉落判定
# # 如果盖子掉回桌面(高度接近初始高度),且已经运行了一段时间(避开刚开始重置的瞬间)
# lid_dropped = DoneTerm(
# func=mdp.base_height_failure, # 复用高度判定函数
# params={
# "asset_cfg": SceneEntityCfg("lid"),
# "minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了
# },
# )
##
# Environment configuration
##
@configclass
class CurriculumCfg:
pass
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
# position_std_scheduler = CurrTerm(
# func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类
# params={
# # 路径rewards管理器 -> 你的奖励项名 -> params字典 -> std键
# "address": "rewards.gripper_lid_position_alignment.params.std",
# # 修改逻辑:使用我们刚才写的函数
# "modify_fn": mdp.annealing_std,
# # 传给函数的参数
# "modify_params": {
# "start_step": 00, # 约 600 轮
# "end_step": 5_000, # 约 1200 轮
# "start_std": 0.3,
# "end_std": 0.05,
# }
# }
# )
@configclass
class OpenOvenEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: OpenOvenSceneCfg = OpenOvenSceneCfg(num_envs=5, env_spacing=3.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# curriculum: CurriculumCfg = CurriculumCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
        self.decimation = 4  # originally 2; with dt = 1/120 this gives a 30 Hz control rate
        self.episode_length_s = 10
        # viewer settings
        self.viewer.eye = (3.5, 0.0, 3.2)
        # simulation settings
        self.sim.dt = 1 / 120  # originally 1 / 60
        self.sim.render_interval = self.decimation
        # PhysX GPU heap / scratch buffers
        self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024  # 256 MiB
        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024  # 1 GiB
        self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024  # 128 MiB
        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # ~16.8M contact points
        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # ~1M contact patches
        # aggregate-pair capacities (for complex articulations)
        self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
        self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
        self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024
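# Example sketch (not from the original config): how decimation and the physics dt above combine
# into the effective control rate -- the policy acts once every `decimation` physics steps.
def _control_rate_hz(sim_dt: float = 1 / 120, decimation: int = 4) -> float:
    return 1.0 / (decimation * sim_dt)  # -> 30.0 Hz for the values configured above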

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Pull-Ultrasound-Lid-Table-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
    tau: 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,71 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
    num_steps_per_env = 48  # slightly more steps per rollout
    max_iterations = 5000  # more iterations so the policy has enough time to learn
    save_interval = 100
    experiment_name = "mindbot_grasp"  # renamed from the cartpole template
    policy = RslRlPpoActorCriticCfg(
        init_noise_std=0.7,
        actor_obs_normalization=True,  # enable observation normalization
        critic_obs_normalization=True,  # enable observation normalization
        # larger network capacity: three-layer MLP
        actor_hidden_dims=[128, 64, 32],
        critic_hidden_dims=[128, 64, 32],
        activation="elu",
    )
    algorithm = RslRlPpoAlgorithmCfg(
        value_loss_coef=1.0,
        use_clipped_value_loss=True,
        clip_param=0.2,
        entropy_coef=0.005,  # keep a moderate amount of exploration
        num_learning_epochs=5,
        num_mini_batches=8,
        learning_rate=3e-4,  # lower this further if training becomes unstable with the larger network
        schedule="adaptive",
        gamma=0.99,
        lam=0.95,
        desired_kl=0.01,
        max_grad_norm=1.0,
    )
# @configclass
# class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
# num_steps_per_env = 16
# max_iterations = 150
# save_interval = 50
# experiment_name = "cartpole_direct"
# policy = RslRlPpoActorCriticCfg(
# init_noise_std=1.0,
# actor_obs_normalization=False,
# critic_obs_normalization=False,
# actor_hidden_dims=[32, 32],
# critic_hidden_dims=[32, 32],
# activation="elu",
# )
# algorithm = RslRlPpoAlgorithmCfg(
# value_loss_coef=1.0,
# use_clipped_value_loss=True,
# clip_param=0.2,
# entropy_coef=0.005,
# num_learning_epochs=5,
# num_mini_batches=4,
# learning_rate=1.0e-3,
# schedule="adaptive",
# gamma=0.99,
# lam=0.95,
# desired_kl=0.01,
# max_grad_norm=1.0,
# )

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,13 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the environment."""
from isaaclab.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403
from .gripper import * # noqa: F401, F403
from .curriculums import * # noqa: F401, F403

View File

@@ -0,0 +1,50 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
from isaaclab.envs.mdp import modify_term_cfg
def annealing_std(
    env: ManagerBasedRLEnv,
    env_ids: torch.Tensor,
    current_value: float,
    start_step: int,
    end_step: int,
    start_std: float,
    end_std: float,
):
    """Linearly anneal a std value based on the global step count.

    Args:
        current_value: the current parameter value (passed in by the manager)
        start_step: step at which annealing starts
        end_step: step at which annealing ends
        start_std: initial std
        end_std: final std
    """
    current_step = env.common_step_counter
    # 1. before the start step -> keep the initial value
    if current_step < start_step:
        return start_std
    # 2. past the end step -> keep the final value
    elif current_step >= end_step:
        return end_std
    # 3. in between -> linear interpolation
    else:
        ratio = (current_step - start_step) / (end_step - start_step)
        new_std = start_std + (end_std - start_std) * ratio
        return new_std
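# Example sketch (not from the original module): expected output of the linear annealing above for
# start_step=0, end_step=5_000, start_std=0.3, end_std=0.05 (the values in the commented-out
# curriculum term): step 0 -> 0.30, step 2_500 -> 0.175, any step >= 5_000 -> 0.05.
def _demo_annealing(step: int, start_step: int = 0, end_step: int = 5_000,
                    start_std: float = 0.3, end_std: float = 0.05) -> float:
    if step < start_step:
        return start_std
    if step >= end_step:
        return end_std
    ratio = (step - start_step) / (end_step - start_step)
    return start_std + (end_std - start_std) * ratio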

View File

@@ -0,0 +1,54 @@
# Assumed to live alongside the other MDP terms (e.g. in this package's mdp module).
from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
from isaaclab.utils import configclass
import torch
class CoupledJointPositionAction(JointPositionAction):
    """Runtime action term: a single scalar command drives both gripper joints symmetrically.

    Instantiated by the ActionManager; inherits from JointPositionAction.
    """
    def __init__(self, cfg: 'CoupledJointPositionActionCfg', env):
        super().__init__(cfg, env)
    @property
    def action_dim(self) -> int:
        """Force the ActionManager to expect a 1-D input for this term."""
        return 1
    def process_actions(self, actions: torch.Tensor):
        scale = self.cfg.scale
        offset = self.cfg.offset
        # store the raw actions
        self._raw_actions[:] = torch.clamp(actions, -1, 1)
        # apply the affine transformation to obtain the commanded opening
        target_position_interval = self._raw_actions * scale + offset
        # mirror the command onto the two fingers
        right_cmd = target_position_interval
        left_cmd = -target_position_interval
        # import pdb; pdb.set_trace()
        self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)
        # print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}")
        # clip actions
        if self.cfg.clip is not None:
            self._processed_actions = torch.clamp(
                self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
            )
@configclass
class CoupledJointPositionActionCfg(JointPositionActionCfg):
    """Configuration class. The key is class_type, which points at the implementation above."""
    # Tell the ActionManager: "when you see this config, instantiate CoupledJointPositionAction".
    class_type = CoupledJointPositionAction
    def __post_init__(self) -> None:
        # This check is fine here: the ActionManager calls it during initialization.
        if len(self.joint_names) != 2:
            raise ValueError("Requires exactly two joint names.")
        super().__post_init__()
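# Example sketch (not from the original module): how a single normalized action maps onto the two
# finger joints, assuming scale=0.015 and offset=0.015 as in the commented-out
# CoupledJointPositionActionCfg usage elsewhere in this commit (action -1 -> fully open,
# action +1 -> 0.03 m opening, mirrored on each finger).
def _demo_coupled_mapping() -> torch.Tensor:
    actions = torch.tensor([[-1.0], [0.0], [1.0]])  # (num_envs, 1) raw policy outputs
    interval = torch.clamp(actions, -1, 1) * 0.015 + 0.015  # commanded opening per finger
    left_cmd, right_cmd = -interval, interval  # mirrored finger targets
    return torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)  # (num_envs, 2)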

View File

@@ -0,0 +1,902 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
"""Reward the agent for lifting the lid above the minimal height."""
lid: RigidObject = env.scene[lid_cfg.name]
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
height = lid.data.root_pos_w[:, 2]
return torch.where(height > minimal_height, 1.0, 0.0)
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for reaching the lid using tanh-kernel."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
    # coerce std to a Python float so it cannot silently become a (3,) vector
if not isinstance(std, float):
if torch.is_tensor(std):
std = std.item()
else:
std = float(std)
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
# body_idx = robot.find_bodies(gripper_body_name)[0]
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
return 1 - torch.tanh(distance / std)
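# Example sketch (not from the original module): the tanh reaching kernel used above,
# 1 - tanh(d / std), is ~1 when the gripper sits on the target and decays smoothly with distance;
# std controls how quickly. For std = 0.3: d=0.0 -> 1.00, d=0.15 -> 0.54, d=0.6 -> 0.04.
def _demo_tanh_kernel(std: float = 0.3) -> torch.Tensor:
    distances = torch.tensor([0.0, 0.15, 0.3, 0.6])  # gripper-to-target distance in metres
    return 1.0 - torch.tanh(distances / std)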
def lid_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
                robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for grasping the lid (close and lifted)."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
    # Gripper position: (num_envs, 3)
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
# Check if close and lifted
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
return torch.where(is_close & is_lifted, 1.0, 0.0)
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Combined reward for reaching the lid AND lifting it."""
# Get reaching reward
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
# Get lifting reward
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
# Combine rewards multiplicatively
return reach_reward * lift_reward
# def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand__l",
# right_gripper_name: str = "left_hand_r",
# height_offset: float = 0.1,
# std: float = 0.1) -> torch.Tensor:
# """奖励函数夹爪相对于lid的位置对齐。
# Args:
# env: 环境实例
# lid_cfg: lid的配置
# robot_cfg: 机器人的配置
# left_gripper_name: 左侧夹爪body名称
# right_gripper_name: 右侧夹爪body名称
# side_distance: Y方向的期望距离
# height_offset: Z方向的期望高度偏移
# std: tanh核函数的标准差
# Returns:
# 位置对齐奖励 (num_envs,)
# """
# lid: RigidObject = env.scene[lid_cfg.name]
# robot: Articulation = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3]
# # 获取两个夹爪的位置
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
# # 计算夹爪中心位置
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
# # 计算相对位置(夹爪中心相对于 lid 中心)
# rel_pos = gripper_center - lid_pos_w # (num_envs, 3)
# # X 方向位置:应该对齐(接近 0
# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6
# x_reward = 1.0 - torch.tanh(x_dist / std)
# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance
# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6
# # y_error = torch.abs(y_dist - side_distance)
# # y_reward = 1.0 - torch.tanh(y_error / std)
# y_reward = 1.0 - torch.tanh(y_dist / std)
# # Z 方向位置:应该在 lid 上方 height_offset 处
# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6
# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6
# z_reward = 1.0 - torch.tanh(z_error / std)
# # 组合位置奖励
# position_reward = x_reward * y_reward * z_reward
# return position_reward
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
height_offset: float = 0.02,
std: float = 0.1) -> torch.Tensor:
"""奖励函数夹爪相对于lid的位置对齐简化版
计算夹爪中心到目标点lid上方height_offset处的距离奖励。
Args:
env: 环境实例
lid_cfg: lid的配置
robot_cfg: 机器人的配置
left_gripper_name: 左侧夹爪body名称
right_gripper_name: 右侧夹爪body名称
height_offset: Z方向的期望高度偏移目标点在lid上方
std: tanh核函数的标准差
Returns:
位置对齐奖励 (num_envs,)
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
# 目标点lid位置 + height_offsetZ方向
target_pos = lid_pos_w.clone()
# print(target_pos[0])
target_pos[:, 2] += height_offset # 在lid上方height_offset处
# print(target_pos[0])
# 获取两个夹爪的位置
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
# 计算夹爪中心位置
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
# print(gripper_center[0])
# 计算夹爪中心到目标点的3D距离
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
# 距离奖励:距离越小,奖励越大
position_reward = 1.0 - torch.tanh(distance / std)
return position_reward
# def gripper_lid_orientation_alignment(
# env,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# gripper_body_name: str = "left_hand_body",
# gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z
# gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行)
# lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y
# ) -> torch.Tensor:
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 获取姿态
# lid_quat_w = lid.data.root_quat_w
# body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
# gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
# # 3. 垂直对齐 (Vertical Alignment)
# # 目标:夹爪 +Y 指向 World -Z
# grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
# grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
# # print(grip_fwd_world[0])
# world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
# dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
# dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) #add
# # 映射 [-1, 1] -> [0, 1],且全程有梯度
# # 即使是朝天 (+1),只要稍微往下转一点,分数就会增加
# # rew_vertical = (dot_vertical + 1.0) / 2.0
# val_vertical = torch.clamp((dot_vertical + 1.0) / 2.0, 0.0, 1.0)
# rew_vertical = torch.pow(val_vertical, 2) + 1e-4
# # nan -> 0
# rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
# # 4. 偏航对齐 (Yaw Alignment)
# # 目标:夹爪 +Z 平行于 Lid +Y
# grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
# grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
# lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
# lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
# dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
# rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
# # nan -> 0
# rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
# # 5. 组合
# total_reward = rew_vertical * rew_yaw
# total_reward=torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
# # debug
# if not torch.isfinite(total_reward).all():
# print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
# print(f"rew_vertical: {rew_vertical}")
# print(f"rew_yaw: {rew_yaw}")
# return total_reward
def gripper_lid_orientation_alignment(
env,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body",
    gripper_forward_axis: tuple = (0.0, 0.0, 1.0),  # corrected to Z
    gripper_width_axis: tuple = (0.0, 1.0, 0.0),  # corrected to Y (kept parallel to the handle)
    lid_handle_axis: tuple = (0.0, 1.0, 0.0),  # corrected to Y
    max_angle_deg: float = 60.0,  # maximum allowed angular deviation (degrees)
) -> torch.Tensor:
    # 1. fetch the assets
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    # 2. fetch the orientations
    lid_quat_w = lid.data.root_quat_w
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
    # 3. vertical alignment: the gripper forward axis should point along world -Z (downwards)
    grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
    grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
    world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
    # dot product of unit vectors = cos(angle)
    dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
    dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0)
    # angle in radians
    angle_rad = torch.acos(dot_vertical)
    max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device)
    # reward decays smoothly from 1.0 at angle 0 to 0.0 at max_angle_deg
    angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
    rew_vertical = 1.0 - angle_normalized  # smaller angle -> larger reward
    # zero reward beyond max_angle_deg
    rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical))
    # nan -> 0
    rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
    # 4. yaw alignment: the gripper width axis should be parallel to the lid handle axis
    grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
    grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
    lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
    lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
    dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
    rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
    # nan -> 0
    rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
    # 5. combine
    total_reward = rew_vertical * rew_yaw
    total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
    # debug
    if not torch.isfinite(total_reward).all():
        print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
        print(f"rew_vertical: {rew_vertical}")
        print(f"rew_yaw: {rew_yaw}")
return total_reward
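# Example sketch (not from the original module): the vertical-alignment term above maps the angle
# between the gripper forward axis and world -Z linearly onto [0, 1] and zeroes it past
# max_angle_deg. For max_angle_deg = 30: 0 deg -> 1.0, 15 deg -> 0.5, 45 deg -> 0.0.
def _demo_vertical_alignment(max_angle_deg: float = 30.0) -> torch.Tensor:
    angle_rad = torch.deg2rad(torch.tensor([0.0, 15.0, 29.0, 45.0]))
    max_angle_rad = torch.deg2rad(torch.tensor(max_angle_deg))
    rew = 1.0 - torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
    return torch.where(angle_rad <= max_angle_rad, rew, torch.zeros_like(rew))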
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body", # 用于计算距离的 body
# # 注意:确保 joint_names 只包含那两个夹爪关节
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05, # 5cm 以内视为“近”,需要闭合
# target_open_pos: float = 0.0, # 【修正】张开是 0.0
# target_close_pos: float = 0.03 # 【修正】闭合是 0.03
# ) -> torch.Tensor:
# """
# 逻辑很简单:
# 1. 如果 距离 < close_threshold目标关节位置 = 0.03 (闭合)
# 2. 如果 距离 >= close_threshold目标关节位置 = 0.0 (张开)
# 3. 返回 -abs(当前关节 - 目标关节),即惩罚误差。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算距离 (末端 vs Lid中心)
# lid_pos_w = lid.data.root_pos_w[:, :3]
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # distance: (num_envs,)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 3. 动态确定目标关节值
# # 如果距离小于阈值,目标设为 close_pos否则设为 open_pos
# # target_val: (num_envs, 1)
# # print(distance)
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 4. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# # joint_pos: (num_envs, 2)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# # 取绝对值防止左指是负数的情况虽然你的配置里范围看起来都是正的加上abs更保险
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 5. 计算误差
# # error: (num_envs,) -> 两个手指误差的平均值
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # # 6. 返回负奖励 (惩罚)
# # # 误差越大,惩罚越大。完全符合要求时奖励为 0。
# # return -error
# # =====================================================
# # 🚀 核心修复:安全输出
# # =====================================================
# # 1. 使用 tanh 限制数值范围在 [-1, 0]
# reward = -torch.tanh(error / 0.01)
# # 2. 过滤 NaN这一步能救命
# # 如果物理引擎算出 NaN这里会把它变成 0.0,防止训练崩溃
# reward = torch.nan_to_num(reward, nan=0.0, posinf=0.0, neginf=-1.0)
# return reward
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body",
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05,
# target_open_pos: float = 0.0,
# target_close_pos: float = 0.03,
# height_offset: float = 0.06, # 新增:与位置奖励保持一致
# ) -> torch.Tensor:
# """
# 逻辑:
# 1. 计算夹爪到【目标抓取点】(Lid上方 height_offset 处) 的距离。
# 2. 如果距离 < close_threshold目标关节位置 = 闭合。
# 3. 否则:目标关节位置 = 张开。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算目标抓取点位置 (Lid位置 + Z轴偏移)
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 修正为抓取点位置
# # 获取夹爪位置
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # 3. 计算距离 (夹爪 vs 目标抓取点)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 4. 动态确定目标关节值
# # 靠近抓取点 -> 闭合;远离 -> 张开
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 5. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 6. 计算误差并返回奖励
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # 使用 tanh 限制数值范围
# reward = 1.0 - torch.tanh(error / 0.01) # 误差越小奖励越大
# return torch.nan_to_num(reward, nan=0.0)
def _is_grasped(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg,
robot_cfg: SceneEntityCfg,
left_gripper_name: str,
right_gripper_name: str,
joint_names: list,
height_offset: float,
grasp_radius: float,
target_close_pos: float
) -> torch.Tensor:
"""
内部辅助函数:判定是否成功抓取。
条件:
1. 夹爪中心在把手目标点附近 (Sphere Check)
2. 夹爪处于闭合发力状态 (Joint Effort Check)
3. (关键) 夹爪Z轴高度合适不能压在盖子上面 (Z-Axis Check)
"""
# 1. 获取对象
lid = env.scene[lid_cfg.name]
robot = env.scene[robot_cfg.name]
# 2. 目标点位置 (把手中心)
lid_pos_w = lid.data.root_pos_w[:, :3].clone()
lid_pos_w[:, 2] += height_offset
# 3. 夹爪位置
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
gripper_center = (pos_L + pos_R) / 2.0
# 4. 条件一:距离判定 (在把手球范围内)
dist_to_handle = torch.norm(gripper_center - lid_pos_w, dim=-1)
is_near = dist_to_handle < grasp_radius
# 5. 条件二Z轴高度判定 (防止压在盖子上)
# 夹爪中心必须在把手目标点附近,允许上下微小浮动,但不能太高
# 假设 height_offset 是把手几何中心,那么夹爪不应该比它高太多
z_diff = gripper_center[:, 2] - lid_pos_w[:, 2]
is_z_valid = torch.abs(z_diff) < 0.03 # 3cm 容差
# 6. 条件三:闭合判定 (正在尝试闭合)
joint_ids, _ = robot.find_joints(joint_names)
# 获取关节位置 (绝对值)
joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# === 修改闭合判定 ===
# 1. 关节位置判定 (Effort Check)
# 只要用力了就行,去掉上限判定,防止夹太紧被误判
# 假设 0.05 是你的新 target_close_pos
is_effort_closing = torch.all(joint_pos > 0.005, dim=1)
# 2. 几何排斥判定 (Geometry Check)
# 如果真的夹住了东西,左右指尖的距离应该被把手撑开,不会太小
# 获取指尖位置
dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
# 假设把手厚度是 1cm (0.01m)
# 如果指尖距离 < 0.005,说明两个手指已经碰到一起了(空夹),没夹住东西
# 如果指尖距离 >= 0.005,说明中间有东西挡着
is_not_empty = dist_fingertips > 0.005
# 综合判定:
# 1. 在把手附近 (is_near)
# 2. 高度合适 (is_z_valid)
# 3. 在用力闭合 (is_effort_closing)
# 4. 指尖没贴死 (is_not_empty) -> 这就证明夹住东西了!
return is_near & is_z_valid & is_effort_closing & is_not_empty
def gripper_close_when_near(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
target_close_pos: float = 0.03,
height_offset: float = 0.02,
grasp_radius: float = 0.05
) -> torch.Tensor:
    # 1. check whether the basic grasp conditions are satisfied
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )
    # 2. compute the clamping intensity
    robot = env.scene[robot_cfg.name]
    joint_ids, _ = robot.find_joints(joint_names)
    # current joint positions, absolute value: (num_envs, num_joints)
    current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
    # mean closing depth of the two finger joints
    mean_joint_pos = torch.mean(current_joint_pos, dim=1)
    # reward factor: current position / target closed position,
    # so the closer the joints get to 0.03, the closer the factor gets to 1.0,
    # pushing the agent towards a fully closed grip
    clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
    # 3. the clamping reward is only paid out when the grasp check passes;
    # if the gripper is misaligned or empty, the reward stays 0
    return torch.where(is_grasped, clamping_factor, 0.0)
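# Example sketch (not from the original module): the clamping factor above scales the reward by how
# far the fingers have closed relative to target_close_pos, and is only paid while _is_grasped holds:
# a mean joint position of 0.015 with target 0.03 gives 0.5, 0.03 or more gives the full 1.0.
def _demo_clamping_factor(mean_joint_pos: float, target_close_pos: float = 0.03) -> float:
    return min(mean_joint_pos / target_close_pos, 1.0)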
# def gripper_close_when_near(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# target_close_pos: float = 0.03,
# height_offset: float = 0.03,
# grasp_radius: float = 0.02 # 球面半径 2cm
# ) -> torch.Tensor:
# # 1. 获取位置
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 把手中心
# # 获取左右指尖位置
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# # 计算夹爪中心
# pos_center = (pos_L + pos_R) / 2.0
# # 2. 距离判定 (Is Inside Sphere?)
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
# is_in_sphere = (dist_center < grasp_radius).float()
# # 3. "在中间"判定 (Is Between?)
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
# vec_LR = pos_R - pos_L # 左指指向右指
# vec_LO = lid_pos_w - pos_L # 左指指向物体
# # 计算投影比例 t
# # P_proj = P_L + t * (P_R - P_L)
# # t = (vec_LO . vec_LR) / |vec_LR|^2
# # 如果 0 < t < 1说明投影在两个指尖之间
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
# t = dot / len_sq
# is_between = (t > 0.0) & (t < 1.0)
# is_between = is_between.float()
# # 4. 闭合判定
# joint_ids, _ = robot.find_joints(joint_names)
# # 注意:如果是 Binary Action可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
# # 只有当非常接近闭合目标时才给分
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
# # 5. 最终奖励
# # 只有三者同时满足才给 1.0 分
# reward = is_in_sphere * is_between * is_closing
# return torch.nan_to_num(reward, nan=0.0)
# def lid_is_lifted(
# env:ManagerBasedRLEnv,
# minimal_height:float,
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
# ) -> torch.Tensor:
# lid = env.scene[lid_cfg.name]
# lid_height = lid.data.root_pos_w[:, 2]
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
# reward=torch.nan_to_num(reward, nan=0.0)
# return reward
def lid_is_lifted(
env: ManagerBasedRLEnv,
    minimal_height: float = 0.05,  # relative lift threshold
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
height_offset: float = 0.03,
grasp_radius: float = 0.1,
target_close_pos: float = 0.03,
) -> torch.Tensor:
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
lid = env.scene[lid_cfg.name]
    # 1. get the heights
    current_height = lid.data.root_pos_w[:, 2]
    # initial height taken automatically from the default root state
    initial_height = lid.data.default_root_state[:, 2]
    # 2. lift amount
    lift_height = torch.clamp(current_height - initial_height, min=0.0)
    # 3. velocity check (guards against the lid being knocked away)
    # if the lid moves faster than 1.0 m/s it is treated as knocked away and gets no reward
    lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
    is_stable = lid_vel < 1.0
    # 4. compute the reward
    # shaping: the higher the lift, the larger the reward
    shaping_reward = lift_height * 2.0
    # success bonus once the lift exceeds the threshold
    success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
    # combine: requires is_grasped AND is_stable
    reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
return torch.nan_to_num(reward, nan=0.0)
def lid_lift_success_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand_body",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    # key parameters
    settled_height: float = 0.75,  # settled height of the lid on the ultrasound unit (tune to the actual simulation)
    target_lift_height: float = 0.05,  # target lift height (5 cm)
    grasp_dist_threshold: float = 0.05,  # grasp-detection distance
    closed_pos: float = 0.03  # gripper closed-position threshold
) -> torch.Tensor:
    """
    Lift reward: a high score is only given when the lid is lifted by the target amount
    (relative to its settled height) while it is securely grasped.
    """
    # 1. fetch data
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    # current lid height
    lid_z = lid.data.root_pos_w[:, 2]
    lid_pos = lid.data.root_pos_w[:, :3]
    # gripper position
    body_ids, _ = robot.find_bodies([left_gripper_name])
    gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
    # 2. condition 1: is the lid securely grasped?
    # distance check
    distance = torch.norm(gripper_pos - lid_pos, dim=-1)
    is_close = distance < grasp_dist_threshold
    # closure check
    joint_ids, _ = robot.find_joints(joint_names)
    joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
    # treat > 80% of the closed position as a firm grip
    is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)
    is_grasped = is_close & is_closed
    # 3. condition 2: relative lift
    # current height minus the settled height
    current_lift = lid_z - settled_height
    # 4. compute the reward
    lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
    # shaping reward: proportional to lift progress
    lift_reward = lift_progress
    # success bonus once the target lift height is reached
    success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
    # combine: the lift reward is only paid while is_grasped is True
    total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))
    # 5. safe output
return torch.nan_to_num(total_reward, nan=0.0)
def lid_on_table_area_tanh_reward(
env: ManagerBasedRLEnv,
std_xy: float,
std_z: float,
table_cfg: SceneEntityCfg,
    table_dims: tuple[float, float],  # table-top size along x and y (x_size, y_size)
    surface_height: float,  # Z offset of the table top relative to the table's centre
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    # grasp-detection parameters (consistent with the earlier grasp logic)
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    height_offset: float = 0.02,
    grasp_radius: float = 0.1,
    target_close_pos: float = 0.03,
) -> torch.Tensor:
    """
    Area-based tanh reward: high reward whenever the lid is above the table-top region
    and at the right height.
    """
    # 1. fetch data
    lid: RigidObject = env.scene[lid_cfg.name]
    table: RigidObject = env.scene[table_cfg.name]
    lid_pos_w = lid.data.root_pos_w[:, :3]
    table_pos_w = table.data.root_pos_w[:, :3]
    # 2. XY offset of the lid relative to the table centre
    # assumes the table is axis-aligned
    rel_pos_xy = lid_pos_w[:, :2] - table_pos_w[:, :2]
    # point-to-rectangle distance:
    # dx, dy are how far the lid centre overshoots the table edges
    half_width = table_dims[0] / 2.0
    half_depth = table_dims[1] / 2.0
    dx = torch.clamp(torch.abs(rel_pos_xy[:, 0]) - half_width, min=0.0)
    dy = torch.clamp(torch.abs(rel_pos_xy[:, 1]) - half_depth, min=0.0)
    dist_xy = torch.sqrt(dx**2 + dy**2)
    # 3. Z-height deviation
    # target height = table centre height + surface offset
    target_z = table_pos_w[:, 2] + surface_height
    dist_z = torch.abs(lid_pos_w[:, 2] - target_z)
    # 4. check the grasp
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )
    # 5. tanh rewards
    # anywhere inside the rectangle, dist_xy = 0 and reward_xy = 1.0
    reward_xy = 1.0 - torch.tanh(dist_xy / std_xy)
    reward_z = 1.0 - torch.tanh(dist_z / std_z)
    # combined reward: both XY and Z must be aligned
    total_reward = reward_xy * reward_z
    # the shaping reward is only paid while the lid is securely grasped
return torch.where(is_grasped, total_reward, torch.zeros_like(total_reward))
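# Example sketch (not from the original module): the XY term above is a point-to-rectangle distance.
# Clamp the per-axis overshoot beyond the table's half-extents at zero, then take the Euclidean
# norm; points over the table get distance 0 and therefore the full tanh reward. The half-extents
# below are placeholders -- in the reward they come from table_dims.
def _demo_dist_to_rect(rel_xy: torch.Tensor, half_extents: tuple[float, float] = (0.4, 0.3)) -> torch.Tensor:
    dx = torch.clamp(rel_xy[:, 0].abs() - half_extents[0], min=0.0)
    dy = torch.clamp(rel_xy[:, 1].abs() - half_extents[1], min=0.0)
    return torch.sqrt(dx**2 + dy**2)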
def lid_lift_progress_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
initial_height: float = 0.8,
target_height_lift: float = 0.15,
    height_offset: float = 0.02,  # must match the grasp reward
    grasp_radius: float = 0.1,  # slightly larger radius during lifting so jitter does not drop the reward
    target_close_pos: float = 0.03,
    std: float = 0.05  # standard deviation
) -> torch.Tensor:
    # 1. check the grasp
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )
    # 2. compute the lift height
    lid = env.scene[lid_cfg.name]
    current_height = lid.data.root_pos_w[:, 2]
    lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
    # print(current_height)
    # print(lift_height)
    # 3. compute the reward
    # the height reward is only paid while is_grasped is True,
    # which rules out scoring by knocking or launching the lid away
    progress = torch.tanh(lift_height / std)
    reward = torch.where(is_grasped, progress, 0.0)
return reward
# def lid_grasped_bonus(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg,
# robot_cfg: SceneEntityCfg,
# left_gripper_name: str,
# joint_names: list,
# distance_threshold: float = 0.05,
# closed_pos: float = 0.03
# ) -> torch.Tensor:
# # 1. 计算距离
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
# lid_pos = lid.data.root_pos_w[:, :3]
# distance = torch.norm(gripper_pos - lid_pos, dim=-1)
# # 2. 检查距离是否达标
# is_close = distance < distance_threshold
# # 3. 检查夹爪是否闭合
# joint_ids, _ = robot.find_joints(joint_names)
# joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# # 检查是否接近闭合 (比如 > 0.02)
# is_closed = torch.all(joint_pos > (closed_pos * 0.8), dim=-1)
# # 4. 给分
# # 只有同时满足才给 1.0
# reward = torch.where(is_close & is_closed, 1.0, 0.0)
# return torch.nan_to_num(reward)

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def lid_dropped(env: ManagerBasedRLEnv,
minimum_height: float = -0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
return lid.data.root_pos_w[:, 2] < minimum_height
def lid_successfully_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.03,
height_threshold: float = 0.2,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
return is_close & is_lifted
def gripper_at_lid_side(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                        left_gripper_name: str = "left_hand__l",  # note the double underscore
                        right_gripper_name: str = "left_hand_r",
                        side_distance: float = 0.05,
                        side_tolerance: float = 0.01,
                        alignment_tolerance: float = 0.02,
                        height_offset: float = 0.1,
                        height_tolerance: float = 0.02) -> torch.Tensor:
    """Terminate when gripper center is positioned on the side of the lid at specified height.

    Frame conventions:
    - X: direction along which the two fingers close towards each other
    - Y: direction of the gap between the fingers, aligned with the lid handle
    - Z: height
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]
    lid_pos_w = lid.data.root_pos_w[:, :3]
    # positions of both fingers
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
    # gripper centre
    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
    rel_pos = gripper_center - lid_pos_w
    # Y: should sit at side_distance from the lid on either side
    y_dist = torch.abs(rel_pos[:, 1])
    y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
    # X: should be aligned (close to 0)
    x_dist = torch.abs(rel_pos[:, 0])
    x_ok = x_dist < alignment_tolerance
    # Z: should be height_offset above the lid
    z_error = torch.abs(rel_pos[:, 2] - height_offset)
    z_ok = z_error < height_tolerance
    # all conditions must hold
return x_ok & y_ok & z_ok
def base_height_failure(env: ManagerBasedRLEnv,
minimum_height: float | None = None,
maximum_height: float | None = None,
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Terminate when the robot's base height is outside the specified range."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
root_pos_z = asset.data.root_pos_w[:, 2]
# check if height is outside the range
out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool)
if minimum_height is not None:
out_of_bounds |= root_pos_z < minimum_height
if maximum_height is not None:
out_of_bounds |= root_pos_z > maximum_height
return out_of_bounds

View File

@@ -0,0 +1,492 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
import torch
from isaaclab.actuators import ImplicitActuatorCfg
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
from . import mdp
##
# Pre-defined configs
##
from mindbot.robot.mindbot import MINDBOT_CFG
# ====== Additional object configurations ======
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
TABLE_CFG=RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Table",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.95, -0.3, 0.005],
rot=[0.7071, 0.0, 0.0, 0.7071],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/table/Table_C.usd",
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
LID_CFG = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Lid",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.2, 0.65, 0.9],
rot=[1.0, 0.0, 0.0, 0.0],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
# scale=(0.2, 0.2, 0.2),
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
max_angular_velocity=1000.0,
max_linear_velocity=1000.0,
max_depenetration_velocity=0.5,#original 5.0
linear_damping=5.0, #original 0.5
angular_damping=5.0, #original 0.5
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
ULTRASOUND_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,#
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
stabilization_threshold=1e-6,
# fix_root_link=True,
),
),
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)),
# actuators={}
actuators={
"passive_damper": ImplicitActuatorCfg(
            # ".*" matches every joint inside this USD file (wheels, screen hinge, etc.)
joint_names_expr=[".*"],
stiffness=0.0,
damping=1000.0,
effort_limit_sim=10000.0,
velocity_limit_sim=100.0,
),
}
)
##
# Scene definition
##
@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
    """Configuration for the Mindbot lid-grasping scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
##
# MDP settings
##
def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
return pos_w
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
    # Task-space controller: the policy outputs a 6-D end-effector pose increment (position + orientation)
    left_arm_ee = DifferentialInverseKinematicsActionCfg(
        asset_name="Mindbot",
        joint_names=["l_joint[1-6]"],  # the 6 joints of the left arm
        body_name="left_hand_body",  # end-effector body name
        controller=DifferentialIKControllerCfg(
            command_type="pose",  # control position + orientation
            use_relative_mode=True,  # relative mode: actions are increments
            ik_method="dls",  # damped least squares
        ),
        scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
)
# grippers_position = mdp.JointPositionActionCfg(
# asset_name="Mindbot",
# joint_names=["left_hand_joint_.*"],
# scale=1,
# clip={
# "left_hand_joint_left": (-0.03, 0.0),
# "left_hand_joint_right": (0.0, 0.03),
# },
# )
# grippers_position = mdp.CoupledJointPositionActionCfg(
# asset_name="Mindbot",
# joint_names=["left_hand_joint_left", "left_hand_joint_right"],
# scale=0.015, # (0.03 - 0) / 2
# offset=0.015, # (0.03 + 0) / 2
# clip=None
# )
grippers_position = mdp.BinaryJointPositionActionCfg(
asset_name="Mindbot",
joint_names=["left_hand_joint_left", "left_hand_joint_right"],
        # dictionary format: {"joint-name regex": value}
        open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
        close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
)
trunk_position = mdp.JointPositionActionCfg(
asset_name="Mindbot",
joint_names=["PrismaticJoint"],
scale=0.00,
offset=0.3,
clip=None
)
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class EventCfg:
"""Configuration for events."""
# reset_mindbot_root=EventTerm(
# func=mdp.reset_root_state_uniform,
# mode="reset",
# params={
# "asset_cfg":SceneEntityCfg("Mindbot"),
# "pose_range":{"x":(0.0, 0.0), "y":(0.0, 0.0)},
# "velocity_range":{"x":(0.0, 0.0), "y":(0.0, 0.0)},
# },
# )
    # reset all joints to their init_state (no offset)
reset_mindbot_all_joints = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
    # add a random offset only to the left-arm joints
reset_mindbot_left_arm = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
reset_ultrasound = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
params={"asset_cfg": SceneEntityCfg("ultrasound"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
reset_lid = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
params={"asset_cfg": SceneEntityCfg("lid"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
    # Stage 1: orientation alignment reward
# gripper_lid_orientation_alignment = RewTerm(
# func=mdp.gripper_lid_orientation_alignment,
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "gripper_body_name": "left_hand_body",
# "gripper_forward_axis": (0.0, 0.0, 1.0),
# "gripper_width_axis": (0.0, 1.0, 0.0),
# "lid_handle_axis": (0.0, 1.0, 0.0)
# },
# weight=1.0
# )
gripper_lid_orientation_alignment = RewTerm(
func=mdp.gripper_lid_orientation_alignment,
        params={
            "lid_cfg": SceneEntityCfg("lid"),
            "robot_cfg": SceneEntityCfg("Mindbot"),
            "gripper_body_name": "left_hand_body",
            "gripper_forward_axis": (0.0, 0.0, 1.0),
            "gripper_width_axis": (0.0, 1.0, 0.0),
            "lid_handle_axis": (0.0, 1.0, 0.0),
            "max_angle_deg": 85.0,  # allow up to 85 degrees of deviation
        },
weight=5
)
    # Stage 2: position alignment reward, encouraging the gripper to straddle the lid
    # with the gripper center height_offset above it in Z
gripper_lid_position_alignment = RewTerm(
func=mdp.gripper_lid_position_alignment,
        params={
            "lid_cfg": SceneEntityCfg("lid"),
            "robot_cfg": SceneEntityCfg("Mindbot"),
            "left_gripper_name": "left_hand__l",
            "right_gripper_name": "left_hand_r",
            "height_offset": 0.07,  # Z direction: 7 cm above the lid
            "std": 0.3,  # tolerance of the position-alignment kernel
        },
weight=3.0
)
    # Fine alignment: guides the gripper into a tight radius around the grasp point
gripper_lid_fine_alignment = RewTerm(
func=mdp.gripper_lid_position_alignment,
        params={
            "lid_cfg": SceneEntityCfg("lid"),
            "robot_cfg": SceneEntityCfg("Mindbot"),
            "left_gripper_name": "left_hand__l",
            "right_gripper_name": "left_hand_r",
            "height_offset": 0.07,  # NOTE: must match the height used by the grasp-interaction check
            "std": 0.05,  # very steep gradient, effective only within roughly 5-10 cm
        },
        weight=10.0  # high weight
)
gripper_close_interaction = RewTerm(
func=mdp.gripper_close_when_near,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"joint_names": ["left_hand_joint_left", "left_hand_joint_right"],
"target_close_pos": 0.03,
"height_offset": 0.04,
"grasp_radius": 0.03,
},
weight=10.0
)
lid_on_table_area = RewTerm(
func=mdp.lid_on_table_area_tanh_reward,
        params={
            "table_cfg": SceneEntityCfg("table"),
            "table_dims": (1.2, 0.8),  # tabletop XY dimensions
            "surface_height": 0.82,  # tabletop Z height (relative to the table base)
            "std_xy": 0.5,  # XY guidance range, 0.5 m
            "std_z": 0.1,  # Z guidance precision, 0.1 m
            "height_offset": 0.07,  # grasp-check offset
        },
weight=20.0
)
# lid_lifted_reward = RewTerm(
# func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
# },
# weight=10.0 # 给一个足够大的权重
# )
# lid_lifting_reward = RewTerm(
# func=mdp.lid_lift_progress_reward,
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
# "target_height_lift": 0.2,
# "height_offset": 0.07, # 与其他奖励保持一致
# "std": 0.1
# },
# # 权重设大一点,告诉它这是最终目标
# weight=10.0
# )
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
mindbot_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
)
lid_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
)
    # Lid-dropped termination:
    # triggered when the lid falls back toward the table (height close to its initial height)
lid_dropped = DoneTerm(
func=mdp.base_height_failure, # 复用高度判定函数
        params={
            "asset_cfg": SceneEntityCfg("lid"),
            "minimum_height": 0.79,  # the lid starts at ~0.9; falling below this means it dropped or was knocked down
        },
)
##
# Environment configuration
##
@configclass
class CurriculumCfg:
pass
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
# position_std_scheduler = CurrTerm(
# func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类
# params={
# # 路径rewards管理器 -> 你的奖励项名 -> params字典 -> std键
# "address": "rewards.gripper_lid_position_alignment.params.std",
# # 修改逻辑:使用我们刚才写的函数
# "modify_fn": mdp.annealing_std,
# # 传给函数的参数
# "modify_params": {
# "start_step": 00, # 约 600 轮
# "end_step": 5_000, # 约 1200 轮
# "start_std": 0.3,
# "end_std": 0.05,
# }
# }
# )
@configclass
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=3.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# curriculum: CurriculumCfg = CurriculumCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
        self.decimation = 4  # original 2; with sim dt = 1/120 this gives a 30 Hz control rate
self.episode_length_s = 10
# viewer settings
self.viewer.eye = (3.5, 0.0, 3.2)
# simulation settings
self.sim.dt = 1 / 120 #original 1 / 60
self.sim.render_interval = self.decimation
        # base GPU heap memory
        self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024  # 256 MB
        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
        self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # ~16.8M contact points
        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # ~1M patches
        # aggregate pair capacities (for complex articulations)
self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Pull-Ultrasound-Lid-Up-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
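# --- Added usage sketch (not part of the original file) ---
# A minimal way to instantiate the registered task directly. This assumes the Isaac Sim app
# has already been started via isaaclab.app.AppLauncher (required before any env code runs)
# and that this package has been imported so the gym.register() call above executed.
def _example_make_env():
    from .mindbot_env_cfg import MindbotEnvCfg  # deferred import to avoid circular imports

    # gym.make forwards the cfg keyword to ManagerBasedRLEnv, matching the entry point above
    env = gym.make("Pull-Ultrasound-Lid-Up-v0", cfg=MindbotEnvCfg())
    obs, _ = env.reset()
    return env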

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,71 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
    num_steps_per_env = 48  # slightly more steps per rollout
    max_iterations = 5000  # more iterations, to give the policy enough time to learn
    save_interval = 100
    experiment_name = "mindbot_grasp"
    policy = RslRlPpoActorCriticCfg(
        init_noise_std=0.7,
        actor_obs_normalization=True,  # enable observation normalization
        critic_obs_normalization=True,  # enable observation normalization
        # larger network capacity: a three-layer MLP with sufficient width
actor_hidden_dims=[128, 64, 32],
critic_hidden_dims=[128, 64, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
        entropy_coef=0.005,  # keep a moderate amount of exploration
num_learning_epochs=5,
num_mini_batches=8,
        learning_rate=3e-4,  # consider lowering this if training becomes unstable with the larger network
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
# @configclass
# class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
# num_steps_per_env = 16
# max_iterations = 150
# save_interval = 50
# experiment_name = "cartpole_direct"
# policy = RslRlPpoActorCriticCfg(
# init_noise_std=1.0,
# actor_obs_normalization=False,
# critic_obs_normalization=False,
# actor_hidden_dims=[32, 32],
# critic_hidden_dims=[32, 32],
# activation="elu",
# )
# algorithm = RslRlPpoAlgorithmCfg(
# value_loss_coef=1.0,
# use_clipped_value_loss=True,
# clip_param=0.2,
# entropy_coef=0.005,
# num_learning_epochs=5,
# num_mini_batches=4,
# learning_rate=1.0e-3,
# schedule="adaptive",
# gamma=0.99,
# lam=0.95,
# desired_kl=0.01,
# max_grad_norm=1.0,
# )

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,13 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the environment."""
from isaaclab.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403
from .gripper import * # noqa: F401, F403
from .curriculums import * # noqa: F401, F403

View File

@@ -0,0 +1,50 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
if TYPE_CHECKING:
    from isaaclab.envs import ManagerBasedRLEnv
from isaaclab.envs.mdp import modify_term_cfg
def annealing_std(
env: ManagerBasedRLEnv,
env_ids: torch.Tensor,
current_value: float,
start_step: int,
end_step: int,
start_std: float,
end_std: float
):
    """Linearly anneal the std value based on the step count.

    Args:
        current_value: the current parameter value (passed in by the curriculum manager)
        start_step: step at which annealing starts
        end_step: step at which annealing ends
        start_std: the initial std
        end_std: the final std
    """
    current_step = env.common_step_counter
    # 1. before the start step -> keep the initial value
    if current_step < start_step:
        return start_std
    # 2. past the end step -> keep the final value
    elif current_step >= end_step:
        return end_std
    # 3. in between -> linear interpolation
else:
ratio = (current_step - start_step) / (end_step - start_step)
new_std = start_std + (end_std - start_std) * ratio
return new_std
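# --- Added sanity sketch (not part of the original file) ---
# Checks the linear schedule with a stub that only mimics the ``common_step_counter``
# attribute of ManagerBasedRLEnv; the numbers mirror the commented-out curriculum term
# in mindbot_env_cfg.py (std annealed from 0.3 to 0.05 over 5000 steps).
if __name__ == "__main__":
    class _StubEnv:
        common_step_counter = 0

    _env = _StubEnv()
    for _step in (0, 2500, 5000, 8000):
        _env.common_step_counter = _step
        print(_step, annealing_std(_env, None, 0.3, start_step=0, end_step=5000,
                                    start_std=0.3, end_std=0.05))
    # expected: 0.3, 0.175, 0.05, 0.05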

View File

@@ -0,0 +1,54 @@
# Assumed to live alongside the task's other mdp modules.
from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
from isaaclab.utils import configclass
import torch
class CoupledJointPositionAction(JointPositionAction):
    """Runtime action term that couples two mirrored gripper joints to a single 1-D action."""

    def __init__(self, cfg: 'CoupledJointPositionActionCfg', env):
        super().__init__(cfg, env)

    @property
    def action_dim(self) -> int:
        """Make the ActionManager expect a single 1-D input."""
        return 1

    def process_actions(self, actions: torch.Tensor):
scale = self.cfg.scale
offset = self.cfg.offset
# store the raw actions
self._raw_actions[:] = torch.clamp(actions, -1, 1)
# apply the affine transformations
target_position_interval = self._raw_actions * scale + offset
right_cmd = target_position_interval
left_cmd = -target_position_interval
# import pdb; pdb.set_trace()
self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)
# print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}")
# clip actions
if self.cfg.clip is not None:
self._processed_actions = torch.clamp(
self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
)
@configclass
class CoupledJointPositionActionCfg(JointPositionActionCfg):
    """Configuration class. The key point is that ``class_type`` points to the implementation class above."""

    # Tell the ActionManager to instantiate CoupledJointPositionAction when it sees this config.
    class_type = CoupledJointPositionAction
def __post_init__(self) -> None:
        # the ActionManager calls this during initialization, so the check runs before the term is used
if len(self.joint_names) != 2:
raise ValueError("Requires exactly two joint names.")
super().__post_init__()
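# --- Added usage sketch (not part of the original file) ---
# Shows how the single coupled action maps onto the two mirrored finger joints. The
# scale/offset values are taken from the commented-out cfg in mindbot_env_cfg.py
# (scale=0.015, offset=0.015) and are assumptions here, not values this class enforces.
if __name__ == "__main__":
    scale, offset = 0.015, 0.015
    for a in (-1.0, 0.0, 1.0):
        interval = max(-1.0, min(1.0, a)) * scale + offset
        left_cmd, right_cmd = -interval, interval
        print(a, (left_cmd, right_cmd))
    # -1.0 -> ( 0.0,   0.0)   fully open
    #  0.0 -> (-0.015, 0.015)
    # +1.0 -> (-0.03,  0.03)  fully closed, matching the BinaryJointPositionActionCfg commands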

View File

@@ -0,0 +1,808 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
"""Reward the agent for lifting the lid above the minimal height."""
lid: RigidObject = env.scene[lid_cfg.name]
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
height = lid.data.root_pos_w[:, 2]
return torch.where(height > minimal_height, 1.0, 0.0)
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for reaching the lid using tanh-kernel."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
    # coerce std to a float so it cannot silently broadcast into a (3,) vector
if not isinstance(std, float):
if torch.is_tensor(std):
std = std.item()
else:
std = float(std)
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
# body_idx = robot.find_bodies(gripper_body_name)[0]
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
return 1 - torch.tanh(distance / std)
def lid_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for grasping the lid (close and lifted)."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
body_idx = robot.find_bodies(gripper_body_name)[0]
gripper_pos_w = robot.data.body_pos_w[:, body_idx, :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
# Check if close and lifted
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
return torch.where(is_close & is_lifted, 1.0, 0.0)
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Combined reward for reaching the lid AND lifting it."""
# Get reaching reward
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
# Get lifting reward
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
# Combine rewards multiplicatively
return reach_reward * lift_reward
# def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand__l",
# right_gripper_name: str = "left_hand_r",
# height_offset: float = 0.1,
# std: float = 0.1) -> torch.Tensor:
# """奖励函数夹爪相对于lid的位置对齐。
# Args:
# env: 环境实例
# lid_cfg: lid的配置
# robot_cfg: 机器人的配置
# left_gripper_name: 左侧夹爪body名称
# right_gripper_name: 右侧夹爪body名称
# side_distance: Y方向的期望距离
# height_offset: Z方向的期望高度偏移
# std: tanh核函数的标准差
# Returns:
# 位置对齐奖励 (num_envs,)
# """
# lid: RigidObject = env.scene[lid_cfg.name]
# robot: Articulation = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3]
# # 获取两个夹爪的位置
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
# # 计算夹爪中心位置
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
# # 计算相对位置(夹爪中心相对于 lid 中心)
# rel_pos = gripper_center - lid_pos_w # (num_envs, 3)
# # X 方向位置:应该对齐(接近 0
# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6
# x_reward = 1.0 - torch.tanh(x_dist / std)
# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance
# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6
# # y_error = torch.abs(y_dist - side_distance)
# # y_reward = 1.0 - torch.tanh(y_error / std)
# y_reward = 1.0 - torch.tanh(y_dist / std)
# # Z 方向位置:应该在 lid 上方 height_offset 处
# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6
# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6
# z_reward = 1.0 - torch.tanh(z_error / std)
# # 组合位置奖励
# position_reward = x_reward * y_reward * z_reward
# return position_reward
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
height_offset: float = 0.02,
                                   std: float = 0.1) -> torch.Tensor:
    """Reward for gripper-to-lid position alignment (simplified version).

    Computes a distance reward between the gripper center and a target point located
    ``height_offset`` above the lid.

    Args:
        env: the environment instance
        lid_cfg: scene-entity config of the lid
        robot_cfg: scene-entity config of the robot
        left_gripper_name: body name of the left gripper finger
        right_gripper_name: body name of the right gripper finger
        height_offset: desired height offset in Z (target point above the lid)
        std: standard deviation of the tanh kernel

    Returns:
        Position-alignment reward of shape (num_envs,).
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]
    lid_pos_w = lid.data.root_pos_w[:, :3]
    # target point: lid position + height_offset along Z
    target_pos = lid_pos_w.clone()
    target_pos[:, 2] += height_offset
    # positions of the two gripper fingers
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]  # (num_envs, 3)
    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]  # (num_envs, 3)
    # gripper center position
    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0  # (num_envs, 3)
    # 3D distance from the gripper center to the target point
    distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
    # distance reward: the smaller the distance, the larger the reward
    position_reward = 1.0 - torch.tanh(distance / std)
return position_reward
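# Added note: the kernel above is r = 1 - tanh(d / std). With the std values used in
# RewardsCfg this gives roughly r ~= 0.97 at d = 1 cm and r ~= 0.68 at d = 10 cm for
# std = 0.3, but r ~= 0.80 at 1 cm and only ~= 0.04 at 10 cm for the "fine alignment"
# term with std = 0.05, which is why that term only acts very close to the grasp point.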
# def gripper_lid_orientation_alignment(
# env,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# gripper_body_name: str = "left_hand_body",
# gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z
# gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行)
# lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y
# ) -> torch.Tensor:
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 获取姿态
# lid_quat_w = lid.data.root_quat_w
# body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
# gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
# # 3. 垂直对齐 (Vertical Alignment)
# # 目标:夹爪 +Y 指向 World -Z
# grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
# grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
# # print(grip_fwd_world[0])
# world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
# dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
# dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) #add
# # 映射 [-1, 1] -> [0, 1],且全程有梯度
# # 即使是朝天 (+1),只要稍微往下转一点,分数就会增加
# # rew_vertical = (dot_vertical + 1.0) / 2.0
# val_vertical = torch.clamp((dot_vertical + 1.0) / 2.0, 0.0, 1.0)
# rew_vertical = torch.pow(val_vertical, 2) + 1e-4
# # nan -> 0
# rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
# # 4. 偏航对齐 (Yaw Alignment)
# # 目标:夹爪 +Z 平行于 Lid +Y
# grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
# grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
# lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
# lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
# dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
# rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
# # nan -> 0
# rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
# # 5. 组合
# total_reward = rew_vertical * rew_yaw
# total_reward=torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
# # debug
# if not torch.isfinite(total_reward).all():
# print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
# print(f"rew_vertical: {rew_vertical}")
# print(f"rew_yaw: {rew_yaw}")
# return total_reward
def gripper_lid_orientation_alignment(
env,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body",
    gripper_forward_axis: tuple = (0.0, 0.0, 1.0),  # gripper approach axis (local Z)
    gripper_width_axis: tuple = (0.0, 1.0, 0.0),  # gripper width axis (local Y), should end up parallel to the handle
    lid_handle_axis: tuple = (0.0, 1.0, 0.0),  # lid handle axis (local Y)
    max_angle_deg: float = 60.0,  # maximum allowed angular deviation (degrees)
) -> torch.Tensor:
    # 1. fetch the assets
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    # 2. fetch the orientations
    lid_quat_w = lid.data.root_quat_w
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
    # 3. vertical alignment
    # goal: the gripper forward axis points along world -Z (downwards)
    grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
    grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
    world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
    # dot product (for unit vectors, dot = cos(angle))
    dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
    dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0)
    # angle in radians
    angle_rad = torch.acos(torch.clamp(dot_vertical, -1.0, 1.0))
    max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device)
    # reward only when the angle is within max_angle_deg,
    # decaying smoothly from 1.0 at angle = 0 to 0.0 at angle = max_angle
    angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
    rew_vertical = 1.0 - angle_normalized  # smaller angle -> larger reward
    # zero reward beyond max_angle_deg
    rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical))
    # nan -> 0
    rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
    # 4. yaw alignment
    # goal: the gripper width axis is parallel to the lid handle axis
    grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
    grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
    lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
    lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
    dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
    rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
    # nan -> 0
    rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
    # 5. combine
    total_reward = rew_vertical * rew_yaw
    total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
    # debug
if not torch.isfinite(total_reward).all():
print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
print(f"rew_vertical: {rew_vertical}")
print(f"rew_yaw: {rew_yaw}")
return total_reward
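# Added note: the term above effectively computes
#   rew_vertical = max(0, 1 - theta_tilt / theta_max)   with theta_tilt the angle between the
#                                                        gripper forward axis and world -Z
#   rew_yaw      = cos^2(theta_yaw) + 1e-4               with theta_yaw between the gripper width
#                                                        axis and the lid handle axis
#   total        = rew_vertical * rew_yaw
# so the reward survives only when the gripper both points downward and has its width axis
# parallel to the handle.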
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body", # 用于计算距离的 body
# # 注意:确保 joint_names 只包含那两个夹爪关节
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05, # 5cm 以内视为“近”,需要闭合
# target_open_pos: float = 0.0, # 【修正】张开是 0.0
# target_close_pos: float = 0.03 # 【修正】闭合是 0.03
# ) -> torch.Tensor:
# """
# 逻辑很简单:
# 1. 如果 距离 < close_threshold目标关节位置 = 0.03 (闭合)
# 2. 如果 距离 >= close_threshold目标关节位置 = 0.0 (张开)
# 3. 返回 -abs(当前关节 - 目标关节),即惩罚误差。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算距离 (末端 vs Lid中心)
# lid_pos_w = lid.data.root_pos_w[:, :3]
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # distance: (num_envs,)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 3. 动态确定目标关节值
# # 如果距离小于阈值,目标设为 close_pos否则设为 open_pos
# # target_val: (num_envs, 1)
# # print(distance)
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 4. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# # joint_pos: (num_envs, 2)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# # 取绝对值防止左指是负数的情况虽然你的配置里范围看起来都是正的加上abs更保险
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 5. 计算误差
# # error: (num_envs,) -> 两个手指误差的平均值
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # # 6. 返回负奖励 (惩罚)
# # # 误差越大,惩罚越大。完全符合要求时奖励为 0。
# # return -error
# # =====================================================
# # 🚀 核心修复:安全输出
# # =====================================================
# # 1. 使用 tanh 限制数值范围在 [-1, 0]
# reward = -torch.tanh(error / 0.01)
# # 2. 过滤 NaN这一步能救命
# # 如果物理引擎算出 NaN这里会把它变成 0.0,防止训练崩溃
# reward = torch.nan_to_num(reward, nan=0.0, posinf=0.0, neginf=-1.0)
# return reward
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body",
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05,
# target_open_pos: float = 0.0,
# target_close_pos: float = 0.03,
# height_offset: float = 0.06, # 新增:与位置奖励保持一致
# ) -> torch.Tensor:
# """
# 逻辑:
# 1. 计算夹爪到【目标抓取点】(Lid上方 height_offset 处) 的距离。
# 2. 如果距离 < close_threshold目标关节位置 = 闭合。
# 3. 否则:目标关节位置 = 张开。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算目标抓取点位置 (Lid位置 + Z轴偏移)
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 修正为抓取点位置
# # 获取夹爪位置
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # 3. 计算距离 (夹爪 vs 目标抓取点)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 4. 动态确定目标关节值
# # 靠近抓取点 -> 闭合;远离 -> 张开
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 5. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 6. 计算误差并返回奖励
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # 使用 tanh 限制数值范围
# reward = 1.0 - torch.tanh(error / 0.01) # 误差越小奖励越大
# return torch.nan_to_num(reward, nan=0.0)
def _is_grasped(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg,
robot_cfg: SceneEntityCfg,
left_gripper_name: str,
right_gripper_name: str,
joint_names: list,
height_offset: float,
grasp_radius: float,
target_close_pos: float
) -> torch.Tensor:
    """Internal helper: decide whether the lid is successfully grasped.

    Conditions:
    1. The gripper center is near the handle target point (sphere check).
    2. The gripper is actively closing (joint effort check).
    3. The gripper height is valid, i.e. it is not pressing down on top of the lid (Z-axis check).
    """
    # 1. fetch the assets
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    # 2. target point position (handle center)
    lid_pos_w = lid.data.root_pos_w[:, :3].clone()
    lid_pos_w[:, 2] += height_offset
    # 3. gripper finger positions
    l_ids, _ = robot.find_bodies([left_gripper_name])
    r_ids, _ = robot.find_bodies([right_gripper_name])
    pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
    pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
    gripper_center = (pos_L + pos_R) / 2.0
    # 4. condition one: distance check (inside a sphere around the handle)
    dist_to_handle = torch.norm(gripper_center - lid_pos_w, dim=-1)
    is_near = dist_to_handle < grasp_radius
    # 5. condition two: Z-height check (prevents pressing down on top of the lid)
    # the gripper center must stay near the handle target point; small vertical deviations
    # are allowed, assuming height_offset marks the handle's geometric center
    z_diff = gripper_center[:, 2] - lid_pos_w[:, 2]
    is_z_valid = torch.abs(z_diff) < 0.03  # 3 cm tolerance
    # 6. condition three: closing check (the fingers are actively closing)
    joint_ids, _ = robot.find_joints(joint_names)
    # joint positions (absolute values)
    joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
    # effort check: only require that the fingers have moved toward closing; no upper
    # bound, so a firm grip is not misclassified
    is_effort_closing = torch.all(joint_pos > 0.005, dim=1)
    # geometry check: if something is actually grasped, the handle keeps the fingertips
    # apart, so their distance cannot collapse to zero
    dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
    # with a handle roughly 1 cm thick, fingertips closer than 5 mm means an empty grasp
    is_not_empty = dist_fingertips > 0.005
    # combined check: near the handle (is_near), valid height (is_z_valid),
    # actively closing (is_effort_closing), fingertips not collapsed (is_not_empty)
return is_near & is_z_valid & is_effort_closing & is_not_empty
def gripper_close_when_near(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
target_close_pos: float = 0.03,
height_offset: float = 0.02,
grasp_radius: float = 0.05
) -> torch.Tensor:
    # 1. check whether the basic grasp conditions are satisfied
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
    # 2. compute the clamping intensity
    robot = env.scene[robot_cfg.name]
    joint_ids, _ = robot.find_joints(joint_names)
    # current absolute joint positions, shape (num_envs, num_joints)
    current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
    # mean closing depth over the two finger joints
    mean_joint_pos = torch.mean(current_joint_pos, dim=1)
    # reward factor: current position / target closed position, so the reward approaches 1.0
    # as the joints approach 0.03, pushing the agent to close fully
    clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
    # 3. the weighted clamping reward is paid only when the grasp check passes;
    # if the gripper is misaligned or empty, the reward stays 0
return torch.where(is_grasped, clamping_factor, 0.0)
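# Added note (worked numbers with the defaults above): with target_close_pos = 0.03 a mean
# finger position of 0.015 gives clamping_factor = 0.5, and anything at or beyond 0.03
# saturates at 1.0; because the result is gated by _is_grasped, a fully closed but empty or
# misaligned gripper still earns 0.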
# def gripper_close_when_near(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# target_close_pos: float = 0.03,
# height_offset: float = 0.03,
# grasp_radius: float = 0.02 # 球面半径 2cm
# ) -> torch.Tensor:
# # 1. 获取位置
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 把手中心
# # 获取左右指尖位置
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# # 计算夹爪中心
# pos_center = (pos_L + pos_R) / 2.0
# # 2. 距离判定 (Is Inside Sphere?)
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
# is_in_sphere = (dist_center < grasp_radius).float()
# # 3. "在中间"判定 (Is Between?)
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
# vec_LR = pos_R - pos_L # 左指指向右指
# vec_LO = lid_pos_w - pos_L # 左指指向物体
# # 计算投影比例 t
# # P_proj = P_L + t * (P_R - P_L)
# # t = (vec_LO . vec_LR) / |vec_LR|^2
# # 如果 0 < t < 1说明投影在两个指尖之间
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
# t = dot / len_sq
# is_between = (t > 0.0) & (t < 1.0)
# is_between = is_between.float()
# # 4. 闭合判定
# joint_ids, _ = robot.find_joints(joint_names)
# # 注意:如果是 Binary Action可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
# # 只有当非常接近闭合目标时才给分
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
# # 5. 最终奖励
# # 只有三者同时满足才给 1.0 分
# reward = is_in_sphere * is_between * is_closing
# return torch.nan_to_num(reward, nan=0.0)
# def lid_is_lifted(
# env:ManagerBasedRLEnv,
# minimal_height:float,
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
# ) -> torch.Tensor:
# lid = env.scene[lid_cfg.name]
# lid_height = lid.data.root_pos_w[:, 2]
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
# reward=torch.nan_to_num(reward, nan=0.0)
# return reward
def lid_is_lifted(
env: ManagerBasedRLEnv,
    minimal_height: float = 0.05,  # relative lift threshold
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
height_offset: float = 0.03,
grasp_radius: float = 0.1,
target_close_pos: float = 0.03,
) -> torch.Tensor:
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
lid = env.scene[lid_cfg.name]
    # 1. current height
    current_height = lid.data.root_pos_w[:, 2]
    # initial height, read automatically from the default root state
    initial_height = lid.data.default_root_state[:, 2]
    # 2. lift amount
    lift_height = torch.clamp(current_height - initial_height, min=0.0)
    # 3. velocity check (guards against the lid being knocked flying)
    # if the lid moves faster than 1.0 m/s it is treated as knocked away and earns nothing
    lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
    is_stable = lid_vel < 1.0
    # 4. reward
    # shaping term: higher lift -> higher reward
    shaping_reward = lift_height * 2.0
    # success bonus: lift exceeds the threshold
    success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
    # combine: requires is_grasped AND is_stable
    reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
return torch.nan_to_num(reward, nan=0.0)
def lid_lift_success_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand_body",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    # key parameters
    settled_height: float = 0.75,  # resting height of the lid on the ultrasound unit (tune to the simulation)
    target_lift_height: float = 0.05,  # target lift height (5 cm)
    grasp_dist_threshold: float = 0.05,  # grasp check distance
    closed_pos: float = 0.03  # gripper closed threshold
) -> torch.Tensor:
    """Lift reward: a high score is paid only when the lid is held securely and lifted by
    the target amount relative to its settled height."""
    # 1. fetch data
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    # current lid height
    lid_z = lid.data.root_pos_w[:, 2]
    lid_pos = lid.data.root_pos_w[:, :3]
    # gripper position
    body_ids, _ = robot.find_bodies([left_gripper_name])
    gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
    # 2. condition one: is the lid grasped securely?
    # distance check
    distance = torch.norm(gripper_pos - lid_pos, dim=-1)
    is_close = distance < grasp_dist_threshold
    # closing check
    joint_ids, _ = robot.find_joints(joint_names)
    joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
    # treat more than 80% of the closed position as a firm grip
    is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)
    is_grasped = is_close & is_closed
    # 3. condition two: relative lift
    # current height - settled height
    current_lift = lid_z - settled_height
    # 4. reward
    lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
    # shaping reward
    lift_reward = lift_progress
    # success bonus once the lift reaches the target height
    success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
    # combine: the lift reward is paid only while is_grasped is True
    total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))
    # 5. safe output
    return torch.nan_to_num(total_reward, nan=0.0)
def lid_lift_progress_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
initial_height: float = 0.8,
target_height_lift: float = 0.15,
    height_offset: float = 0.02,  # must stay consistent with the grasp reward
    grasp_radius: float = 0.1,  # slightly relaxed radius while lifting, so jitter does not drop the reward
    target_close_pos: float = 0.03,
    std: float = 0.05  # standard deviation of the tanh kernel
) -> torch.Tensor:
    # 1. check whether the lid is grasped
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
    # 2. compute the lift height
    lid = env.scene[lid_cfg.name]
    current_height = lid.data.root_pos_w[:, 2]
    lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
    # 3. compute the reward
    # the height reward is paid only while is_grasped is True, which rules out scoring
    # by knocking the lid into the air
progress = torch.tanh(lift_height / std)
reward = torch.where(is_grasped, progress, 0.0)
return reward
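# Added note (worked numbers with the defaults above): progress = tanh(lift / std) with
# std = 0.05 and the lift clamped to target_height_lift = 0.15, so a 2.5 cm lift scores
# about 0.46, 5 cm about 0.76, and the capped 15 cm about 1.0, always gated by _is_grasped.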

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def lid_dropped(env: ManagerBasedRLEnv,
minimum_height: float = -0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
return lid.data.root_pos_w[:, 2] < minimum_height
def lid_successfully_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.03,
height_threshold: float = 0.2,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
body_idx = robot.find_bodies(gripper_body_name)[0]
gripper_pos_w = robot.data.body_pos_w[:, body_idx, :]
distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
return is_close & is_lifted
def gripper_at_lid_side(env: ManagerBasedRLEnv,
                        lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
                        robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                        left_gripper_name: str = "left_hand__l",  # note the double underscore in the body name
                        right_gripper_name: str = "left_hand_r",
                        side_distance: float = 0.05,
                        side_tolerance: float = 0.01,
                        alignment_tolerance: float = 0.02,
                        height_offset: float = 0.1,
                        height_tolerance: float = 0.02) -> torch.Tensor:
    """Terminate when the gripper center sits beside the lid at the specified height.

    Coordinate conventions:
    - X direction: the direction along which the two gripper fingers close toward each other
    - Y direction: the direction of the gap between the fingers, aligned with the lid handle
    - Z direction: height
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
    # positions of the two gripper fingers
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
    # compute the gripper center position
    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
    rel_pos = gripper_center - lid_pos_w
    # Y direction: the fingers should straddle the lid (distance ~ side_distance)
    y_dist = torch.abs(rel_pos[:, 1])
    y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
    # X direction: should be aligned (close to 0)
    x_dist = torch.abs(rel_pos[:, 0])
    x_ok = x_dist < alignment_tolerance
    # Z direction: should sit height_offset above the lid
    z_error = torch.abs(rel_pos[:, 2] - height_offset)
    z_ok = z_error < height_tolerance
    # all conditions must hold
return x_ok & y_ok & z_ok
def base_height_failure(env: ManagerBasedRLEnv,
minimum_height: float | None = None,
maximum_height: float | None = None,
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Terminate when the robot's base height is outside the specified range."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
root_pos_z = asset.data.root_pos_w[:, 2]
# check if height is outside the range
out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool)
if minimum_height is not None:
out_of_bounds |= root_pos_z < minimum_height
if maximum_height is not None:
out_of_bounds |= root_pos_z > maximum_height
return out_of_bounds

View File

@@ -0,0 +1,488 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
import torch
from isaaclab.actuators import ImplicitActuatorCfg
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
from . import mdp
##
# Pre-defined configs
##
from mindbot.robot.mindbot import MINDBOT_CFG
# ====== Additional object configurations ======
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
TABLE_CFG=RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Table",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.95, -0.3, 0.005],
rot=[0.7071, 0.0, 0.0, 0.7071],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/table/Table_C.usd",
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
LID_CFG = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Lid",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.2, 0.65, 0.9],
rot=[1.0, 0.0, 0.0, 0.0],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
# scale=(0.2, 0.2, 0.2),
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
max_angular_velocity=1000.0,
max_linear_velocity=1000.0,
max_depenetration_velocity=0.5,#original 5.0
linear_damping=5.0, #original 0.5
angular_damping=5.0, #original 0.5
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
ULTRASOUND_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
stabilization_threshold=1e-6,
# fix_root_link=True,
),
),
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)),
# actuators={}
actuators={
"passive_damper": ImplicitActuatorCfg(
# ".*" 表示匹配该USD文件内的所有关节无论是轮子、屏幕转轴还是其他
joint_names_expr=[".*"],
stiffness=0.0,
damping=1000.0,
effort_limit_sim=10000.0,
velocity_limit_sim=100.0,
),
}
)
##
# Scene definition
##
@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
"""Configuration for a cart-pole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
##
# MDP settings
##
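# Helper observation: world-frame position of the left gripper body, expressed relative to
# each environment's origin so that cloned environments report comparable values.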
def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
return pos_w
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
# Task-space controller: the policy outputs a 6-D end-effector pose delta (position + orientation)
left_arm_ee = DifferentialInverseKinematicsActionCfg(
asset_name="Mindbot",
joint_names=["l_joint[1-6]"], # 左臂的6个关节
body_name="left_hand_body", # 末端执行器body名称
controller=DifferentialIKControllerCfg(
command_type="pose", # 控制位置+姿态
use_relative_mode=True, # 相对模式:动作是增量
ik_method="dls", # Damped Least Squares方法
),
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
)
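# Note on the action semantics (a sketch, assuming Isaac Lab's standard relative-mode IK action):
# the 6-D policy action is multiplied element-wise by `scale`, so an action of 1.0 on a translation
# channel requests roughly a 0.01 m end-effector offset, and 1.0 on a rotation channel roughly
# 0.025 rad, per control step.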
# right_arm_fixed = mdp.JointPositionActionCfg(
# asset_name="Mindbot",
# joint_names=["r_joint[1-6]"], # 对应 l_joint1 到 l_joint6
# # 1. 缩放设为 0这让 RL 策略无法控制它,它不会动
# scale=0.0,
# # 2. 偏移设为你的目标角度(弧度):这告诉电机“永远停在这里”
# # 对应 (135, -70, -90, 90, 90, -70)
# offset={
# "r_joint1": 2.3562,
# "r_joint2": -1.2217,
# "r_joint3": -1.5708,
# "r_joint4": 1.5708,
# "r_joint5": 1.5708,
# "r_joint6": -1.2217,
# },
# )
grippers_position = mdp.BinaryJointPositionActionCfg(
asset_name="Mindbot",
joint_names=["left_hand_joint_left", "left_hand_joint_right"],
# open/close commands use a dict of {joint-name regex: target position}
open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
)
trunk_position = mdp.JointPositionActionCfg(
asset_name="Mindbot",
joint_names=["PrismaticJoint"],
scale=0.00,
offset=0.3,
clip=None
)
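# Resulting action space (given the terms above): 6 (relative EE pose delta) + 1 (binary gripper
# open/close) + 1 (trunk prismatic joint; scale 0 keeps it pinned at the 0.3 offset) = 8 dims.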
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
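# With concatenate_terms=True the terms above are flattened and concatenated in declaration
# order into a single "policy" observation vector.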
@configclass
class EventCfg:
"""Configuration for events."""
# reset all joints to their init_state (no offset); required
reset_mindbot_all_joints = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
# 2. base mounting error (consider shrinking to about 2 cm)
reset_mindbot_base = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"pose_range": {
"x": (-0.05, 0.05),
"y": (-0.05, 0.05),
"z": (0.74, 0.75),
"yaw": (-0.1, 0.1),
},
"velocity_range": {"x": (0.0, 0.0), "y": (0.0, 0.0)},
},
)
# 1. small arm-joint offsets (0.01 rad, about 0.6 deg, is a reasonable amount)
reset_mindbot_left_arm = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
"position_range": (-0.01, 0.01),
"velocity_range": (0.0, 0.0),
},
)
reset_table = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}},
)
reset_ultrasound = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("ultrasound"),
"pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)},
"velocity_range": {"x": (0.0, 0.0)}
}
)
reset_lid = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("lid"),
"pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)},
"velocity_range": {"x": (0.0, 0.0)}
}
)
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
gripper_lid_orientation_alignment = RewTerm(
func=mdp.gripper_lid_orientation_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"gripper_body_name": "left_hand_body",
"gripper_forward_axis": (0.0, 0.0, 1.0),
"gripper_width_axis": (0.0, 1.0, 0.0),
"lid_handle_axis": (0.0, 1.0, 0.0),
"max_angle_deg": 85.0, # 允许60度内的偏差
},
weight=5
)
# stage 2
# position alignment reward: encourage the gripper to sit at the sides of the lid, with Z at height_offset above it
gripper_lid_position_alignment = RewTerm(
func=mdp.gripper_lid_position_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"height_offset": 0.07, # Z方向lid 上方 0.1m
"std": 0.3, # 位置对齐的容忍度
},
weight=3.0
)
# [new] fine alignment (guides the gripper into the ~2 cm zone)
gripper_lid_fine_alignment = RewTerm(
func=mdp.gripper_lid_position_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"height_offset": 0.07, # 【注意】这个高度必须非常准,和 interaction 判定高度一致
"std": 0.05, # 非常陡峭的梯度,只在 5-10cm 内有效
},
weight=10.0 # high weight
)
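# Rough intuition for the two std values (assuming a Gaussian-like kernel such as exp(-(d/std)^2)
# inside mdp.gripper_lid_position_alignment; the actual kernel lives in that function and is not
# shown here): at d = 0.10 m the coarse term (std 0.3) still yields about 0.90, while the fine
# term (std 0.05) is already down to about 0.02, so it only pays off very close to the target.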
gripper_close_interaction = RewTerm(
func=mdp.gripper_close_when_near,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"joint_names": ["left_hand_joint_left", "left_hand_joint_right"],
"target_close_pos": 0.03,
"height_offset": 0.04,
"grasp_radius": 0.03,
},
weight=10.0
)
lid_lifted_reward = RewTerm(
func=mdp.lid_is_lifted, # reuse the existing helper in rewards.py
params={
"lid_cfg": SceneEntityCfg("lid"),
"minimal_height": 1.0, # tune for your scene; should sit clearly above the initial lid height
},
weight=10.0 # use a sufficiently large weight
)
lid_lifting_reward = RewTerm(
func=mdp.lid_lift_progress_reward,
params={
"lid_cfg": SceneEntityCfg("lid"),
"initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
"target_height_lift": 0.2,
"height_offset": 0.07, # 与其他奖励保持一致
"std": 0.1
},
# 权重设大一点,告诉它这是最终目标
weight=10.0
)
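# Each step the reward manager evaluates every term above, multiplies it by its weight (and, in
# Isaac Lab's manager-based workflow, by the environment step dt), and sums the results.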
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
mindbot_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
)
lid_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
)
# new: lid-drop termination
# fires if the lid falls back toward the table (height close to its initial value); intended to avoid triggering right after a reset
lid_dropped = DoneTerm(
func=mdp.base_height_failure, # reuse the height-check function
params={
"asset_cfg": SceneEntityCfg("lid"),
"minimum_height": 0.79, # initial lid height is about 0.9; dropping below this means the lid fell or was knocked down
},
)
##
# Environment configuration
##
@configclass
class CurriculumCfg:
pass
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
# position_std_scheduler = CurrTerm(
# func=mdp.modify_term_cfg, # use the built-in Isaac Lab helper directly
# params={
# # path: rewards manager -> reward term name -> params dict -> std key
# "address": "rewards.gripper_lid_position_alignment.params.std",
# # modification logic: use the annealing function defined in mdp
# "modify_fn": mdp.annealing_std,
# # arguments passed to that function
# "modify_params": {
# "start_step": 00, # roughly iteration 600
# "end_step": 5_000, # roughly iteration 1200
# "start_std": 0.3,
# "end_std": 0.05,
# }
# }
# )
@configclass
class PullUltraSoundLidUpEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=3.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# curriculum: CurriculumCfg = CurriculumCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
self.decimation = 4 # original 2; raised to 4 to lower the control frequency (see note below)
self.episode_length_s = 10
# viewer settings
self.viewer.eye = (3.5, 0.0, 3.2)
# simulation settings
self.sim.dt = 1 / 120 #original 1 / 60
self.sim.render_interval = self.decimation
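# Effective control period = decimation * sim.dt = 4 * (1/120) s, about 33 ms (~30 Hz),
# so a 10 s episode corresponds to roughly 300 policy steps.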
# 1. base GPU heap memory
self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256 MB
self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024 # ~16.8 million contact points
self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024 # ~1 million contact patches
# 5. aggregate-pair capacities (for complex articulations)
self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024