4090 workstation code

2026-01-28 20:16:58 +08:00
parent 1f7053a306
commit 2a201d5bab
98 changed files with 9887 additions and 1063 deletions

View File

@@ -2,11 +2,11 @@
// See http://go.microsoft.com/fwlink/?LinkId=827846
// for the documentation about the extensions.json format
"recommendations": [
"ms-vscode.cpptools",
"ms-python.python",
"ms-python.vscode-pylance",
"ms-python.vscode-pylance",
"ban.spellright",
"ms-iot.vscode-ros",
"ms-python.black-formatter",
"ms-python.flake8",
"ExecutableBookProject.myst-highlight",
]
}

.vscode/tasks.json vendored
View File

@@ -1,23 +1,29 @@
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "2.0.0",
"tasks": [
{
// setup python env
"label": "setup_python_env",
"type": "shell",
"linux": {
"command": "${input:isaac_path}/python.sh ${workspaceFolder}/.vscode/tools/setup_vscode.py --isaac_path ${input:isaac_path}"
"command": "${workspaceFolder}/isaaclab.sh -p ${workspaceFolder}/.vscode/tools/setup_vscode.py"
},
"windows": {
"command": "${input:isaac_path}/python.bat ${workspaceFolder}/.vscode/tools/setup_vscode.py --isaac_path ${input:isaac_path}"
"command": "${workspaceFolder}/isaaclab.bat -p ${workspaceFolder}/.vscode/tools/setup_vscode.py"
}
},
{
// run formatter
"label": "run_formatter",
"type": "shell",
"linux": {
"command": "${workspaceFolder}/isaaclab.sh --format"
},
"windows": {
"command": "${workspaceFolder}/isaaclab.bat --format"
}
}
],
"inputs": [
{
"id": "isaac_path",
"description": "Absolute path to the current Isaac Sim installation. If you installed IsaacSim from pip, the import of it failed. Please make sure you run the task with the correct python environment. As fallback, you can directly execute the python script by running: ``python.sh <path-to-your-project>/.vscode/tools/setup_vscode.py``",
"default": "${HOME}/isaacsim",
"type": "promptString"
},
]
}
}

View File

@@ -4,822 +4,54 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
// For standalone script execution
{
"name": "Python: Current File",
"type": "debugpy",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
"console": "integratedTerminal"
},
{
"name": "Python: Play Template-Mindbot-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
"name": "Python: Attach (windows-x86_64/linux-x86_64)",
"type": "python",
"request": "attach",
"port": 3000,
"host": "localhost"
},
{
"name": "Python: Train Template-Mindbot-v0 with skrl (AMP)",
"type": "debugpy",
"name": "Python: Train Environment",
"type": "python",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"args" : ["--task", "Isaac-Reach-Franka-v0", "--headless"],
"program": "${workspaceFolder}/scripts/reinforcement_learning/rsl_rl/train.py",
"console": "integratedTerminal"
},
{
"name": "Python: Play Template-Mindbot-v0 with skrl (AMP)",
"type": "debugpy",
"name": "Python: Play Environment",
"type": "python",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"args" : ["--task", "Isaac-Reach-Franka-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/reinforcement_learning/rsl_rl/play.py",
"console": "integratedTerminal"
},
{
"name": "Python: Train Template-Mindbot-v0 with skrl (IPPO)",
"type": "debugpy",
"name": "Python: SinglePytest",
"type": "python",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"module": "pytest",
"args": [
"${file}"
],
"console": "integratedTerminal"
},
{
"name": "Python: Train Template-Mindbot-v0 with skrl (MAPPO)",
"type": "debugpy",
"name": "Python: ALL Pytest",
"type": "python",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"module": "pytest",
"args": ["source/isaaclab/test"],
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
},
// For script execution inside a Docker
{
"name": "Docker: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
}
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
"justMyCode": false
}
]
}
}

View File

@@ -1,4 +1,9 @@
{
"files.exclude": {
"**/.mypy_cache": true,
"**/__pycache__": true,
"**/*.egg-info": true
},
"files.associations": {
"*.tpp": "cpp",
"*.kit": "toml",
@@ -40,7 +45,6 @@
"teleoperation",
"xform",
"numpy",
"tensordict",
"flatcache",
"physx",
"dpad",
@@ -51,7 +55,9 @@
"arange",
"discretization",
"trimesh",
"uninstanceable"
"uninstanceable",
"coeff",
"prestartup"
],
// This enables python language server. Seems to work slightly better than jedi:
"python.languageServer": "Pylance",
@@ -70,7 +76,7 @@
// Python environment path
// note: the default interpreter is overridden when user selects a workspace interpreter
// in the status bar. For example, the virtual environment python interpreter
"python.defaultInterpreterPath": "",
"python.defaultInterpreterPath": "${workspaceFolder}/_isaac_sim/python.sh",
// ROS distribution
"ros.distro": "noetic",
// Language specific settings
@@ -81,6 +87,6 @@
"editor.tabSize": 2
},
// Python extra paths
// Note: this is filled up when vscode is set up for the first time
// Note: this is filled up when "./isaaclab.sh -i" is run
"python.analysis.extraPaths": []
}

View File

@@ -12,34 +12,21 @@ This is necessary because Isaac Sim 2022.2.1 onwards does not add the necessary
when the "setup_python_env.sh" is run as part of the vs-code launch configuration.
"""
import argparse
import os
import pathlib
import platform
import re
import sys
import os
import pathlib
PROJECT_DIR = pathlib.Path(__file__).parents[2]
"""Path to the the project directory."""
ISAACLAB_DIR = pathlib.Path(__file__).parents[2]
"""Path to the Isaac Lab directory."""
try:
import isaacsim # noqa: F401
isaacsim_dir = os.environ.get("ISAAC_PATH", "")
except (ModuleNotFoundError, ImportError):
# Create a parser to get the isaac-sim path
parser = argparse.ArgumentParser(description="Setup the VSCode settings for the project.")
parser.add_argument("--isaac_path", type=str, help="The absolute path to the Isaac Sim installation.")
args = parser.parse_args()
# parse the isaac-sim directory
isaacsim_dir = args.isaac_path
# check if the isaac-sim directory is provided
if not os.path.exists(isaacsim_dir):
raise FileNotFoundError(
f"Could not find the isaac-sim directory: {isaacsim_dir}. Please provide the correct path to the Isaac Sim"
" installation."
)
isaacsim_dir = os.path.join(ISAACLAB_DIR, "_isaac_sim")
except EOFError:
print("Unable to trigger EULA acceptance. This is likely due to the script being run in a non-interactive shell.")
print("Please run the script in an interactive shell to accept the EULA.")
@@ -49,11 +36,11 @@ except EOFError:
# check if the isaac-sim directory exists
if not os.path.exists(isaacsim_dir):
raise FileNotFoundError(
f"Could not find the isaac-sim directory: {isaacsim_dir}. There are two possible reasons for this:\n\t1. The"
" Isaac Sim directory does not exist as provided CLI path.\n\t2. The script couldn't import the 'isaacsim'"
" package. This could be due to the 'isaacsim' package not being installed in the Python"
" environment.\n\nPlease make sure that the Isaac Sim directory exists or that the 'isaacsim' package is"
" installed."
f"Could not find the isaac-sim directory: {isaacsim_dir}. There are two possible reasons for this:"
f"\n\t1. The Isaac Sim directory does not exist as a symlink at: {os.path.join(ISAACLAB_DIR, '_isaac_sim')}"
"\n\t2. The script could not import the 'isaacsim' package. This could be due to the 'isaacsim' package not "
"being installed in the Python environment.\n"
"\nPlease make sure that the Isaac Sim directory exists or that the 'isaacsim' package is installed."
)
ISAACSIM_DIR = isaacsim_dir
@@ -98,7 +85,7 @@ def overwrite_python_analysis_extra_paths(isaaclab_settings: str) -> str:
path_names = [path_name for path_name in path_names if len(path_name) > 0]
# change the path names to be relative to the Isaac Lab directory
rel_path = os.path.relpath(ISAACSIM_DIR, PROJECT_DIR)
rel_path = os.path.relpath(ISAACSIM_DIR, ISAACLAB_DIR)
path_names = ['"${workspaceFolder}/' + rel_path + "/" + path_name + '"' for path_name in path_names]
else:
path_names = []
@@ -111,7 +98,7 @@ def overwrite_python_analysis_extra_paths(isaaclab_settings: str) -> str:
)
# add the path names that are in the Isaac Lab extensions directory
isaaclab_extensions = os.listdir(os.path.join(PROJECT_DIR, "source"))
isaaclab_extensions = os.listdir(os.path.join(ISAACLAB_DIR, "source"))
path_names.extend(['"${workspaceFolder}/source/' + ext + '"' for ext in isaaclab_extensions])
# combine them into a single string
@@ -144,17 +131,15 @@ def overwrite_default_python_interpreter(isaaclab_settings: str) -> str:
The settings string with overwritten default python interpreter.
"""
# read executable name
python_exe = os.path.normpath(sys.executable)
# replace with Isaac Sim's python.sh or python.bat scripts to make sure python with correct
# source paths is set as default
if f"kit{os.sep}python{os.sep}bin{os.sep}python" in python_exe:
# Check if the OS is Windows or Linux to use appropriate shell file
if platform.system() == "Windows":
python_exe = python_exe.replace(f"kit{os.sep}python{os.sep}bin{os.sep}python3", "python.bat")
else:
python_exe = python_exe.replace(f"kit{os.sep}python{os.sep}bin{os.sep}python3", "python.sh")
python_exe = sys.executable.replace("\\", "/")
# We make an exception for replacing the default interpreter if the
# path (/kit/python/bin/python3) indicates that we are using a local/container
# installation of IsaacSim. We will preserve the calling script as the default, python.sh.
# We want to use python.sh because it modifies LD_LIBRARY_PATH and PYTHONPATH
# (among other envars) that we need for all of our dependencies to be accessible.
if "kit/python/bin/python3" in python_exe:
return isaaclab_settings
# replace the default python interpreter in the Isaac Lab settings file with the path to the
# python interpreter in the Isaac Lab directory
isaaclab_settings = re.sub(
@@ -169,7 +154,7 @@ def overwrite_default_python_interpreter(isaaclab_settings: str) -> str:
def main():
# Isaac Lab template settings
isaaclab_vscode_template_filename = os.path.join(PROJECT_DIR, ".vscode", "tools", "settings.template.json")
isaaclab_vscode_template_filename = os.path.join(ISAACLAB_DIR, ".vscode", "tools", "settings.template.json")
# make sure the Isaac Lab template settings file exists
if not os.path.exists(isaaclab_vscode_template_filename):
raise FileNotFoundError(
@@ -195,13 +180,13 @@ def main():
isaaclab_settings = header_message + isaaclab_settings
# write the Isaac Lab settings file
isaaclab_vscode_filename = os.path.join(PROJECT_DIR, ".vscode", "settings.json")
isaaclab_vscode_filename = os.path.join(ISAACLAB_DIR, ".vscode", "settings.json")
with open(isaaclab_vscode_filename, "w") as f:
f.write(isaaclab_settings)
# copy the launch.json file if it doesn't exist
isaaclab_vscode_launch_filename = os.path.join(PROJECT_DIR, ".vscode", "launch.json")
isaaclab_vscode_template_launch_filename = os.path.join(PROJECT_DIR, ".vscode", "tools", "launch.template.json")
isaaclab_vscode_launch_filename = os.path.join(ISAACLAB_DIR, ".vscode", "launch.json")
isaaclab_vscode_template_launch_filename = os.path.join(ISAACLAB_DIR, ".vscode", "tools", "launch.template.json")
if not os.path.exists(isaaclab_vscode_launch_filename):
# read template launch settings
with open(isaaclab_vscode_template_launch_filename) as f:

View File

@@ -184,6 +184,22 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
# wrap around environment for rsl-rl
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
# # Add a pdb breakpoint to print left_arm_ee
# import pdb
# original_step = env.step
# def step_with_debug(action):
# result = original_step(action)
# # Access the underlying environment's action_manager
# unwrapped_env = env.unwrapped
# if hasattr(unwrapped_env, 'action_manager'):
# left_arm_ee_term = unwrapped_env.action_manager.get_term("left_arm_ee")
# if left_arm_ee_term is not None:
# print(f"left_arm_ee raw_actions: {left_arm_ee_term.raw_actions}")
# print(f"left_arm_ee processed_actions: {left_arm_ee_term.processed_actions}")
# pdb.set_trace() # set a breakpoint here
# return result
# env.step = step_with_debug
# create runner from rsl-rl
if agent_cfg.class_name == "OnPolicyRunner":
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)

View File

@@ -0,0 +1,14 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""
Python module serving as a project/extension template.
"""
# Register Gym environments.
from .tasks import *
# Register UI extensions.
from .ui_extension_example import *

View File

@@ -0,0 +1,109 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Configuration for a simple Cartpole robot."""
import isaaclab.sim as sim_utils
from isaaclab.actuators import ImplicitActuatorCfg
from isaaclab.assets import ArticulationCfg
from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
##
# Configuration
##
MINDBOT_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
# 1. UPDATE THE USD PATH to your local file
usd_path="/home/ubuntu/50T/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0,
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=True,
solver_position_iteration_count=8,
solver_velocity_iteration_count=4,
stabilization_threshold=1e-6, # ignore small penetrations
),
),
init_state=ArticulationCfg.InitialStateCfg(
# 2. DEFINE THE INITIAL POSITIONS FOR ALL 12 JOINTS
# All are set to 0.0 here, but you should adjust them for a suitable "home" position.
joint_pos={
# Left arm joints
"l_joint1": 0.0,
# "l_joint2": -1.5708,
"l_joint2": 0.0,
"l_joint3": 0.0,
"l_joint4": 0.0,
"l_joint5": 0.0,
"l_joint6": 0.0,
# Right arm joints
"r_joint1": 0.0,
"r_joint2": -1.5708,
"r_joint3": 0.0,
"r_joint4": 0.0,
"r_joint5": 0.0,
"r_joint6": 0.0,
# left wheel
"left_b_revolute_Joint": 0.0,
"left_f_revolute_Joint": 0.0,
# right wheel
"right_b_revolute_Joint": 0.0,
"right_f_revolute_Joint": 0.0,
# left gripper
# Note: for a fully open initial state, set left=0.0 and right=0.0.
# The current right=0.01 means a slightly closed state.
"left_hand_joint_left": 0.0, # range: (-0.03, 0.0); 0.0 = open, -0.03 = closed
"left_hand_joint_right": 0.01, # range: (0.0, 0.03); 0.0 = open, 0.03 = closed
# right gripper
"right_hand_joint_left": 0.0,
"right_hand_joint_right": 0.01,
# trunk
"PrismaticJoint": 0.3,
# head
"head_revoluteJoint": 0.5236
},
# The initial (x, y, z) position of the robot's base in the world
pos=(0, 0, 0.05),
),
actuators={
# 3. DEFINE ACTUATOR GROUPS FOR THE ARMS
# Group for the 6 left arm joints using a regular expression
"left_arm": ImplicitActuatorCfg(
joint_names_expr=["l_joint[1-6]"], # This matches l_joint1, l_joint2, ..., l_joint6
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
velocity_limit_sim=2.0, # Note: Tune this based on your robot's specs
stiffness=100000.0, # Note: Tune this for desired control performance
damping=10000.0, # Note: Tune this for desired control performance
),
# Group for the 6 right arm joints using a regular expression
"right_arm": ImplicitActuatorCfg(
joint_names_expr=["r_joint[1-6]"], # This matches r_joint1, r_joint2, ..., r_joint6
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs
stiffness=100000.0, # Note: Tune this for desired control performance
damping=10000.0, # Note: Tune this for desired control performance
),
"head": ImplicitActuatorCfg(joint_names_expr=["head_revoluteJoint"], stiffness=10000.0, damping=1000.0),
"trunk": ImplicitActuatorCfg(
joint_names_expr=["PrismaticJoint"],
stiffness=2000.0, # changed from 10000: stronger position holding
damping=200.0, # changed from 1000: resists oscillation
effort_limit_sim=1000.0,
velocity_limit_sim=0.5,
),
"wheels": ImplicitActuatorCfg(
joint_names_expr=[".*_revolute_Joint"],
stiffness=0.0,
damping=100.0,
effort_limit_sim=200.0, # <<<<<< new: torque limit, enough to drive the wheels
velocity_limit_sim=50.0, # <<<<<< new: velocity limit, prevents excessive speed
),
"grippers": ImplicitActuatorCfg(joint_names_expr=[".*_hand_joint.*"], stiffness=1000.0, damping=100.0),
},
)
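
For orientation: this articulation config is consumed the same way CARTPOLE_CFG is in mindbot_env_cfg.py further down, by replacing the prim path so one robot is spawned per cloned environment. A minimal sketch follows; the import path for MINDBOT_CFG is an assumption, since this commit does not show the package layout.

from isaaclab.assets import ArticulationCfg
from mindbot_assets.mindbot_cfg import MINDBOT_CFG  # hypothetical module path; adjust to the actual package
# spawn one Mindbot per cloned environment, mirroring CARTPOLE_CFG.replace(...) used below
robot_cfg: ArticulationCfg = MINDBOT_CFG.replace(prim_path="/World/envs/env_.*/Robot")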

View File

@@ -0,0 +1,17 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Package containing task implementations for the extension."""
##
# Register Gym environments.
##
from isaaclab_tasks.utils import import_packages
# The blacklist is used to prevent importing configs from sub-packages
_BLACKLIST_PKGS = ["utils", ".mdp"]
# Import all configs in this package
import_packages(__name__, _BLACKLIST_PKGS)

View File

@@ -0,0 +1,6 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym # noqa: F401

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Template-Mindbot-Direct-v0",
entry_point=f"{__name__}.mindbot_env:MindbotEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
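
Once this module is imported, the task id registered above can be created like any other Isaac Lab environment. A rough sketch under two assumptions not shown in this commit: the Isaac Sim app is launched through AppLauncher first, and parse_env_cfg from isaaclab_tasks.utils is used to resolve the env_cfg_entry_point.

import gymnasium as gym
from isaaclab.app import AppLauncher

# the simulation app must be running before importing task packages or creating envs
simulation_app = AppLauncher(headless=True).app

from isaaclab_tasks.utils import parse_env_cfg  # assumed helper for resolving the env cfg entry point
import mindbot.tasks  # hypothetical package name; importing it triggers the gym.register call above

env_cfg = parse_env_cfg("Template-Mindbot-Direct-v0", num_envs=32)
env = gym.make("Template-Mindbot-Direct-v0", cfg=env_cfg)
obs, _ = env.reset()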

View File

@@ -0,0 +1,135 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import math
import torch
from collections.abc import Sequence
import isaaclab.sim as sim_utils
from isaaclab.assets import Articulation
from isaaclab.envs import DirectRLEnv
from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
from isaaclab.utils.math import sample_uniform
from .mindbot_env_cfg import MindbotEnvCfg
class MindbotEnv(DirectRLEnv):
cfg: MindbotEnvCfg
def __init__(self, cfg: MindbotEnvCfg, render_mode: str | None = None, **kwargs):
super().__init__(cfg, render_mode, **kwargs)
self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name)
self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name)
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
def _setup_scene(self):
self.robot = Articulation(self.cfg.robot_cfg)
# add ground plane
spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
# clone and replicate
self.scene.clone_environments(copy_from_source=False)
# we need to explicitly filter collisions for CPU simulation
if self.device == "cpu":
self.scene.filter_collisions(global_prim_paths=[])
# add articulation to scene
self.scene.articulations["robot"] = self.robot
# add lights
light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
light_cfg.func("/World/Light", light_cfg)
def _pre_physics_step(self, actions: torch.Tensor) -> None:
self.actions = actions.clone()
def _apply_action(self) -> None:
self.robot.set_joint_effort_target(self.actions * self.cfg.action_scale, joint_ids=self._cart_dof_idx)
def _get_observations(self) -> dict:
obs = torch.cat(
(
self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
self.joint_pos[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
),
dim=-1,
)
observations = {"policy": obs}
return observations
def _get_rewards(self) -> torch.Tensor:
total_reward = compute_rewards(
self.cfg.rew_scale_alive,
self.cfg.rew_scale_terminated,
self.cfg.rew_scale_pole_pos,
self.cfg.rew_scale_cart_vel,
self.cfg.rew_scale_pole_vel,
self.joint_pos[:, self._pole_dof_idx[0]],
self.joint_vel[:, self._pole_dof_idx[0]],
self.joint_pos[:, self._cart_dof_idx[0]],
self.joint_vel[:, self._cart_dof_idx[0]],
self.reset_terminated,
)
return total_reward
def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
time_out = self.episode_length_buf >= self.max_episode_length - 1
out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
return out_of_bounds, time_out
def _reset_idx(self, env_ids: Sequence[int] | None):
if env_ids is None:
env_ids = self.robot._ALL_INDICES
super()._reset_idx(env_ids)
joint_pos = self.robot.data.default_joint_pos[env_ids]
joint_pos[:, self._pole_dof_idx] += sample_uniform(
self.cfg.initial_pole_angle_range[0] * math.pi,
self.cfg.initial_pole_angle_range[1] * math.pi,
joint_pos[:, self._pole_dof_idx].shape,
joint_pos.device,
)
joint_vel = self.robot.data.default_joint_vel[env_ids]
default_root_state = self.robot.data.default_root_state[env_ids]
default_root_state[:, :3] += self.scene.env_origins[env_ids]
self.joint_pos[env_ids] = joint_pos
self.joint_vel[env_ids] = joint_vel
self.robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids)
self.robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
self.robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids)
@torch.jit.script
def compute_rewards(
rew_scale_alive: float,
rew_scale_terminated: float,
rew_scale_pole_pos: float,
rew_scale_cart_vel: float,
rew_scale_pole_vel: float,
pole_pos: torch.Tensor,
pole_vel: torch.Tensor,
cart_pos: torch.Tensor,
cart_vel: torch.Tensor,
reset_terminated: torch.Tensor,
):
rew_alive = rew_scale_alive * (1.0 - reset_terminated.float())
rew_termination = rew_scale_terminated * reset_terminated.float()
rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1)
rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1)
rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1)
total_reward = rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel
return total_reward

View File

@@ -0,0 +1,48 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab_assets.robots.cartpole import CARTPOLE_CFG
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import DirectRLEnvCfg
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.sim import SimulationCfg
from isaaclab.utils import configclass
@configclass
class MindbotEnvCfg(DirectRLEnvCfg):
# env
decimation = 2
episode_length_s = 5.0
# - spaces definition
action_space = 1
observation_space = 4
state_space = 0
# simulation
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
# robot(s)
robot_cfg: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="/World/envs/env_.*/Robot")
# scene
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
# custom parameters/scales
# - controllable joint
cart_dof_name = "slider_to_cart"
pole_dof_name = "cart_to_pole"
# - action scale
action_scale = 100.0 # [N]
# - reward scales
rew_scale_alive = 1.0
rew_scale_terminated = -2.0
rew_scale_pole_pos = -1.0
rew_scale_cart_vel = -0.01
rew_scale_pole_vel = -0.005
# - reset states/conditions
initial_pole_angle_range = [-0.25, 0.25] # pole angle sample range on reset [rad]
max_cart_pos = 3.0 # reset if cart exceeds this position [m]

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Template-Mindbot-Marl-Direct-v0",
entry_point=f"{__name__}.mindbot_marl_env:MindbotMarlEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_marl_env_cfg:MindbotMarlEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,38 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 150
save_interval = 50
experiment_name = "cartpole_direct"
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
actor_hidden_dims=[32, 32],
critic_hidden_dims=[32, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
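  # with task_reward_weight 0.0 and style_reward_weight 1.0 the policy is trained purely on the
  # discriminator (style) reward computed against the reference motion dataset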
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,184 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import math
import torch
from collections.abc import Sequence
import isaaclab.sim as sim_utils
from isaaclab.assets import Articulation
from isaaclab.envs import DirectMARLEnv
from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
from isaaclab.utils.math import sample_uniform
from .mindbot_marl_env_cfg import MindbotMarlEnvCfg
class MindbotMarlEnv(DirectMARLEnv):
cfg: MindbotMarlEnvCfg
def __init__(self, cfg: MindbotMarlEnvCfg, render_mode: str | None = None, **kwargs):
super().__init__(cfg, render_mode, **kwargs)
self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name)
self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name)
self._pendulum_dof_idx, _ = self.robot.find_joints(self.cfg.pendulum_dof_name)
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
def _setup_scene(self):
self.robot = Articulation(self.cfg.robot_cfg)
# add ground plane
spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
# clone and replicate
self.scene.clone_environments(copy_from_source=False)
# we need to explicitly filter collisions for CPU simulation
if self.device == "cpu":
self.scene.filter_collisions(global_prim_paths=[])
# add articulation to scene
self.scene.articulations["robot"] = self.robot
# add lights
light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
light_cfg.func("/World/Light", light_cfg)
def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None:
self.actions = actions
def _apply_action(self) -> None:
self.robot.set_joint_effort_target(
self.actions["cart"] * self.cfg.cart_action_scale, joint_ids=self._cart_dof_idx
)
self.robot.set_joint_effort_target(
self.actions["pendulum"] * self.cfg.pendulum_action_scale, joint_ids=self._pendulum_dof_idx
)
def _get_observations(self) -> dict[str, torch.Tensor]:
pole_joint_pos = normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1))
pendulum_joint_pos = normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]].unsqueeze(dim=1))
observations = {
"cart": torch.cat(
(
self.joint_pos[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
pole_joint_pos,
self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
),
dim=-1,
),
"pendulum": torch.cat(
(
pole_joint_pos + pendulum_joint_pos,
pendulum_joint_pos,
self.joint_vel[:, self._pendulum_dof_idx[0]].unsqueeze(dim=1),
),
dim=-1,
),
}
return observations
def _get_rewards(self) -> dict[str, torch.Tensor]:
total_reward = compute_rewards(
self.cfg.rew_scale_alive,
self.cfg.rew_scale_terminated,
self.cfg.rew_scale_cart_pos,
self.cfg.rew_scale_cart_vel,
self.cfg.rew_scale_pole_pos,
self.cfg.rew_scale_pole_vel,
self.cfg.rew_scale_pendulum_pos,
self.cfg.rew_scale_pendulum_vel,
self.joint_pos[:, self._cart_dof_idx[0]],
self.joint_vel[:, self._cart_dof_idx[0]],
normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]]),
self.joint_vel[:, self._pole_dof_idx[0]],
normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]]),
self.joint_vel[:, self._pendulum_dof_idx[0]],
math.prod(self.terminated_dict.values()),
)
return total_reward
def _get_dones(self) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
time_out = self.episode_length_buf >= self.max_episode_length - 1
out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
terminated = {agent: out_of_bounds for agent in self.cfg.possible_agents}
time_outs = {agent: time_out for agent in self.cfg.possible_agents}
return terminated, time_outs
def _reset_idx(self, env_ids: Sequence[int] | None):
if env_ids is None:
env_ids = self.robot._ALL_INDICES
super()._reset_idx(env_ids)
joint_pos = self.robot.data.default_joint_pos[env_ids]
joint_pos[:, self._pole_dof_idx] += sample_uniform(
self.cfg.initial_pole_angle_range[0] * math.pi,
self.cfg.initial_pole_angle_range[1] * math.pi,
joint_pos[:, self._pole_dof_idx].shape,
joint_pos.device,
)
joint_pos[:, self._pendulum_dof_idx] += sample_uniform(
self.cfg.initial_pendulum_angle_range[0] * math.pi,
self.cfg.initial_pendulum_angle_range[1] * math.pi,
joint_pos[:, self._pendulum_dof_idx].shape,
joint_pos.device,
)
joint_vel = self.robot.data.default_joint_vel[env_ids]
default_root_state = self.robot.data.default_root_state[env_ids]
default_root_state[:, :3] += self.scene.env_origins[env_ids]
self.joint_pos[env_ids] = joint_pos
self.joint_vel[env_ids] = joint_vel
self.robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids)
self.robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
self.robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids)
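# helper below: wraps an angle into [-pi, pi), e.g. 1.5 * pi maps to -0.5 * pi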
@torch.jit.script
def normalize_angle(angle):
return (angle + math.pi) % (2 * math.pi) - math.pi
@torch.jit.script
def compute_rewards(
rew_scale_alive: float,
rew_scale_terminated: float,
rew_scale_cart_pos: float,
rew_scale_cart_vel: float,
rew_scale_pole_pos: float,
rew_scale_pole_vel: float,
rew_scale_pendulum_pos: float,
rew_scale_pendulum_vel: float,
cart_pos: torch.Tensor,
cart_vel: torch.Tensor,
pole_pos: torch.Tensor,
pole_vel: torch.Tensor,
pendulum_pos: torch.Tensor,
pendulum_vel: torch.Tensor,
reset_terminated: torch.Tensor,
):
rew_alive = rew_scale_alive * (1.0 - reset_terminated.float())
rew_termination = rew_scale_terminated * reset_terminated.float()
rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1)
rew_pendulum_pos = rew_scale_pendulum_pos * torch.sum(
torch.square(pole_pos + pendulum_pos).unsqueeze(dim=1), dim=-1
)
rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1)
rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1)
rew_pendulum_vel = rew_scale_pendulum_vel * torch.sum(torch.abs(pendulum_vel).unsqueeze(dim=1), dim=-1)
total_reward = {
"cart": rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel,
"pendulum": rew_alive + rew_termination + rew_pendulum_pos + rew_pendulum_vel,
}
return total_reward

View File

@@ -0,0 +1,55 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab_assets.robots.cart_double_pendulum import CART_DOUBLE_PENDULUM_CFG
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import DirectMARLEnvCfg
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.sim import SimulationCfg
from isaaclab.utils import configclass
@configclass
class MindbotMarlEnvCfg(DirectMARLEnvCfg):
# env
decimation = 2
episode_length_s = 5.0
# multi-agent specification and spaces definition
possible_agents = ["cart", "pendulum"]
action_spaces = {"cart": 1, "pendulum": 1}
observation_spaces = {"cart": 4, "pendulum": 3}
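    # the sizes above match the tensors assembled in _get_observations():
    # cart -> [cart pos, cart vel, pole pos, pole vel]; pendulum -> [pole + pendulum pos, pendulum pos, pendulum vel]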
state_space = -1
# simulation
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
# robot(s)
robot_cfg: ArticulationCfg = CART_DOUBLE_PENDULUM_CFG.replace(prim_path="/World/envs/env_.*/Robot")
# scene
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
# custom parameters/scales
# - controllable joint
cart_dof_name = "slider_to_cart"
pole_dof_name = "cart_to_pole"
pendulum_dof_name = "pole_to_pendulum"
# - action scale
cart_action_scale = 100.0 # [N]
pendulum_action_scale = 50.0 # [Nm]
# - reward scales
rew_scale_alive = 1.0
rew_scale_terminated = -2.0
rew_scale_cart_pos = 0
rew_scale_cart_vel = -0.01
rew_scale_pole_pos = -1.0
rew_scale_pole_vel = -0.01
rew_scale_pendulum_pos = -1.0
rew_scale_pendulum_vel = -0.01
# - reset states/conditions
initial_pendulum_angle_range = [-0.25, 0.25] # pendulum angle sample range on reset [rad]
initial_pole_angle_range = [-0.25, 0.25] # pole angle sample range on reset [rad]
max_cart_pos = 3.0 # reset if cart exceeds this position [m]

View File

@@ -0,0 +1,6 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym # noqa: F401

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,38 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 150
save_interval = 50
experiment_name = "cartpole_direct"
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
actor_hidden_dims=[32, 32],
critic_hidden_dims=[32, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -8,3 +8,4 @@
from isaaclab.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403

View File

@@ -0,0 +1,249 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
"""Reward the agent for lifting the lid above the minimal height."""
lid: RigidObject = env.scene[lid_cfg.name]
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
height = lid.data.root_pos_w[:, 2]
return torch.where(height > minimal_height, 1.0, 0.0)
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for reaching the lid using tanh-kernel."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
    # cast std to a plain float so it cannot end up as a (3,) vector
if not isinstance(std, float):
if torch.is_tensor(std):
std = std.item()
else:
std = float(std)
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
# body_idx = robot.find_bodies(gripper_body_name)[0]
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# import pdb; pdb.set_trace()
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
return 1 - torch.tanh(distance / std)
def lid_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for grasping the lid (close and lifted)."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
# Check if close and lifted
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
return torch.where(is_close & is_lifted, 1.0, 0.0)
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Combined reward for reaching the lid AND lifting it."""
# Get reaching reward
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
# Get lifting reward
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
# Combine rewards multiplicatively
return reach_reward * lift_reward
def gripper_lid_side_alignment_simple(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
side_distance: float = 0.05,
height_offset: float = 0.1,
                                      std: float = 0.1) -> torch.Tensor:  # a larger std makes the reward more permissive
"""Reward for positioning gripper center on the side of lid at specified height.
    Frame convention:
    - X: direction along which the two gripper fingers close toward the center
    - Y: direction of the gap between the fingers, aligned with the lid handle (the axis we align to)
    - Z: height direction

    Goals:
    1. the gripper center sits on either side of the lid along Y (at side_distance)
    2. the gripper center is aligned with the lid along X (X offset close to 0)
    3. the gripper center is height_offset above the lid
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
    # get the positions of both gripper fingers
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]  # (num_envs, 3)
    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]  # (num_envs, 3)
    # gripper center position
    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0  # (num_envs, 3)
    # relative position of the gripper center w.r.t. the lid center
    rel_pos = gripper_center - lid_pos_w  # (num_envs, 3)
    # Y: the center should sit on either side of the lid at side_distance
    # (either the left or the right side is accepted)
    y_dist = torch.abs(rel_pos[:, 1])
    # y_reward = 1.0 - torch.tanh(y_dist / std)
    y_error = torch.abs(y_dist - side_distance)
    y_reward = 1.0 - torch.tanh(y_error / std)
    # X: should be aligned with the lid (close to 0)
    x_dist = torch.abs(rel_pos[:, 0])
    x_reward = 1.0 - torch.tanh(x_dist / std)
    # x_error = torch.abs(x_dist - side_distance)
    # x_reward = 1.0 - torch.tanh(x_error / std)
    # Z: should be height_offset above the lid
    z_error = torch.abs(rel_pos[:, 2] - height_offset)
    z_reward = 1.0 - torch.tanh(z_error / std)
    # combine the rewards: all conditions must hold
reward = x_reward * y_reward * z_reward
return reward
def gripper_lid_orientation_alignment(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body",
std: float = 0.1) -> torch.Tensor:
"""Reward for aligning gripper orientation with lid orientation.
    Goal: align the gripper frame with the lid frame:
    - gripper X axis parallel to the lid X axis
    - gripper Y axis parallel to the lid Y axis
    - gripper Z axis parallel to the lid Z axis
    The quaternion error magnitude is used to measure how well they are aligned.
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
    # lid orientation (world frame)
    lid_quat_w = lid.data.root_quat_w  # (num_envs, 4) - (w, x, y, z)
    # gripper orientation (world frame)
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]  # (num_envs, 4)
    # orientation error (angular error in radians)
    orientation_error = quat_error_magnitude(gripper_quat_w, lid_quat_w)  # (num_envs,)
    # map the error to a reward with a tanh kernel
    # std controls sensitivity: a smaller std requires tighter alignment
reward = 1.0 - torch.tanh(orientation_error / std)
return reward
def gripper_perpendicular_to_lid(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body",
std: float = 0.2) -> torch.Tensor:
"""Reward for aligning gripper perpendicular to lid surface.
    Goal: make the gripper Y axis (the grasp direction) perpendicular to the lid surface.
    Gripper frame convention:
    - X axis: direction along which the two fingers close toward the center (red arrow, horizontal, pointing left)
    - Y axis: direction of the gap between the fingers, i.e. the grasp/opening direction (green arrow)
    - Z axis: perpendicular to the gripper "palm", pointing up (blue arrow)
    We want the gripper Y axis perpendicular to the lid surface, i.e. parallel to the lid normal.
    If the lid lies flat, the normal is the world Z axis, so the gripper Y axis should be parallel to Z.
"""
robot: Articulation = env.scene[robot_cfg.name]
    # gripper orientation (world frame)
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]  # (num_envs, 4)
    # gripper local Y axis (grasp direction)
    gripper_y_local = torch.tensor([0.0, 1.0, 0.0], device=gripper_quat_w.device)  # (3,)
    gripper_y_local = gripper_y_local.unsqueeze(0).repeat(gripper_quat_w.shape[0], 1)  # (num_envs, 3)
    # rotate the gripper Y axis into the world frame
    gripper_y_world = quat_apply(gripper_quat_w, gripper_y_local)  # (num_envs, 3)
    # lid surface normal (assuming the lid lies flat, so the normal points up)
    lid_normal_world = torch.tensor([0.0, 0.0, 1.0], device=gripper_y_world.device)  # (3,)
    lid_normal_world = lid_normal_world.unsqueeze(0).repeat(gripper_y_world.shape[0], 1)  # (num_envs, 3)
    # angle between the two vectors
    dot_product = torch.sum(gripper_y_world * lid_normal_world, dim=1)  # (num_envs,)
    dot_product = torch.clamp(dot_product, -1.0, 1.0)
    angle = torch.acos(dot_product)  # (num_envs,) - in [0, pi]
    # we want the angle close to 0 (gripper Y parallel to the lid normal, i.e. perpendicular to the surface)
    # or close to pi (gripper Y pointing down, which is also perpendicular to the surface)
    # so take the smaller of the two angles
    angle_to_perpendicular = torch.min(angle, torch.pi - angle)  # (num_envs,)
    # map the angular error to a reward with a tanh kernel
reward = 1.0 - torch.tanh(angle_to_perpendicular / std)
return reward

View File

@@ -0,0 +1,77 @@
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def lid_dropped(env: ManagerBasedRLEnv,
minimum_height: float = -0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
return lid.data.root_pos_w[:, 2] < minimum_height
def lid_successfully_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.03,
height_threshold: float = 0.2,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
return is_close & is_lifted
def gripper_at_lid_side(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                        left_gripper_name: str = "left_hand__l",  # note the double underscore
right_gripper_name: str = "left_hand_r",
side_distance: float = 0.05,
side_tolerance: float = 0.01,
alignment_tolerance: float = 0.02,
height_offset: float = 0.1,
height_tolerance: float = 0.02) -> torch.Tensor:
"""Terminate when gripper center is positioned on the side of the lid at specified height.
    Frame convention:
    - X: direction along which the two gripper fingers close toward the center
    - Y: direction of the gap between the fingers, aligned with the lid handle
    - Z: height direction
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
    # get the positions of both gripper fingers
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
    # gripper center position
    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
    rel_pos = gripper_center - lid_pos_w
    # Y: the center should sit on either side of the lid at side_distance
    y_dist = torch.abs(rel_pos[:, 1])
    y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
    # X: should be aligned with the lid (close to 0)
    x_dist = torch.abs(rel_pos[:, 0])
    x_ok = x_dist < alignment_tolerance
    # Z: should be height_offset above the lid
    z_error = torch.abs(rel_pos[:, 2] - height_offset)
    z_ok = z_error < height_tolerance
    # all conditions must hold
return x_ok & y_ok & z_ok

View File

@@ -0,0 +1,355 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
import torch
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from . import mdp
##
# Pre-defined configs
##
from mindbot.robot.mindbot import MINDBOT_CFG
# ====== Additional object configurations ======
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
# Testing with DexCube
# DexCube_CFG = RigidObjectCfg(
# prim_path="{ENV_REGEX_NS}/DexCube",
# init_state=RigidObjectCfg.InitialStateCfg(
# pos=[0.1, 0.8, 0.85],
# rot=[1.0, 0.0, 0.0, 0.0],
# lin_vel=[0.0, 0.0, 0.0],
# ang_vel=[0.0, 0.0, 0.0],
# ),
# spawn=UsdFileCfg(
# usd_path="/home/ubuntu/50T/maic_usd_assets/DexCube/dex_cube_instanceable.usd",
# scale=(0.2, 0.2, 0.2),
# rigid_props=RigidBodyPropertiesCfg(
# solver_position_iteration_count=8,
# solver_velocity_iteration_count=1,
# max_angular_velocity=1000.0,
# max_linear_velocity=1000.0,
# max_depenetration_velocity=5.0,
# disable_gravity=False,
# ),
# collision_props=CollisionPropertiesCfg(
# collision_enabled=True,
# contact_offset=0.01,
# rest_offset=0.0,
# torsional_patch_radius=0.0,
# ),
# ),
# )
LID_CFG = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Lid",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.25, 0.65, 0.85],
rot=[1.0, 0.0, 0.0, 0.0],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
# scale=(0.2, 0.2, 0.2),
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=8,
solver_velocity_iteration_count=1,
max_angular_velocity=1000.0,
max_linear_velocity=1000.0,
max_depenetration_velocity=5.0,
disable_gravity=False,
),
# collision_props=CollisionPropertiesCfg(
# collision_enabled=True,
# contact_offset=0.01,
# rest_offset=0.0,
# torsional_patch_radius=0.0,
# ),
),
)
ULTRASOUND_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(disable_gravity=False, max_depenetration_velocity=1.0),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=True,
solver_position_iteration_count=8,
solver_velocity_iteration_count=4,
stabilization_threshold=1e-6,
),
),
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.05)),
actuators={}
)
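# note: the usd_path values above are absolute paths on the local workstation
# (/home/ubuntu/50T/...); they have to be adapted when running on another machine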
##
# Scene definition
##
@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
"""Configuration for a cart-pole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
# DexCube: RigidObjectCfg = DexCube_CFG.replace(prim_path="{ENV_REGEX_NS}/DexCube")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
##
# MDP settings
##
def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
return pos_w
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
    # task-space controller: the policy outputs end-effector position + orientation deltas (6-dim)
    left_arm_ee = DifferentialInverseKinematicsActionCfg(
        asset_name="Mindbot",
        joint_names=["l_joint[1-6]"],  # the 6 left-arm joints
        body_name="left_hand_body",  # end-effector body name
        controller=DifferentialIKControllerCfg(
            command_type="pose",  # control position + orientation
            use_relative_mode=True,  # relative mode: actions are increments
            ik_method="dls",  # damped least squares
        ),
        # 6-dim action: (dx, dy, dz, droll, dpitch, dyaw)
        # the scale can be specified per dimension or as a single value applied to all dimensions
        scale=(0.05, 0.05, 0.05, 0.1, 0.1, 0.1),  # position deltas (3) + orientation deltas (3, in radians)
        # or simply: scale=0.05  # applied to all 6 dimensions
)
grippers_position = mdp.JointPositionActionCfg(
asset_name="Mindbot",
joint_names=["left_hand_joint_.*"],
scale=0.5,
clip={
"left_hand_joint_left": (-0.03, 0.0),
"left_hand_joint_right": (0.0, 0.03),
},
)
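    # the resulting per-environment action is 8-dimensional, assuming the regex above matches
    # only the two left-hand gripper joints listed in `clip`: 6 task-space deltas for the IK
    # action plus 2 gripper joint-position targets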
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
# dexcube_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("DexCube")})
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
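        # with concatenate_terms=True these terms are flattened into a single vector; its size
        # depends on the Mindbot joint count (the 22 joints listed in MINDBOT_CFG would give
        # 22 + 3 + 4 + 3 + 3 + 3 = 38 dimensions)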
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class EventCfg:
"""Configuration for events."""
# # reset
# reset_mindbot_root = EventTerm(
# func=mdp.reset_root_state_uniform,
# mode="reset",
# params={
# "asset_cfg": SceneEntityCfg("Mindbot"),
# "pose_range": {"x": (0.0, 0.0)},
# "velocity_range": {"x": (0.0, 0.0)},
# },
# )
# reset_mindbot_joints = EventTerm(
# func=mdp.reset_joints_by_offset,
# mode="reset",
# params={"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
# "position_range": (-0.1, 0.1), "velocity_range": (-0.05, 0.05)},
# )
    # reset all joints to their init_state values (no offset)
reset_mindbot_all_joints = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
    # add random offsets only to the left-arm joints
reset_mindbot_left_arm = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
"position_range": (-0.1, 0.1),
"velocity_range": (-0.05, 0.05),
},
)
# reset_dexcube = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
# params={"asset_cfg": SceneEntityCfg("DexCube"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
reset_ultrasound = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
params={"asset_cfg": SceneEntityCfg("ultrasound"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
reset_lid = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
params={"asset_cfg": SceneEntityCfg("lid"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
# reaching_dexcube = RewTerm(func=mdp.gripper_lid_distance,
# params={"std": 0.3, "lid_cfg": SceneEntityCfg("lid"),
# "robot_cfg": SceneEntityCfg("Mindbot"), "gripper_body_name": "left_hand_body"},
# weight=1.0)
    # encourage the gripper to sit on either side of the lid, 0.1 m above it along Z
    gripper_side_alignment = RewTerm(func=mdp.gripper_lid_side_alignment_simple,
                                     params={"lid_cfg": SceneEntityCfg("lid"),
                                             "robot_cfg": SceneEntityCfg("Mindbot"),
                                             "left_gripper_name": "left_hand__l",
                                             "right_gripper_name": "left_hand_r",
                                             "side_distance": 0.05,  # Y: 5 cm from the lid center
                                             "height_offset": 0.1,  # Z: 0.1 m above the lid
                                             "std": 0.3},  # a larger std makes the reward more permissive
weight=1.0)
action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.001)
joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.0001,
params={"asset_cfg": SceneEntityCfg("Mindbot")})
# gripper_orientation_alignment = RewTerm(
# func=mdp.gripper_lid_orientation_alignment,
    #     weight=1.0,  # adjust the weight as needed
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "gripper_body_name": "left_hand__l",
# "std": 0.3, # 姿态对齐的容忍度(弧度),可以调整
# },
# )
gripper_perpendicular = RewTerm(
func=mdp.gripper_perpendicular_to_lid,
        weight=1.0,  # adjust the weight as needed
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"gripper_body_name": "left_hand_body",
"std": 0.2, # 角度容忍度弧度约11.5度,可以调整
},
)
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
dexcube_dropping = DoneTerm(func=mdp.lid_dropped,
params={"minimum_height": -0.05, "lid_cfg": SceneEntityCfg("lid")})
    # terminate when the gripper sits at the side positions of the lid, 0.1 m above it along Z
gripper_at_side = DoneTerm(func=mdp.gripper_at_lid_side,
params={"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l", # 改为两个下划线
"right_gripper_name": "left_hand_r",
"side_distance": 0.05,
"side_tolerance": 0.01,
"alignment_tolerance": 0.02,
"height_offset": 0.1,
"height_tolerance": 0.02})
##
# Environment configuration
##
@configclass
class CurriculumCfg:
action_rate = CurrTerm(func=mdp.modify_reward_weight,
params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
joint_vel = CurrTerm(func=mdp.modify_reward_weight,
params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
@configclass
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=3.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
        self.decimation = 4  # increased from 2 to 4: control rate drops from 25 Hz to 12.5 Hz
self.episode_length_s = 10
# viewer settings
self.viewer.eye = (3.5, 0.0, 3.2)
# simulation settings
self.sim.dt = 1 / 50
self.sim.render_interval = self.decimation

View File

@@ -0,0 +1,46 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import omni.ext
import omni.ui
# Functions and vars are available to other extensions as usual in python: `example.python_ext.some_public_function(x)`
def some_public_function(x: int):
print("[mindbot] some_public_function was called with x: ", x)
return x**x
# Any class derived from `omni.ext.IExt` in top level module (defined in `python.modules` of `extension.toml`) will be
# instantiated when extension gets enabled and `on_startup(ext_id)` will be called. Later when extension gets disabled
# on_shutdown() is called.
class ExampleExtension(omni.ext.IExt):
# ext_id is current extension id. It can be used with extension manager to query additional information, like where
# this extension is located on filesystem.
def on_startup(self, ext_id):
print("[mindbot] startup")
self._count = 0
self._window = omni.ui.Window("My Window", width=300, height=300)
with self._window.frame:
with omni.ui.VStack():
label = omni.ui.Label("")
def on_click():
self._count += 1
label.text = f"count: {self._count}"
def on_reset():
self._count = 0
label.text = "empty"
on_reset()
with omni.ui.HStack():
omni.ui.Button("Add", clicked_fn=on_click)
omni.ui.Button("Reset", clicked_fn=on_reset)
def on_shutdown(self):
print("[mindbot] shutdown")

View File

@@ -0,0 +1,123 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Configuration for a simple Cartpole robot."""
import isaaclab.sim as sim_utils
from isaaclab.actuators import ImplicitActuatorCfg
from isaaclab.assets import ArticulationCfg
from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
##
# Configuration
##
MINDBOT_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
# 1. UPDATE THE USD PATH to your local file
usd_path="/home/ubuntu/50T/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot_cd.usd",
# usd_path="/home/ubuntu/50T/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0,
linear_damping=0.5,
angular_damping=0.5,
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
fix_root_link=True,
enabled_self_collisions=False,
solver_position_iteration_count=16,
solver_velocity_iteration_count=8,
            stabilization_threshold=1e-6,  # ignore small penetrations
),
collision_props=sim_utils.CollisionPropertiesCfg(
collision_enabled=True,
            contact_offset=0.0005,  # try increasing from the default toward 0.01 or 0.02
            rest_offset=0,  # keep slightly smaller than contact_offset
),
),
init_state=ArticulationCfg.InitialStateCfg(
# 2. DEFINE THE INITIAL POSITIONS FOR ALL 12 JOINTS
# All are set to 0.0 here, but you should adjust them for a suitable "home" position.
joint_pos={
            # left arm
"l_joint1": -0.524,
"l_joint2": -1.047,
"l_joint3": -1.047,
"l_joint4": -1.571,
"l_joint5": 1.571,
"l_joint6": 0,
            # right arm
"r_joint1": -0.524,
"r_joint2": -1.047,
"r_joint3": -1.047,
"r_joint4": -1.571,
"r_joint5": 1.571,
"r_joint6": 0,
# left wheel
"left_b_revolute_Joint": 0.0,
"left_f_revolute_Joint": 0.0,
# right wheel
"right_b_revolute_Joint": 0.0,
"right_f_revolute_Joint": 0.0,
# left gripper
            # note: both joints are set to 0.0 here, i.e. the gripper starts fully open;
            # setting right=0.01 would correspond to a slightly closed state
            "left_hand_joint_left": 0.0,  # range: (-0.03, 0.0); 0.0 = open, -0.03 = closed
            "left_hand_joint_right": 0.0,  # range: (0.0, 0.03); 0.0 = open, 0.03 = closed
# right gripper
"right_hand_joint_left": 0.0,
"right_hand_joint_right": 0.0,
# trunk
"PrismaticJoint": 0.35,
# head
"head_revoluteJoint": 0.5236
},
# The initial (x, y, z) position of the robot's base in the world
pos=(0, 0, 0.05),
),
actuators={
# 3. DEFINE ACTUATOR GROUPS FOR THE ARMS
# Group for the 6 left arm joints using a regular expression
"left_arm": ImplicitActuatorCfg(
joint_names_expr=["l_joint[1-6]"], # This matches l_joint1, l_joint2, ..., l_joint6
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs
stiffness=0.0, #100000.0, # Note: Tune this for desired control performance
damping=1000.0, #10000.0, # Note: Tune this for desired control performance
),
# Group for the 6 right arm joints using a regular expression
"right_arm": ImplicitActuatorCfg(
joint_names_expr=["r_joint[1-6]"], # This matches r_joint1, r_joint2, ..., r_joint6
effort_limit_sim=100.0, # Note: Tune this based on your robot's specs
velocity_limit_sim=100.0, # Note: Tune this based on your robot's specs
            stiffness=10000.0,  # 100000.0,  # Note: Tune this for desired control performance
damping=1000.0, #10000.0, # Note: Tune this for desired control performance
),
"head": ImplicitActuatorCfg(joint_names_expr=["head_revoluteJoint"], stiffness=10000.0, damping=1000.0),
"trunk": ImplicitActuatorCfg(
joint_names_expr=["PrismaticJoint"],
            stiffness=40000.0,  # increased from 10000: holds the trunk position more firmly
            damping=4000.0,  # increased from 1000: damps oscillations
effort_limit_sim=10000.0,
velocity_limit_sim=0.2,
),
"wheels": ImplicitActuatorCfg(
joint_names_expr=[".*_revolute_Joint"],
stiffness=0.0,
damping=100.0,
            effort_limit_sim=200.0,  # newly added: torque limit, enough to drive the wheels
            velocity_limit_sim=50.0,  # newly added: velocity limit to keep the wheels from spinning too fast
),
"grippers": ImplicitActuatorCfg(
joint_names_expr=[".*_hand_joint.*"],
stiffness=10000.0,
damping=1000.0,
            effort_limit_sim=50.0,  # default is 10, which is too small for a firm grip, so it is raised to 50
),
},
)

View File

@@ -10,6 +10,8 @@
##
from isaaclab_tasks.utils import import_packages
from . import manager_based
from . import direct
# The blacklist is used to prevent importing configs from sub-packages
_BLACKLIST_PKGS = ["utils", ".mdp"]

View File

@@ -4,3 +4,6 @@
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym # noqa: F401
from . import pullUltrasoundLidUp
from . import pullUltrasoundLid2Table
from . import openOven

View File

@@ -1,26 +0,0 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)

View File

@@ -1,180 +0,0 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from . import mdp
##
# Pre-defined configs
##
from isaaclab_assets.robots.cartpole import CARTPOLE_CFG # isort:skip
##
# Scene definition
##
@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
"""Configuration for a cart-pole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
robot: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
##
# MDP settings
##
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
joint_effort = mdp.JointEffortActionCfg(asset_name="robot", joint_names=["slider_to_cart"], scale=100.0)
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
# observation terms (order preserved)
joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel)
joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel)
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class EventCfg:
"""Configuration for events."""
# reset
reset_cart_position = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]),
"position_range": (-1.0, 1.0),
"velocity_range": (-0.5, 0.5),
},
)
reset_pole_position = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]),
"position_range": (-0.25 * math.pi, 0.25 * math.pi),
"velocity_range": (-0.25 * math.pi, 0.25 * math.pi),
},
)
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
# (1) Constant running reward
alive = RewTerm(func=mdp.is_alive, weight=1.0)
# (2) Failure penalty
terminating = RewTerm(func=mdp.is_terminated, weight=-2.0)
# (3) Primary task: keep pole upright
pole_pos = RewTerm(
func=mdp.joint_pos_target_l2,
weight=-1.0,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]), "target": 0.0},
)
# (4) Shaping tasks: lower cart velocity
cart_vel = RewTerm(
func=mdp.joint_vel_l1,
weight=-0.01,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"])},
)
# (5) Shaping tasks: lower pole angular velocity
pole_vel = RewTerm(
func=mdp.joint_vel_l1,
weight=-0.005,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"])},
)
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
# (1) Time out
time_out = DoneTerm(func=mdp.time_out, time_out=True)
# (2) Cart out of bounds
cart_out_of_bounds = DoneTerm(
func=mdp.joint_pos_out_of_manual_limit,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]), "bounds": (-3.0, 3.0)},
)
##
# Environment configuration
##
@configclass
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=4096, env_spacing=4.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
self.decimation = 2
self.episode_length_s = 5
# viewer settings
self.viewer.eye = (8.0, 0.0, 5.0)
# simulation settings
self.sim.dt = 1 / 120
self.sim.render_interval = self.decimation

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Open-Oven-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.open_oven_env_cfg:OpenOvenEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
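A minimal usage sketch of the registration above. parse_env_cfg is assumed to be importable from isaaclab_tasks.utils as in the standard Isaac Lab workflow, and the simulation app must already be running (AppLauncher), which is what the repository's train/play scripts handle:

import gymnasium as gym
from isaaclab_tasks.utils import parse_env_cfg

# resolve the env_cfg_entry_point registered above and build the environment
env_cfg = parse_env_cfg("Open-Oven-v0", num_envs=32)
env = gym.make("Open-Oven-v0", cfg=env_cfg)
obs, _ = env.reset()

The rl_games / rsl_rl / skrl / sb3 entry points registered above are what the corresponding train scripts look up when invoked with --task Open-Oven-v0.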

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,71 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 48 # slightly more steps per rollout
max_iterations = 5000 # more iterations so the policy has enough time to learn
save_interval = 100
experiment_name = "open_oven" # renamed from the cartpole template to match the task
policy = RslRlPpoActorCriticCfg(
init_noise_std=0.7,
actor_obs_normalization=True, # enable observation normalization
critic_obs_normalization=True, # enable observation normalization
# larger network capacity: a three-layer MLP with enough width
actor_hidden_dims=[128, 64, 32],
critic_hidden_dims=[128, 64, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005, # keep a moderate level of exploration
num_learning_epochs=5,
num_mini_batches=8,
learning_rate=3e-4, # lower this if training becomes unstable with the larger network
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
# @configclass
# class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
# num_steps_per_env = 16
# max_iterations = 150
# save_interval = 50
# experiment_name = "cartpole_direct"
# policy = RslRlPpoActorCriticCfg(
# init_noise_std=1.0,
# actor_obs_normalization=False,
# critic_obs_normalization=False,
# actor_hidden_dims=[32, 32],
# critic_hidden_dims=[32, 32],
# activation="elu",
# )
# algorithm = RslRlPpoAlgorithmCfg(
# value_loss_coef=1.0,
# use_clipped_value_loss=True,
# clip_param=0.2,
# entropy_coef=0.005,
# num_learning_epochs=5,
# num_mini_batches=4,
# learning_rate=1.0e-3,
# schedule="adaptive",
# gamma=0.99,
# lam=0.95,
# desired_kl=0.01,
# max_grad_norm=1.0,
# )

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,13 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the environment."""
from isaaclab.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403
from .gripper import * # noqa: F401, F403
from .curriculums import * # noqa: F401, F403

View File

@@ -0,0 +1,50 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
from isaaclab.envs.mdp import modify_term_cfg
def annealing_std(
env: ManagerBasedRLEnv,
env_ids: torch.Tensor,
current_value: float,
start_step: int,
end_step: int,
start_std: float,
end_std: float
):
"""
Linearly anneal the std value according to the global step count.
Args:
current_value: the current parameter value (passed in automatically by the curriculum manager)
start_step: step at which annealing starts
end_step: step at which annealing ends
start_std: initial std
end_std: final std
"""
current_step = env.common_step_counter
# 1. before the start step -> keep the initial value
if current_step < start_step:
# if the value has not been set to start_std yet, force it; otherwise leave it alone
return start_std
# 2. past the end step -> hold the final value
elif current_step >= end_step:
return end_std
# 3. in between -> linear interpolation
else:
ratio = (current_step - start_step) / (end_step - start_step)
new_std = start_std + (end_std - start_std) * ratio
return new_std
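As a quick sanity check of the schedule, a standalone re-implementation of the same piecewise-linear rule (illustrative only, not tied to the env):

def linear_anneal(step: int, start_step: int, end_step: int, start_std: float, end_std: float) -> float:
    # same piecewise-linear rule as annealing_std above
    if step < start_step:
        return start_std
    if step >= end_step:
        return end_std
    ratio = (step - start_step) / (end_step - start_step)
    return start_std + (end_std - start_std) * ratio

assert abs(linear_anneal(5000, 0, 10000, 0.7, 0.1) - 0.4) < 1e-9  # halfway between 0.7 and 0.1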

View File

@@ -0,0 +1,54 @@
# Assumed to live in your task's mdp module
from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
from isaaclab.utils import configclass
import torch
class CoupledJointPositionAction(JointPositionAction):
"""Runtime implementation class for a mirrored two-finger gripper; inherits from JointPositionAction."""
def __init__(self, cfg: 'CoupledJointPositionActionCfg', env):
super().__init__(cfg, env)
@property
def action_dim(self) -> int:
"""Force the ActionManager to expect a single 1-D input for this term."""
return 1
def process_actions(self, actions: torch.Tensor):
scale = self.cfg.scale
offset = self.cfg.offset
# store the raw actions
self._raw_actions[:] = torch.clamp(actions, -1, 1)
# apply the affine transformations
target_position_interval = self._raw_actions * scale + offset
right_cmd = target_position_interval
left_cmd = -target_position_interval
# import pdb; pdb.set_trace()
self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)
# print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}")
# clip actions
if self.cfg.clip is not None:
self._processed_actions = torch.clamp(
self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
)
@configclass
class CoupledJointPositionActionCfg(JointPositionActionCfg):
"""
配置类。关键在于设置 class_type 指向上面的实现类。
"""
# !!! 关键点 !!!
# 告诉 ActionManager: "当你看到这个配置时,请实例化 CoupledJointPositionAction 这个类"
class_type = CoupledJointPositionAction
def __post_init__(self) -> None:
# 这里的检查逻辑是没问题的,因为 ActionManager 会在初始化时调用它
if len(self.joint_names) != 2:
raise ValueError("Requires exactly two joint names.")
super().__post_init__()
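A hypothetical usage sketch inside an actions config (the joint names follow the left-gripper naming used elsewhere in this commit; the scale/offset values are placeholders, not tuned settings):

from isaaclab.utils import configclass

@configclass
class LeftGripperActionsCfg:
    # a single scalar action in [-1, 1] drives both fingers symmetrically (mirrored signs)
    left_gripper = CoupledJointPositionActionCfg(
        asset_name="Mindbot",
        joint_names=["left_hand_joint_left", "left_hand_joint_right"],
        scale=0.03,   # placeholder: maps the scalar action to about +/-3 cm of finger travel
        offset=0.0,
    )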

View File

@@ -0,0 +1,758 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
"""Reward the agent for lifting the lid above the minimal height."""
lid: RigidObject = env.scene[lid_cfg.name]
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
height = lid.data.root_pos_w[:, 2]
return torch.where(height > minimal_height, 1.0, 0.0)
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for reaching the lid using tanh-kernel."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# coerce std to a Python float so it cannot broadcast into a (3,) vector
if not isinstance(std, float):
if torch.is_tensor(std):
std = std.item()
else:
std = float(std)
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
# body_idx = robot.find_bodies(gripper_body_name)[0]
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
return 1 - torch.tanh(distance / std)
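For intuition, the tanh kernel above decays smoothly with distance; a quick numeric check with std=0.3 (the value used later in the RewardsCfg):

import torch

std = 0.3
for d in (0.0, 0.3, 0.9):
    r = 1.0 - torch.tanh(torch.tensor(d) / std)
    print(f"distance={d:.1f} m -> reward={r.item():.3f}")
# distance=0.0 m -> reward=1.000
# distance=0.3 m -> reward=0.238
# distance=0.9 m -> reward=0.005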
def lid_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for grasping the lid (close and lifted)."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
body_idx = robot.find_bodies(gripper_body_name)[0]
gripper_pos_w = robot.data.body_pos_w[:, body_idx, :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
# Check if close and lifted
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
return torch.where(is_close & is_lifted, 1.0, 0.0)
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Combined reward for reaching the lid AND lifting it."""
# Get reaching reward
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
# Get lifting reward
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
# Combine rewards multiplicatively
return reach_reward * lift_reward
def gripper_handle_orientation_alignment(
env,
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
handle_name: str = "Equipment_BB_13_C",
gripper_body_name: str = "right_hand_body",
gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # direction the gripper fingertips point (local Z)
gripper_width_axis: tuple = (0.0, 1.0, 0.0), # finger open/close axis (local Y)
lid_handle_axis: tuple = (0.0, 0.0, 1.0),
max_angle_deg: float = 60.0,
# the handle_local_axis parameter was removed; using the world-frame Z axis directly is more robust
) -> torch.Tensor:
"""
Revised version:
1. Avoids vanishing gradients: (dot+1)/2 would replace a hard clamp so every angle still provides a gradient.
2. Fixes the axis error: the handle is assumed to be vertical (world Z), which corrects the grasp orientation.
"""
# 1. gather data
# dryingbox = env.scene[dryingbox_cfg.name]  (no longer needed for the axis computation, saves work)
robot = env.scene[robot_cfg.name]
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
box=env.scene[dryingbox_cfg.name]
hd_ids,_=box.find_bodies([handle_name],preserve_order=True)
handle_quat_w=box.data.body_quat_w[:,hd_ids[0],:]
# --- Constraint A: pointing ---
# goal: the gripper Z axis should point along world +X
grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
target_fwd = torch.tensor([1.0, 0.0, 0.0], device=env.device).repeat(env.num_envs, 1)
dot_fwd = torch.sum(grip_fwd_world * target_fwd, dim=1)
dot_fwd=torch.clamp(dot_fwd,-1.0,1.0)
angle_rad = torch.acos(torch.clamp(dot_fwd, -1.0, 1.0))
max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device)
angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
rew_forward = 1.0 - angle_normalized # the smaller the angle, the larger the reward
# zero reward once the angle exceeds max_angle_deg (60 degrees by default)
rew_forward = torch.where(angle_rad <= max_angle_rad, rew_forward, torch.zeros_like(rew_forward))
# nan -> 0
rew_forward = torch.where(torch.isnan(rew_forward), torch.zeros_like(rew_forward), rew_forward)
# [Key change 1] a softened reward would provide a global gradient
# range [0, 1]: 1.0 is perfect alignment, 0.5 is perpendicular, 0.0 is pointing directly away
# even a gripper pointing at the ground (dot=0) would then score 0.5 and learn that lifting up scores higher
# rew_forward = (dot_fwd + 1.0) / 2.0
# for a steeper shape, square it: rew_forward = rew_forward * rew_forward
# --- Constraint B: grasp alignment ---
# goal: the finger open/close axis (Y) should be perpendicular to the handle axis (Z)
# so that the fingers open and close horizontally
# (note: rew_yaw below actually peaks when the two axes are parallel, since it rewards |dot| -> 1)
grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
grip_width_world = normalize(quat_apply(gripper_quat_w, grip_width_local))
# [Key change 2] force the assumption that the handle is vertical (world Z)
# unless the oven can tip over onto the floor, this is the most robust choice and avoids local-frame bookkeeping
handle_axis_local = torch.tensor([0.0, 0.0, 1.0], device=env.device).repeat(env.num_envs, 1)
handle_axis_world=quat_apply(handle_quat_w,handle_axis_local)
dot_yaw=torch.sum(grip_width_world*handle_axis_world,dim=1)
rew_yaw=torch.square(torch.abs(dot_yaw))+1e-4
# nan -> 0
rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
# 5. combine the two terms
total_reward = rew_forward * rew_yaw
total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
# debug
if not torch.isfinite(total_reward).all():
print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
print(f"rew_vertical: {dot_fwd}")
print(f"rew_yaw: {rew_yaw}")
return total_reward
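Illustrative arithmetic for the two terms combined above (the angles are made up for the example): with max_angle_deg=60, a forward axis 30 degrees off world +X scores rew_forward = 1 - 30/60 = 0.5, and a finger axis about 20 degrees off the handle axis scores rew_yaw = cos(20 deg)^2, roughly 0.88, so the product is about 0.44:

import math

rew_forward = 1.0 - min(math.radians(30.0) / math.radians(60.0), 1.0)  # 0.5
rew_yaw = abs(math.cos(math.radians(20.0))) ** 2                       # ~0.883
print(rew_forward * rew_yaw)                                           # ~0.44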
# In rewards.py, _is_grasped is modified to take the dryingbox asset and a handle_name
def _is_grasped(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg, # renamed to dryingbox_cfg
robot_cfg: SceneEntityCfg,
handle_name: str, # new handle_name parameter
left_gripper_name: str,
right_gripper_name: str,
joint_names: list,
height_offset: float,
grasp_radius: float,
target_close_pos: float
) -> torch.Tensor:
"""
内部辅助函数:判定是否成功抓取。
条件:
1. 夹爪中心在把手目标点附近 (Sphere Check)
2. 夹爪处于闭合发力状态 (Joint Effort Check)
3. (关键) 夹爪X轴距离合适不能压在把手上 (X-Axis Check)
"""
# 1. 获取对象
dryingbox = env.scene[dryingbox_cfg.name] # 改为 dryingbox
robot = env.scene[robot_cfg.name]
# 2. 目标点位置 (把手中心)
handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone()
handle_pos_w[:, 1] += height_offset # X方向偏移
handle_pos_w[:, 0] += 0.01
# handle_pos_w[:, 2] -= 0.04
# 3. 夹爪位置
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
gripper_center = (pos_L + pos_R) / 2.0
# 4. condition one: distance check (within the grasp sphere around the handle)
dist_to_handle = torch.norm(gripper_center - handle_pos_w, dim=-1)
if env.common_step_counter % 500 == 0:
print(f"Env 0 Dist to Handle: {dist_to_handle[0].item():.4f}m")
is_near = dist_to_handle < grasp_radius
# 5. condition two: X-axis offset check (prevents pressing down on the handle)
x_diff = gripper_center[:, 0] - handle_pos_w[:, 0]
is_x_valid = torch.abs(x_diff) < 0.03 # 3 cm tolerance
# 6. condition three: closing check (the fingers are actively closing)
joint_ids, _ = robot.find_joints(joint_names)
joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
is_effort_closing = torch.all(joint_pos > 0.005, dim=1)
# geometric exclusion check: the fingertips must not be pinched together on nothing
dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
is_not_empty = dist_fingertips > 0.005
# debug printout once dist_to_handle has been computed
if env.common_step_counter % 500 == 0 and dist_to_handle[0] < 0.15:
delta = gripper_center[0] - handle_pos_w[0]
print(f"close_dbg env0: handle={handle_pos_w[0].cpu().numpy()}, "
f"center={gripper_center[0].cpu().numpy()}, "
f"delta={delta.cpu().numpy()}, "
f"dist={dist_to_handle[0].item():.4f}, "
f"fingertip_dist={dist_fingertips[0].item():.4f}, "
f"joint_pos={joint_pos[0].cpu().numpy()}")
# combined verdict
return is_near & is_x_valid & is_effort_closing & is_not_empty
# gripper_close_when_near, updated to use the helper above
def gripper_close_when_near(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"), # renamed to dryingbox_cfg
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
handle_name: str = "Equipment_BB_13_C", # new handle_name parameter
left_gripper_name: str = "right_hand_l", # updated default
right_gripper_name: str = "right_hand__r", # updated default
joint_names: list = ["right_hand_joint_left", "right_hand_joint_right"], # updated defaults
target_close_pos: float = 0.03,
height_offset: float = 0.005, # updated default
grasp_radius: float = 0.03 # updated default
) -> torch.Tensor:
# 1. check whether the basic grasp conditions are met
is_grasped = _is_grasped(
env, dryingbox_cfg, robot_cfg, handle_name, # pass handle_name through
left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
# 2. compute the clamping intensity
robot = env.scene[robot_cfg.name]
joint_ids, _ = robot.find_joints(joint_names)
current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
mean_joint_pos = torch.mean(current_joint_pos, dim=1)
clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
# 3. only pay the weighted clamping reward when the grasp check passes
return torch.where(is_grasped, clamping_factor, 0.0)
# def gripper_close_when_near(
# env: ManagerBasedRLEnv,
# dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# handle_name: str = "Equipment_BB_13_C",
# left_gripper_name: str = "right_hand_l",
# right_gripper_name: str = "right_hand__r",
# joint_names: list = ["right_hand_joint_left", "right_hand_joint_right"], # kept for interface parity, not strictly required
# # key parameters
# handle_thickness: float = 0.0388, # handle thickness ~3.88 cm
# thickness_tol: float = 0.008, # tolerance ~0.8 cm, controls the reward/penalty slope
# grasp_radius: float = 0.04, # grasp-check radius, slightly larger than the handle thickness
# height_offset: float = 0.00, # adjust if a vertical offset is needed
# target_close_pos: float = 0.03, # kept for joint/clamping compatibility
# ) -> torch.Tensor:
# dryingbox = env.scene[dryingbox_cfg.name]
# robot = env.scene[robot_cfg.name]
# # target position (handle center, small offsets allowed)
# handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3].clone()
# handle_pos[:, 1] += height_offset
# # finger positions and their midpoint
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# center = (pos_L + pos_R) / 2.0
# # distance to the handle (only give credit when close)
# dist = torch.norm(center - handle_pos, dim=-1)
# is_near = dist < grasp_radius
# # actual distance between the two fingers
# finger_dist = torch.norm(pos_L - pos_R, dim=-1)
# # reward: the closer the finger gap is to the handle thickness, the better; smooth falloff
# error = torch.abs(finger_dist - handle_thickness)
# close_reward = 1.0 - torch.tanh(error / thickness_tol)
# # avoid NaN
# close_reward = torch.nan_to_num(close_reward, nan=0.0)
# return torch.where(is_near, close_reward, torch.zeros_like(close_reward))
def lid_is_lifted(
env: ManagerBasedRLEnv,
minimal_height: float = 0.05, # relative lift threshold
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
height_offset: float = 0.03,
grasp_radius: float = 0.1,
target_close_pos: float = 0.03,
) -> torch.Tensor:
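# NOTE: this definition shadows the simpler lid_is_lifted above, and _is_grasped now expects a
# handle_name argument before the gripper names; this call (and the one in lid_lift_progress_reward
# below) still uses the old signature and must be updated before the term is enabled.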
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
lid = env.scene[lid_cfg.name]
# 1. read the height
current_height = lid.data.root_pos_w[:, 2]
# take the initial height from the default root state
initial_height = lid.data.default_root_state[:, 2]
# 2. compute the lift amount
lift_height = torch.clamp(current_height - initial_height, min=0.0)
# 3. velocity check (guards against the lid being knocked flying)
# if the speed exceeds 1.0 m/s the lid is considered knocked away and gets no credit
lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
is_stable = lid_vel < 1.0
# 4. compute the reward
# shaping term: the higher the lift, the higher the score
shaping_reward = lift_height * 2.0
# success bonus: lift exceeds the threshold
success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
# combine
# both is_grasped AND is_stable must hold
reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
return torch.nan_to_num(reward, nan=0.0)
def lid_lift_success_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand_body",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# key parameters
settled_height: float = 0.75, # height at which the lid rests on the ultrasound unit (fine-tune against the actual simulation)
target_lift_height: float = 0.05, # target lift height (5 cm)
grasp_dist_threshold: float = 0.05, # grasp-check distance
closed_pos: float = 0.03 # gripper closed threshold
) -> torch.Tensor:
"""
Lift reward: a high score is paid only when the lid is held securely and lifted by the target amount relative to its resting height.
"""
# 1. gather data
lid = env.scene[lid_cfg.name]
robot = env.scene[robot_cfg.name]
# current lid height
lid_z = lid.data.root_pos_w[:, 2]
lid_pos = lid.data.root_pos_w[:, :3]
# gripper position
body_ids, _ = robot.find_bodies([left_gripper_name])
gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
# 2. condition one: is the lid grasped?
# distance check
distance = torch.norm(gripper_pos - lid_pos, dim=-1)
is_close = distance < grasp_dist_threshold
# closure check
joint_ids, _ = robot.find_joints(joint_names)
joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
# treat > 80% closure as a firm grip
is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)
is_grasped = is_close & is_closed
# 3. condition two: relative lift
# current height minus the initial resting height
current_lift = lid_z - settled_height
# 4. compute the reward
lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
# shaping term
lift_reward = lift_progress
# success bonus
# an extra bonus once the lift exceeds the target height (e.g. > 95% of the 5 cm)
success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
# combine: the lift reward is only paid while is_grasped is True
total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))
# 5. safe output
return torch.nan_to_num(total_reward, nan=0.0)
def lid_lift_progress_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
initial_height: float = 0.8,
target_height_lift: float = 0.15,
height_offset: float = 0.02, # must stay consistent with the grasp reward
grasp_radius: float = 0.1, # the radius can be relaxed a little during lifting so jitter does not drop the reward
target_close_pos: float = 0.03,
std: float = 0.05 # smoothing scale
) -> torch.Tensor:
# 1. check whether the lid is grasped
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
# 2. compute the height
lid = env.scene[lid_cfg.name]
current_height = lid.data.root_pos_w[:, 2]
lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
# print(current_height)
# print(lift_height)
# 3. compute the reward
# the height reward is only paid while is_grasped is True,
# which completely rules out scoring by knocking the lid into the air
progress = torch.tanh(lift_height / std)
reward = torch.where(is_grasped, progress, 0.0)
return reward
def debug_robot_physics(
env: ManagerBasedRLEnv,
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "right_hand_body",
distance_threshold: float = 0.1, # print when the distance drops below this value
print_interval: int = 10 # print every N steps
) -> None:
"""
Debug helper: prints the robot's velocities and related physics state.
"""
# use a function attribute to throttle the print frequency
if not hasattr(debug_robot_physics, "step_count"):
debug_robot_physics.step_count = 0
debug_robot_physics.step_count += 1
if debug_robot_physics.step_count % print_interval != 0:
return
robot = env.scene[robot_cfg.name]
# 1. root-link linear and angular velocities of the robot
root_lin_vel = robot.data.root_lin_vel_w # (num_envs, 3)
root_ang_vel = robot.data.root_ang_vel_w # (num_envs, 3)
root_lin_vel_magnitude = torch.norm(root_lin_vel, dim=1) # (num_envs,)
root_ang_vel_magnitude = torch.norm(root_ang_vel, dim=1) # (num_envs,)
# 2. velocities of the gripper body
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_lin_vel = robot.data.body_lin_vel_w[:, body_ids[0], :] # (num_envs, 3)
gripper_ang_vel = robot.data.body_ang_vel_w[:, body_ids[0], :] # (num_envs, 3)
gripper_lin_vel_mag = torch.norm(gripper_lin_vel, dim=1)
gripper_ang_vel_mag = torch.norm(gripper_ang_vel, dim=1)
# 3. joint velocities (these can affect collisions)
joint_vel = robot.data.joint_vel # (num_envs, num_joints)
max_joint_vel = torch.max(torch.abs(joint_vel), dim=1)[0] # (num_envs,)
# 4. flag abnormally high velocities (a possible source of collision problems)
high_vel_mask = (gripper_lin_vel_mag > 0.5) | (root_lin_vel_magnitude > 0.3)
# if torch.any(high_vel_mask):
# env_ids = torch.where(high_vel_mask)[0].cpu().tolist()
# for env_id in env_ids[:3]: # only print the first 3 environments
# print(f"\n[DEBUG] Step {debug_robot_physics.step_count}, Env {env_id}:")
# print(f" root linear velocity: {root_lin_vel[env_id].cpu().numpy()} (magnitude: {root_lin_vel_magnitude[env_id].item():.4f} m/s)")
# print(f" root angular velocity: {root_ang_vel[env_id].cpu().numpy()} (magnitude: {root_ang_vel_magnitude[env_id].item():.4f} rad/s)")
# print(f" gripper linear velocity: {gripper_lin_vel[env_id].cpu().numpy()} (magnitude: {gripper_lin_vel_mag[env_id].item():.4f} m/s)")
# print(f" gripper angular velocity: {gripper_ang_vel[env_id].cpu().numpy()} (magnitude: {gripper_ang_vel_mag[env_id].item():.4f} rad/s)")
# print(f" max joint velocity: {max_joint_vel[env_id].item():.4f} rad/s")
def gripper_handle_position_alignment(env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg = SceneEntityCfg("dryingbox"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
handle_name: str = "Equipment_BB_13_C",
left_gripper_name: str = "right_hand_l",
right_gripper_name: str = "right_hand__r",
height_offset: float = 0.03,
std: float = 0.1) -> torch.Tensor:
dryingbox: RigidObject = env.scene[dryingbox_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
handle_ids, _ = dryingbox.find_bodies([handle_name], preserve_order=True)
handle_pos_w = dryingbox.data.body_pos_w[:, handle_ids[0], :]
target_pos = handle_pos_w.clone()
# target_pos[:, 1] += height_offset
# target_pos[:, 0] += 0.01
# target_pos[:, 2] -= 0.04
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
# ===== optional debug printing =====
# print physics info when the gripper gets close to the handle (distance < 0.15 m)
# close_mask = distance < 0.03
# if torch.any(close_mask):
# debug_robot_physics(env, robot_cfg, "right_hand_body", distance_threshold=0.03, print_interval=5)
# # ========================
position_reward = 1.0 - torch.tanh(distance / std) + 1e-6
# debug block to insert after the distance computation
# if env.common_step_counter % 500 == 0:
# print("pos_align dbg env0: "
# f"handle={handle_pos_w[0].cpu().numpy()}, "
# f"target={target_pos[0].cpu().numpy()}, "
# f"gripL={left_gripper_pos[0].cpu().numpy()}, "
# f"gripR={right_gripper_pos[0].cpu().numpy()}, "
# f"center={gripper_center[0].cpu().numpy()}, "
# f"dist={distance[0].item():.4f}")
return position_reward
def gripper_open_when_far(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg, # kept for interface consistency, no longer used
robot_cfg: SceneEntityCfg,
handle_name: str, # kept for interface consistency, no longer used
left_gripper_name: str,
right_gripper_name: str,
joint_names: list, # no longer used
dist_threshold: float = 0.5, # no longer used
target_open_dist: float = 0.06, # distance between the two fingers when fully open (metres)
) -> torch.Tensor:
"""
Rewards keeping the gripper fully open:
the reward depends only on the actual distance between the two fingers,
and is largest when that distance approaches target_open_dist (0.06 m).
"""
# 1. get the robot
robot = env.scene[robot_cfg.name]
# 2. world positions of the left and right fingers
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# 3. actual distance between the two fingers
finger_dist = torch.norm(pos_L - pos_R, dim=-1)
# 4. reward: the closer the distance is to the fully-open distance, the higher the reward
error = torch.abs(finger_dist - target_open_dist)
reward = 1.0 - torch.tanh(error / 0.01) # 0.01 m = 1 cm smoothing scale
return reward
def gripper_open_penalty_when_far(
env: ManagerBasedRLEnv,
dryingbox_cfg: SceneEntityCfg,
robot_cfg: SceneEntityCfg,
handle_name: str,
left_gripper_name: str,
right_gripper_name: str,
joint_names: list,
dist_threshold: float = 0.03, # 3 cm grasp protection zone
max_penalty_dist: float = 0.55, # 0.55 m penalty activation boundary
) -> torch.Tensor:
"""
Penalty term using a sigmoid gate:
1. within 0.55 m of the handle the penalty switches on quickly and stays high (close to 1.0);
2. within 0.03 m of the handle the penalty is forced to zero so the gripper may close to grasp;
3. the smaller the finger gap, the larger the penalty.
"""
robot = env.scene[robot_cfg.name]
dryingbox = env.scene[dryingbox_cfg.name]
# 1. fetch positions
handle_ids, _ = dryingbox.find_bodies([handle_name])
handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# 2. compute distances
gripper_center = (pos_L + pos_R) / 2.0
distance = torch.norm(gripper_center - handle_pos, dim=1)
finger_dist = torch.norm(pos_L - pos_R, dim=-1)
# 3. distance activation gate (sigmoid)
# when distance < 0.55, (0.55 - dist) is positive and the sigmoid approaches 1.0
# 50.0 is the steepness; the larger it is, the more switch-like the transition at 0.55 m
dist_gate = torch.sigmoid((max_penalty_dist - distance) * 50.0)
# 4. grasp-zone protection (deadzone)
# within 3 cm, is_not_near becomes 0 and the penalty is fully withdrawn
is_not_near = (distance > dist_threshold).float()
# 5. closedness (0 = fully open, 1 = fully closed)
# assumes the fingers span at most 0.06 m when fully open
max_open = 0.06
closedness = torch.clamp(1.0 - (finger_dist / max_open), 0.0, 1.0)
# 6. combine and clamp
# the value is only large when 0.03 < distance < 0.55 and the fingers are closed
penalty_magnitude = dist_gate * is_not_near * closedness
# return a negative value as the penalty (range [-1, 0])
return -torch.clamp(penalty_magnitude, 0.0, 1.0)
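A quick numeric check of the sigmoid distance gate described above (steepness 50, boundary 0.55 m); the sample distances are illustrative only:

import math

for dist in (0.60, 0.55, 0.50, 0.10):
    gate = 1.0 / (1.0 + math.exp(-(0.55 - dist) * 50.0))
    print(f"dist={dist:.2f} m -> gate={gate:.3f}")
# dist=0.60 m -> gate=0.076
# dist=0.55 m -> gate=0.500
# dist=0.50 m -> gate=0.924
# dist=0.10 m -> gate=1.000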
# def gripper_open_penalty_when_far(
# env: ManagerBasedRLEnv,
# dryingbox_cfg: SceneEntityCfg,
# robot_cfg: SceneEntityCfg,
# handle_name: str,
# left_gripper_name: str,
# right_gripper_name: str,
# joint_names: list,
# dist_threshold: float = 0.03, # 3 cm threshold
# ) -> torch.Tensor:
# """
# Penalty term: more than 3 cm away from the handle, the agent loses points if the gripper is not open.
# This does not encourage staying far away; it only enforces "must be open" while far.
# """
# robot = env.scene[robot_cfg.name]
# dryingbox = env.scene[dryingbox_cfg.name]
# # 1. handle and gripper-center positions
# handle_ids, _ = dryingbox.find_bodies([handle_name])
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# gripper_center = (robot.data.body_pos_w[:, l_ids[0], :3] + robot.data.body_pos_w[:, r_ids[0], :3]) / 2.0
# # 2. current distance
# distance = torch.norm(gripper_center - handle_pos, dim=1)
# # 3. gripper "closedness" (deviation from open)
# # assumes joint position 0.0 is fully open; larger abs(pos) means more closed
# joint_ids, _ = robot.find_joints(joint_names)
# joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
# mean_closedness = torch.mean(joint_pos_abs, dim=1)
# # 4. penalty logic
# # if distance > 3 cm (is_far): reward = -mean_closedness (the more closed, the larger the deduction)
# # if distance <= 3 cm: reward = 0 (no deduction, closing to earn the grasp reward is allowed)
# if env.common_step_counter % 100 == 0:
# print("gripper_closed_penalty distance: ", distance[0])
# is_far = distance > dist_threshold
# penalty = -mean_closedness
# # so the best the agent can do while far is 0 (fully open); there is no way to farm reward by staying away
# return torch.where(is_far, penalty, torch.zeros_like(penalty))
# def gripper_open_until_near(
# env: ManagerBasedRLEnv,
# dryingbox_cfg: SceneEntityCfg,
# robot_cfg: SceneEntityCfg,
# handle_name: str,
# left_gripper_name: str,
# right_gripper_name: str,
# joint_names: list,
# dist_threshold: float = 0.03, # 3 cm threshold
# ) -> torch.Tensor:
# """Reward keeping the gripper open while far from the target; no constraint once close."""
# robot = env.scene[robot_cfg.name]
# dryingbox = env.scene[dryingbox_cfg.name]
# # 1. distance from the gripper center to the handle
# handle_ids, _ = dryingbox.find_bodies([handle_name])
# handle_pos = dryingbox.data.body_pos_w[:, handle_ids[0], :3]
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# gripper_center = (robot.data.body_pos_w[:, l_ids[0], :3] + robot.data.body_pos_w[:, r_ids[0], :3]) / 2.0
# distance = torch.norm(gripper_center - handle_pos, dim=1)
# # 2. current gripper joint positions (0 is assumed fully open)
# joint_ids, _ = robot.find_joints(joint_names)
# # deviation of the current joint position from 0 (open)
# joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
# mean_error = torch.mean(joint_pos_abs, dim=1)
# # 3. reward logic
# # distance > 3 cm: the closer the joints are to 0 (open), the higher the reward (max 1.0)
# # distance <= 3 cm: reward is 0, so closing is not interfered with
# is_far = distance > dist_threshold
# open_reward = 1.0 - torch.tanh(mean_error / 0.01) # smooth falloff within 1 cm
# return torch.where(is_far, open_reward, torch.zeros_like(open_reward))

File diff suppressed because it is too large

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def lid_dropped(env: ManagerBasedRLEnv,
minimum_height: float = -0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
return lid.data.root_pos_w[:, 2] < minimum_height
def lid_successfully_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.03,
height_threshold: float = 0.2,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
body_idx = robot.find_bodies(gripper_body_name)[0]
gripper_pos_w = robot.data.body_pos_w[:, body_idx, :]
distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
return is_close & is_lifted
def gripper_at_lid_side(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l", # 改为两个下划线
right_gripper_name: str = "left_hand_r",
side_distance: float = 0.05,
side_tolerance: float = 0.01,
alignment_tolerance: float = 0.02,
height_offset: float = 0.1,
height_tolerance: float = 0.02) -> torch.Tensor:
"""Terminate when gripper center is positioned on the side of the lid at specified height.
Frame conventions:
- X: the direction along which the two fingers close toward each other
- Y: the direction of the gap between the fingers, aligned with the lid handle
- Z: the height direction
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
# positions of the two gripper fingers
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
# gripper center position
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
rel_pos = gripper_center - lid_pos_w
# Y: should be on either side of the lid, at a distance of side_distance
y_dist = torch.abs(rel_pos[:, 1])
y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
# X: should be aligned (close to 0)
x_dist = torch.abs(rel_pos[:, 0])
x_ok = x_dist < alignment_tolerance
# Z: should sit height_offset above the lid
z_error = torch.abs(rel_pos[:, 2] - height_offset)
z_ok = z_error < height_tolerance
# all conditions must hold
return x_ok & y_ok & z_ok
def base_height_failure(env: ManagerBasedRLEnv,
minimum_height: float | None = None,
maximum_height: float | None = None,
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Terminate when the robot's base height is outside the specified range."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
root_pos_z = asset.data.root_pos_w[:, 2]
# check if height is outside the range
out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool)
if minimum_height is not None:
out_of_bounds |= root_pos_z < minimum_height
if maximum_height is not None:
out_of_bounds |= root_pos_z > maximum_height
return out_of_bounds

View File

@@ -0,0 +1,531 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
import torch
from isaaclab.actuators import ImplicitActuatorCfg
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
from . import mdp
##
# Pre-defined configs
##
from mindbot.robot.mindbot import MINDBOT_CFG
# ====== Additional object configurations ======
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
TABLE_CFG=RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Table",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.95, -0.3, 0.005],
rot=[0.7071, 0.0, 0.0, 0.7071],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/table/Table_C.usd",
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
DRYINGBOX_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/assets/Collected_Equipment_BB_0B/Equipment/Equipment_BB_13.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,#
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
stabilization_threshold=1e-6,
# fix_root_link=True,
),
),
init_state=ArticulationCfg.InitialStateCfg(
pos=[0.95, -0.45, 0.9],
rot=[-0.7071, 0.0, 0.0, 0.7071]
),
# actuators={}
actuators={
"passive_damper": ImplicitActuatorCfg(
# ".*" 表示匹配该USD文件内的所有关节无论是轮子、屏幕转轴还是其他
joint_names_expr=["RevoluteJoint"],
stiffness=10000.0,
damping=1000.0,
effort_limit_sim=10000.0,
velocity_limit_sim=100.0,
),
}
)
LID_CFG = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Lid",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.2, 0.65, 0.9],
rot=[1.0, 0.0, 0.0, 0.0],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
# scale=(0.2, 0.2, 0.2),
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
max_angular_velocity=1000.0,
max_linear_velocity=1000.0,
max_depenetration_velocity=0.5,#original 5.0
linear_damping=5.0, #original 0.5
angular_damping=5.0, #original 0.5
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
ULTRASOUND_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,#
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
stabilization_threshold=1e-6,
# fix_root_link=True,
),
),
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)),
# actuators={}
actuators={
"passive_damper": ImplicitActuatorCfg(
# ".*" 表示匹配该USD文件内的所有关节无论是轮子、屏幕转轴还是其他
joint_names_expr=[".*"],
stiffness=0.0,
damping=1000.0,
effort_limit_sim=10000.0,
velocity_limit_sim=100.0,
),
}
)
##
# Scene definition
##
@configclass
class OpenOvenSceneCfg(InteractiveSceneCfg):
"""Configuration for a cart-pole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
dryingbox: ArticulationCfg = DRYINGBOX_CFG.replace(prim_path="{ENV_REGEX_NS}/Dryingbox")
# ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
# lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
##
# MDP settings
##
def right_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(["right_hand_body"], preserve_order=True)
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
return pos_w
def get_body_pos_rel(env, asset_cfg: SceneEntityCfg) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(asset_cfg.body_names, preserve_order=True)
return asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
# new: generic helper to fetch a body's world-frame orientation
def get_body_quat_w(env, asset_cfg: SceneEntityCfg) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(asset_cfg.body_names, preserve_order=True)
return asset.data.body_quat_w[:, body_ids[0]]
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
# task-space controller: the policy outputs a 6-D end-effector pose increment (position + orientation)
right_arm_ee = DifferentialInverseKinematicsActionCfg(
asset_name="Mindbot",
joint_names=["r_joint[1-6]"], # the six right-arm joints
body_name="right_hand_body", # end-effector body name
controller=DifferentialIKControllerCfg(
command_type="pose", # command position + orientation
use_relative_mode=True, # relative mode: actions are increments
ik_method="dls", # damped least squares
),
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
)
# left_arm_fixed = mdp.JointPositionActionCfg(
# asset_name="Mindbot",
# joint_names=["l_joint[1-6]"], # 对应 l_joint1 到 l_joint6
# # 1. 缩放设为 0这让 RL 策略无法控制它,它不会动
# scale=0.0,
# offset={
# "l_joint1": 2.3562,
# "l_joint2": -1.2217,
# "l_joint3": -1.5708,
# "l_joint4": 1.5708,
# "l_joint5": 1.5708,
# "l_joint6": -1.2217,
# },
# )
grippers_position = mdp.BinaryJointPositionActionCfg(
asset_name="Mindbot",
joint_names=["right_hand_joint_left", "right_hand_joint_right"],
# fix: use the dictionary format
# open_command_expr={"joint-name regex": value}
open_command_expr={"right_hand_joint_left": 0.0, "right_hand_joint_right": 0.0},
# close_command_expr={"joint-name regex": value}
close_command_expr={"right_hand_joint_left": -0.03, "right_hand_joint_right": 0.03},
)
trunk_position = mdp.JointPositionActionCfg(
asset_name="Mindbot",
joint_names=["PrismaticJoint"],
scale=0.00,
offset=0.25,
clip=None
)
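For scale intuition (assuming, as is typical for relative-mode pose IK, that the 6-D action is a position delta followed by a rotation delta): a unit action on one channel moves the end-effector target by the corresponding scale entry per control step. A tiny illustration with a made-up action sample:

# illustrative only: per-step end-effector target increments for one action sample
action = [1.0, 0.0, 0.0, 0.0, 0.0, 0.5]
scale = (0.01, 0.01, 0.01, 0.025, 0.025, 0.025)
delta = [a * s for a, s in zip(action, scale)]
print(delta)  # [0.01, 0.0, 0.0, 0.0, 0.0, 0.0125] -> metres for the first three entries, radians for the last three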
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
right_gripper_pos = ObsTerm(func=right_gripper_pos_w,
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["right_hand_body"])})
dryingbox_handle_pos = ObsTerm(
func=get_body_pos_rel,
params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])}
)
dryingbox_handle_quat = ObsTerm(
func=get_body_quat_w,
params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])}
)
# lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
# ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class EventCfg:
"""Configuration for events."""
# reset all joints to their init_state (no offset)
reset_mindbot_all_joints = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
    # Reset only the right-arm joints (offset ranges are currently zero, i.e. no randomization).
reset_mindbot_right_arm = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["r_joint[1-6]"]),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
    reset_table = EventTerm(
        func=mdp.reset_root_state_uniform,
        mode="reset",
        params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}},
    )
    reset_dryingbox = EventTerm(
        func=mdp.reset_root_state_uniform,
        mode="reset",
        params={"asset_cfg": SceneEntityCfg("dryingbox"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}},
    )
# reset_ultrasound = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
# params={"asset_cfg": SceneEntityCfg("ultrasound"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
# reset_lid = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
# params={"asset_cfg": SceneEntityCfg("lid"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
gripper_handle_orientation_alignment = RewTerm(
func=mdp.gripper_handle_orientation_alignment,
params={
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"handle_name": "Equipment_BB_13_C",
"gripper_body_name": "right_hand_body",
"gripper_forward_axis": (0.0, 0.0, 1.0),
"gripper_width_axis": (0.0, 1.0, 0.0),
"lid_handle_axis": (0.0, 0.0, 1.0),
            # handle_axis was removed because the new reward function no longer uses it
            "max_angle_deg": 30.0,  # keep this at 30.0 or smaller (85.0 is far too loose for a pointing constraint)
        },
        weight=4.0  # keep around 4.0-5.0 so the orientation constraint provides enough guidance
)
    # stage 2
    # Position-alignment reward: encourage the gripper fingers to straddle the handle,
    # with the configured Z offset above it.
gripper_handle_position_alignment = RewTerm(
func=mdp.gripper_handle_position_alignment,
params={
"dryingbox_cfg": SceneEntityCfg("dryingbox"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"handle_name":"Equipment_BB_13_C",
"left_gripper_name": "right_hand_l",
"right_gripper_name": "right_hand__r",
"height_offset": 0.00, # Z方向lid 上方 0.1m
"std": 0.3, # 位置对齐的容忍度0.3 ###0.1
},
weight=3.0
)
    # Forced-open-at-distance reward / penalty for closing the gripper while far away
    # (both disabled below).
# gripper_closed_penalty = RewTerm(
# func=mdp.gripper_open_penalty_when_far,
# params={
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "handle_name": "Equipment_BB_13_C",
# "left_gripper_name": "right_hand_l",
# "right_gripper_name": "right_hand__r",
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
# "dist_threshold": 0.02, # 3cm 以外不允许闭合
# "max_penalty_dist":0.055 #0.055
# },
# weight=1.0 # 提高权重让扣分很痛8.0
# )
# # 【新增】精细对齐 (引导进入 2cm 圈)
# gripper_handle_fine_alignment = RewTerm(
# func=mdp.gripper_handle_position_alignment,
# params={
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "handle_name":"Equipment_BB_13_C",
# "left_gripper_name": "right_hand_l",
# "right_gripper_name": "right_hand__r",
# "height_offset": 0.00, # Z方向lid 上方 0.1m
# "std": 0.03, # 位置对齐的容忍度0.05
# },
# weight=5.0 # 高权重
# )
# gripper_close_interaction = RewTerm(
# func=mdp.gripper_close_when_near,
# params={
# "dryingbox_cfg": SceneEntityCfg("dryingbox"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "handle_name":"Equipment_BB_13_C",
# "left_gripper_name": "right_hand_l",
# "right_gripper_name": "right_hand__r",
# "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
# "target_close_pos": 0.03,
# "height_offset": 0.00,#0.07
# "grasp_radius": 0.04,#original 0.03
# },
# weight=10.0
# )
# lid_lifted_reward = RewTerm(
# func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
# },
# weight=10.0 # 给一个足够大的权重
# )
# lid_lifting_reward = RewTerm(
# func=mdp.lid_lift_progress_reward,
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
# "target_height_lift": 0.25,
# "height_offset": 0.07, # 与其他奖励保持一致
# "std": 0.1
# },
# # 权重设大一点,告诉它这是最终目标
# weight=10.0
# )
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
mindbot_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
)
# lid_fly_away = DoneTerm(
# func=mdp.base_height_failure,
# params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
# )
# # 新增:盖子掉落判定
# # 如果盖子掉回桌面(高度接近初始高度),且已经运行了一段时间(避开刚开始重置的瞬间)
# lid_dropped = DoneTerm(
# func=mdp.base_height_failure, # 复用高度判定函数
# params={
# "asset_cfg": SceneEntityCfg("lid"),
# "minimum_height": 0.79, # 假设初始高度是 0.9,低于 0.88 说明掉下去了或者被砸进去了
# },
# )
##
# Environment configuration
##
@configclass
class CurriculumCfg:
pass
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
# position_std_scheduler = CurrTerm(
# func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类
# params={
# # 路径rewards管理器 -> 你的奖励项名 -> params字典 -> std键
# "address": "rewards.gripper_lid_position_alignment.params.std",
# # 修改逻辑:使用我们刚才写的函数
# "modify_fn": mdp.annealing_std,
# # 传给函数的参数
# "modify_params": {
# "start_step": 00, # 约 600 轮
# "end_step": 5_000, # 约 1200 轮
# "start_std": 0.3,
# "end_std": 0.05,
# }
# }
# )
@configclass
class OpenOvenEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: OpenOvenSceneCfg = OpenOvenSceneCfg(num_envs=5, env_spacing=3.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# curriculum: CurriculumCfg = CurriculumCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
        self.decimation = 4  # originally 2; with dt = 1/120 this gives a 30 Hz control rate
        self.episode_length_s = 10
        # viewer settings
        self.viewer.eye = (3.5, 0.0, 3.2)
        # simulation settings
        self.sim.dt = 1 / 120  # originally 1 / 60
        self.sim.render_interval = self.decimation
        # PhysX GPU heap / scratch buffers
        self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024  # 256 MiB
        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024  # 1 GiB
        self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024  # 128 MiB
        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # ~16.8M contact points
        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # ~1M contact patches
        # aggregate-pair capacities (for complex articulations)
        self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
        self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
        self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024
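# Example sketch (not from the original config): how decimation and the physics dt above combine
# into the effective control rate -- the policy acts once every `decimation` physics steps.
def _control_rate_hz(sim_dt: float = 1 / 120, decimation: int = 4) -> float:
    return 1.0 / (decimation * sim_dt)  # -> 30.0 Hz for the values configured above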

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Pull-Ultrasound-Lid-Table-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
    tau: 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,71 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
    num_steps_per_env = 48  # slightly more steps per rollout
    max_iterations = 5000  # more iterations so the policy has enough time to learn
    save_interval = 100
    experiment_name = "mindbot_grasp"  # renamed from the cartpole template
    policy = RslRlPpoActorCriticCfg(
        init_noise_std=0.7,
        actor_obs_normalization=True,  # enable observation normalization
        critic_obs_normalization=True,  # enable observation normalization
        # larger network capacity: three-layer MLP
        actor_hidden_dims=[128, 64, 32],
        critic_hidden_dims=[128, 64, 32],
        activation="elu",
    )
    algorithm = RslRlPpoAlgorithmCfg(
        value_loss_coef=1.0,
        use_clipped_value_loss=True,
        clip_param=0.2,
        entropy_coef=0.005,  # keep a moderate amount of exploration
        num_learning_epochs=5,
        num_mini_batches=8,
        learning_rate=3e-4,  # lower this further if training becomes unstable with the larger network
        schedule="adaptive",
        gamma=0.99,
        lam=0.95,
        desired_kl=0.01,
        max_grad_norm=1.0,
    )
# @configclass
# class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
# num_steps_per_env = 16
# max_iterations = 150
# save_interval = 50
# experiment_name = "cartpole_direct"
# policy = RslRlPpoActorCriticCfg(
# init_noise_std=1.0,
# actor_obs_normalization=False,
# critic_obs_normalization=False,
# actor_hidden_dims=[32, 32],
# critic_hidden_dims=[32, 32],
# activation="elu",
# )
# algorithm = RslRlPpoAlgorithmCfg(
# value_loss_coef=1.0,
# use_clipped_value_loss=True,
# clip_param=0.2,
# entropy_coef=0.005,
# num_learning_epochs=5,
# num_mini_batches=4,
# learning_rate=1.0e-3,
# schedule="adaptive",
# gamma=0.99,
# lam=0.95,
# desired_kl=0.01,
# max_grad_norm=1.0,
# )

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,13 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the environment."""
from isaaclab.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403
from .gripper import * # noqa: F401, F403
from .curriculums import * # noqa: F401, F403

View File

@@ -0,0 +1,50 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
from isaaclab.envs.mdp import modify_term_cfg
def annealing_std(
    env: ManagerBasedRLEnv,
    env_ids: torch.Tensor,
    current_value: float,
    start_step: int,
    end_step: int,
    start_std: float,
    end_std: float,
):
    """Linearly anneal a std value based on the global step count.

    Args:
        current_value: the current parameter value (passed in by the manager)
        start_step: step at which annealing starts
        end_step: step at which annealing ends
        start_std: initial std
        end_std: final std
    """
    current_step = env.common_step_counter
    # 1. before the start step -> keep the initial value
    if current_step < start_step:
        return start_std
    # 2. past the end step -> keep the final value
    elif current_step >= end_step:
        return end_std
    # 3. in between -> linear interpolation
    else:
        ratio = (current_step - start_step) / (end_step - start_step)
        new_std = start_std + (end_std - start_std) * ratio
        return new_std
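# Example sketch (not from the original module): expected output of the linear annealing above for
# start_step=0, end_step=5_000, start_std=0.3, end_std=0.05 (the values in the commented-out
# curriculum term): step 0 -> 0.30, step 2_500 -> 0.175, any step >= 5_000 -> 0.05.
def _demo_annealing(step: int, start_step: int = 0, end_step: int = 5_000,
                    start_std: float = 0.3, end_std: float = 0.05) -> float:
    if step < start_step:
        return start_std
    if step >= end_step:
        return end_std
    ratio = (step - start_step) / (end_step - start_step)
    return start_std + (end_std - start_std) * ratio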

View File

@@ -0,0 +1,54 @@
# Assumed to live alongside the other MDP terms (e.g. in this package's mdp module).
from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
from isaaclab.utils import configclass
import torch
class CoupledJointPositionAction(JointPositionAction):
    """Runtime action term: a single scalar command drives both gripper joints symmetrically.

    Instantiated by the ActionManager; inherits from JointPositionAction.
    """
    def __init__(self, cfg: 'CoupledJointPositionActionCfg', env):
        super().__init__(cfg, env)
    @property
    def action_dim(self) -> int:
        """Force the ActionManager to expect a 1-D input for this term."""
        return 1
    def process_actions(self, actions: torch.Tensor):
        scale = self.cfg.scale
        offset = self.cfg.offset
        # store the raw actions
        self._raw_actions[:] = torch.clamp(actions, -1, 1)
        # apply the affine transformation to obtain the commanded opening
        target_position_interval = self._raw_actions * scale + offset
        # mirror the command onto the two fingers
        right_cmd = target_position_interval
        left_cmd = -target_position_interval
        # import pdb; pdb.set_trace()
        self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)
        # print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}")
        # clip actions
        if self.cfg.clip is not None:
            self._processed_actions = torch.clamp(
                self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
            )
@configclass
class CoupledJointPositionActionCfg(JointPositionActionCfg):
    """Configuration class. The key is class_type, which points at the implementation above."""
    # Tell the ActionManager: "when you see this config, instantiate CoupledJointPositionAction".
    class_type = CoupledJointPositionAction
    def __post_init__(self) -> None:
        # This check is fine here: the ActionManager calls it during initialization.
        if len(self.joint_names) != 2:
            raise ValueError("Requires exactly two joint names.")
        super().__post_init__()
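# Example sketch (not from the original module): how a single normalized action maps onto the two
# finger joints, assuming scale=0.015 and offset=0.015 as in the commented-out
# CoupledJointPositionActionCfg usage elsewhere in this commit (action -1 -> fully open,
# action +1 -> 0.03 m opening, mirrored on each finger).
def _demo_coupled_mapping() -> torch.Tensor:
    actions = torch.tensor([[-1.0], [0.0], [1.0]])  # (num_envs, 1) raw policy outputs
    interval = torch.clamp(actions, -1, 1) * 0.015 + 0.015  # commanded opening per finger
    left_cmd, right_cmd = -interval, interval  # mirrored finger targets
    return torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)  # (num_envs, 2)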

View File

@@ -0,0 +1,902 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
"""Reward the agent for lifting the lid above the minimal height."""
lid: RigidObject = env.scene[lid_cfg.name]
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
height = lid.data.root_pos_w[:, 2]
return torch.where(height > minimal_height, 1.0, 0.0)
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for reaching the lid using tanh-kernel."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
    # coerce std to a Python float so it cannot silently become a (3,) vector
if not isinstance(std, float):
if torch.is_tensor(std):
std = std.item()
else:
std = float(std)
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
# body_idx = robot.find_bodies(gripper_body_name)[0]
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
return 1 - torch.tanh(distance / std)
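# Example sketch (not from the original module): the tanh reaching kernel used above,
# 1 - tanh(d / std), is ~1 when the gripper sits on the target and decays smoothly with distance;
# std controls how quickly. For std = 0.3: d=0.0 -> 1.00, d=0.15 -> 0.54, d=0.6 -> 0.04.
def _demo_tanh_kernel(std: float = 0.3) -> torch.Tensor:
    distances = torch.tensor([0.0, 0.15, 0.3, 0.6])  # gripper-to-target distance in metres
    return 1.0 - torch.tanh(distances / std)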
def lid_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
                robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for grasping the lid (close and lifted)."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
    # Gripper position: (num_envs, 3)
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
# Check if close and lifted
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
return torch.where(is_close & is_lifted, 1.0, 0.0)
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Combined reward for reaching the lid AND lifting it."""
# Get reaching reward
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
# Get lifting reward
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
# Combine rewards multiplicatively
return reach_reward * lift_reward
# def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand__l",
# right_gripper_name: str = "left_hand_r",
# height_offset: float = 0.1,
# std: float = 0.1) -> torch.Tensor:
# """奖励函数夹爪相对于lid的位置对齐。
# Args:
# env: 环境实例
# lid_cfg: lid的配置
# robot_cfg: 机器人的配置
# left_gripper_name: 左侧夹爪body名称
# right_gripper_name: 右侧夹爪body名称
# side_distance: Y方向的期望距离
# height_offset: Z方向的期望高度偏移
# std: tanh核函数的标准差
# Returns:
# 位置对齐奖励 (num_envs,)
# """
# lid: RigidObject = env.scene[lid_cfg.name]
# robot: Articulation = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3]
# # 获取两个夹爪的位置
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
# # 计算夹爪中心位置
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
# # 计算相对位置(夹爪中心相对于 lid 中心)
# rel_pos = gripper_center - lid_pos_w # (num_envs, 3)
# # X 方向位置:应该对齐(接近 0
# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6
# x_reward = 1.0 - torch.tanh(x_dist / std)
# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance
# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6
# # y_error = torch.abs(y_dist - side_distance)
# # y_reward = 1.0 - torch.tanh(y_error / std)
# y_reward = 1.0 - torch.tanh(y_dist / std)
# # Z 方向位置:应该在 lid 上方 height_offset 处
# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6
# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6
# z_reward = 1.0 - torch.tanh(z_error / std)
# # 组合位置奖励
# position_reward = x_reward * y_reward * z_reward
# return position_reward
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
height_offset: float = 0.02,
std: float = 0.1) -> torch.Tensor:
"""奖励函数夹爪相对于lid的位置对齐简化版
计算夹爪中心到目标点lid上方height_offset处的距离奖励。
Args:
env: 环境实例
lid_cfg: lid的配置
robot_cfg: 机器人的配置
left_gripper_name: 左侧夹爪body名称
right_gripper_name: 右侧夹爪body名称
height_offset: Z方向的期望高度偏移目标点在lid上方
std: tanh核函数的标准差
Returns:
位置对齐奖励 (num_envs,)
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
# 目标点lid位置 + height_offsetZ方向
target_pos = lid_pos_w.clone()
# print(target_pos[0])
target_pos[:, 2] += height_offset # 在lid上方height_offset处
# print(target_pos[0])
# 获取两个夹爪的位置
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
# 计算夹爪中心位置
gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
# print(gripper_center[0])
# 计算夹爪中心到目标点的3D距离
distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
# 距离奖励:距离越小,奖励越大
position_reward = 1.0 - torch.tanh(distance / std)
return position_reward
# def gripper_lid_orientation_alignment(
# env,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# gripper_body_name: str = "left_hand_body",
# gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z
# gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行)
# lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y
# ) -> torch.Tensor:
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 获取姿态
# lid_quat_w = lid.data.root_quat_w
# body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
# gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
# # 3. 垂直对齐 (Vertical Alignment)
# # 目标:夹爪 +Y 指向 World -Z
# grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
# grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
# # print(grip_fwd_world[0])
# world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
# dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
# dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) #add
# # 映射 [-1, 1] -> [0, 1],且全程有梯度
# # 即使是朝天 (+1),只要稍微往下转一点,分数就会增加
# # rew_vertical = (dot_vertical + 1.0) / 2.0
# val_vertical = torch.clamp((dot_vertical + 1.0) / 2.0, 0.0, 1.0)
# rew_vertical = torch.pow(val_vertical, 2) + 1e-4
# # nan -> 0
# rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
# # 4. 偏航对齐 (Yaw Alignment)
# # 目标:夹爪 +Z 平行于 Lid +Y
# grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
# grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
# lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
# lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
# dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
# rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
# # nan -> 0
# rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
# # 5. 组合
# total_reward = rew_vertical * rew_yaw
# total_reward=torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
# # debug
# if not torch.isfinite(total_reward).all():
# print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
# print(f"rew_vertical: {rew_vertical}")
# print(f"rew_yaw: {rew_yaw}")
# return total_reward
def gripper_lid_orientation_alignment(
env,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body",
    gripper_forward_axis: tuple = (0.0, 0.0, 1.0),  # corrected to Z
    gripper_width_axis: tuple = (0.0, 1.0, 0.0),  # corrected to Y (kept parallel to the handle)
    lid_handle_axis: tuple = (0.0, 1.0, 0.0),  # corrected to Y
    max_angle_deg: float = 60.0,  # maximum allowed angular deviation (degrees)
) -> torch.Tensor:
    # 1. fetch the assets
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    # 2. fetch the orientations
    lid_quat_w = lid.data.root_quat_w
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
    # 3. vertical alignment: the gripper forward axis should point along world -Z (downwards)
    grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
    grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
    world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
    # dot product of unit vectors = cos(angle)
    dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
    dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0)
    # angle in radians
    angle_rad = torch.acos(dot_vertical)
    max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device)
    # reward decays smoothly from 1.0 at angle 0 to 0.0 at max_angle_deg
    angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
    rew_vertical = 1.0 - angle_normalized  # smaller angle -> larger reward
    # zero reward beyond max_angle_deg
    rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical))
    # nan -> 0
    rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
    # 4. yaw alignment: the gripper width axis should be parallel to the lid handle axis
    grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
    grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
    lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
    lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
    dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
    rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
    # nan -> 0
    rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
    # 5. combine
    total_reward = rew_vertical * rew_yaw
    total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
    # debug
    if not torch.isfinite(total_reward).all():
        print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
        print(f"rew_vertical: {rew_vertical}")
        print(f"rew_yaw: {rew_yaw}")
return total_reward
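# Example sketch (not from the original module): the vertical-alignment term above maps the angle
# between the gripper forward axis and world -Z linearly onto [0, 1] and zeroes it past
# max_angle_deg. For max_angle_deg = 30: 0 deg -> 1.0, 15 deg -> 0.5, 45 deg -> 0.0.
def _demo_vertical_alignment(max_angle_deg: float = 30.0) -> torch.Tensor:
    angle_rad = torch.deg2rad(torch.tensor([0.0, 15.0, 29.0, 45.0]))
    max_angle_rad = torch.deg2rad(torch.tensor(max_angle_deg))
    rew = 1.0 - torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
    return torch.where(angle_rad <= max_angle_rad, rew, torch.zeros_like(rew))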
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body", # 用于计算距离的 body
# # 注意:确保 joint_names 只包含那两个夹爪关节
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05, # 5cm 以内视为“近”,需要闭合
# target_open_pos: float = 0.0, # 【修正】张开是 0.0
# target_close_pos: float = 0.03 # 【修正】闭合是 0.03
# ) -> torch.Tensor:
# """
# 逻辑很简单:
# 1. 如果 距离 < close_threshold目标关节位置 = 0.03 (闭合)
# 2. 如果 距离 >= close_threshold目标关节位置 = 0.0 (张开)
# 3. 返回 -abs(当前关节 - 目标关节),即惩罚误差。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算距离 (末端 vs Lid中心)
# lid_pos_w = lid.data.root_pos_w[:, :3]
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # distance: (num_envs,)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 3. 动态确定目标关节值
# # 如果距离小于阈值,目标设为 close_pos否则设为 open_pos
# # target_val: (num_envs, 1)
# # print(distance)
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 4. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# # joint_pos: (num_envs, 2)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# # 取绝对值防止左指是负数的情况虽然你的配置里范围看起来都是正的加上abs更保险
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 5. 计算误差
# # error: (num_envs,) -> 两个手指误差的平均值
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # # 6. 返回负奖励 (惩罚)
# # # 误差越大,惩罚越大。完全符合要求时奖励为 0。
# # return -error
# # =====================================================
# # 🚀 核心修复:安全输出
# # =====================================================
# # 1. 使用 tanh 限制数值范围在 [-1, 0]
# reward = -torch.tanh(error / 0.01)
# # 2. 过滤 NaN这一步能救命
# # 如果物理引擎算出 NaN这里会把它变成 0.0,防止训练崩溃
# reward = torch.nan_to_num(reward, nan=0.0, posinf=0.0, neginf=-1.0)
# return reward
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body",
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05,
# target_open_pos: float = 0.0,
# target_close_pos: float = 0.03,
# height_offset: float = 0.06, # 新增:与位置奖励保持一致
# ) -> torch.Tensor:
# """
# 逻辑:
# 1. 计算夹爪到【目标抓取点】(Lid上方 height_offset 处) 的距离。
# 2. 如果距离 < close_threshold目标关节位置 = 闭合。
# 3. 否则:目标关节位置 = 张开。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算目标抓取点位置 (Lid位置 + Z轴偏移)
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 修正为抓取点位置
# # 获取夹爪位置
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # 3. 计算距离 (夹爪 vs 目标抓取点)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 4. 动态确定目标关节值
# # 靠近抓取点 -> 闭合;远离 -> 张开
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 5. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 6. 计算误差并返回奖励
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # 使用 tanh 限制数值范围
# reward = 1.0 - torch.tanh(error / 0.01) # 误差越小奖励越大
# return torch.nan_to_num(reward, nan=0.0)
def _is_grasped(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg,
robot_cfg: SceneEntityCfg,
left_gripper_name: str,
right_gripper_name: str,
joint_names: list,
height_offset: float,
grasp_radius: float,
target_close_pos: float
) -> torch.Tensor:
"""
内部辅助函数:判定是否成功抓取。
条件:
1. 夹爪中心在把手目标点附近 (Sphere Check)
2. 夹爪处于闭合发力状态 (Joint Effort Check)
3. (关键) 夹爪Z轴高度合适不能压在盖子上面 (Z-Axis Check)
"""
# 1. 获取对象
lid = env.scene[lid_cfg.name]
robot = env.scene[robot_cfg.name]
# 2. 目标点位置 (把手中心)
lid_pos_w = lid.data.root_pos_w[:, :3].clone()
lid_pos_w[:, 2] += height_offset
# 3. 夹爪位置
l_ids, _ = robot.find_bodies([left_gripper_name])
r_ids, _ = robot.find_bodies([right_gripper_name])
pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
gripper_center = (pos_L + pos_R) / 2.0
# 4. 条件一:距离判定 (在把手球范围内)
dist_to_handle = torch.norm(gripper_center - lid_pos_w, dim=-1)
is_near = dist_to_handle < grasp_radius
# 5. 条件二Z轴高度判定 (防止压在盖子上)
# 夹爪中心必须在把手目标点附近,允许上下微小浮动,但不能太高
# 假设 height_offset 是把手几何中心,那么夹爪不应该比它高太多
z_diff = gripper_center[:, 2] - lid_pos_w[:, 2]
is_z_valid = torch.abs(z_diff) < 0.03 # 3cm 容差
# 6. 条件三:闭合判定 (正在尝试闭合)
joint_ids, _ = robot.find_joints(joint_names)
# 获取关节位置 (绝对值)
joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# === 修改闭合判定 ===
# 1. 关节位置判定 (Effort Check)
# 只要用力了就行,去掉上限判定,防止夹太紧被误判
# 假设 0.05 是你的新 target_close_pos
is_effort_closing = torch.all(joint_pos > 0.005, dim=1)
# 2. 几何排斥判定 (Geometry Check)
# 如果真的夹住了东西,左右指尖的距离应该被把手撑开,不会太小
# 获取指尖位置
dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
# 假设把手厚度是 1cm (0.01m)
# 如果指尖距离 < 0.005,说明两个手指已经碰到一起了(空夹),没夹住东西
# 如果指尖距离 >= 0.005,说明中间有东西挡着
is_not_empty = dist_fingertips > 0.005
# 综合判定:
# 1. 在把手附近 (is_near)
# 2. 高度合适 (is_z_valid)
# 3. 在用力闭合 (is_effort_closing)
# 4. 指尖没贴死 (is_not_empty) -> 这就证明夹住东西了!
return is_near & is_z_valid & is_effort_closing & is_not_empty
def gripper_close_when_near(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
target_close_pos: float = 0.03,
height_offset: float = 0.02,
grasp_radius: float = 0.05
) -> torch.Tensor:
    # 1. check whether the basic grasp conditions are satisfied
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )
    # 2. compute the clamping intensity
    robot = env.scene[robot_cfg.name]
    joint_ids, _ = robot.find_joints(joint_names)
    # current joint positions, absolute value: (num_envs, num_joints)
    current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
    # mean closing depth of the two finger joints
    mean_joint_pos = torch.mean(current_joint_pos, dim=1)
    # reward factor: current position / target closed position,
    # so the closer the joints get to 0.03, the closer the factor gets to 1.0,
    # pushing the agent towards a fully closed grip
    clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
    # 3. the clamping reward is only paid out when the grasp check passes;
    # if the gripper is misaligned or empty, the reward stays 0
    return torch.where(is_grasped, clamping_factor, 0.0)
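# Example sketch (not from the original module): the clamping factor above scales the reward by how
# far the fingers have closed relative to target_close_pos, and is only paid while _is_grasped holds:
# a mean joint position of 0.015 with target 0.03 gives 0.5, 0.03 or more gives the full 1.0.
def _demo_clamping_factor(mean_joint_pos: float, target_close_pos: float = 0.03) -> float:
    return min(mean_joint_pos / target_close_pos, 1.0)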
# def gripper_close_when_near(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# target_close_pos: float = 0.03,
# height_offset: float = 0.03,
# grasp_radius: float = 0.02 # 球面半径 2cm
# ) -> torch.Tensor:
# # 1. 获取位置
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 把手中心
# # 获取左右指尖位置
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# # 计算夹爪中心
# pos_center = (pos_L + pos_R) / 2.0
# # 2. 距离判定 (Is Inside Sphere?)
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
# is_in_sphere = (dist_center < grasp_radius).float()
# # 3. "在中间"判定 (Is Between?)
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
# vec_LR = pos_R - pos_L # 左指指向右指
# vec_LO = lid_pos_w - pos_L # 左指指向物体
# # 计算投影比例 t
# # P_proj = P_L + t * (P_R - P_L)
# # t = (vec_LO . vec_LR) / |vec_LR|^2
# # 如果 0 < t < 1说明投影在两个指尖之间
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
# t = dot / len_sq
# is_between = (t > 0.0) & (t < 1.0)
# is_between = is_between.float()
# # 4. 闭合判定
# joint_ids, _ = robot.find_joints(joint_names)
# # 注意:如果是 Binary Action可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
# # 只有当非常接近闭合目标时才给分
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
# # 5. 最终奖励
# # 只有三者同时满足才给 1.0 分
# reward = is_in_sphere * is_between * is_closing
# return torch.nan_to_num(reward, nan=0.0)
# def lid_is_lifted(
# env:ManagerBasedRLEnv,
# minimal_height:float,
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
# ) -> torch.Tensor:
# lid = env.scene[lid_cfg.name]
# lid_height = lid.data.root_pos_w[:, 2]
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
# reward=torch.nan_to_num(reward, nan=0.0)
# return reward
def lid_is_lifted(
env: ManagerBasedRLEnv,
    minimal_height: float = 0.05,  # relative lift threshold
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
height_offset: float = 0.03,
grasp_radius: float = 0.1,
target_close_pos: float = 0.03,
) -> torch.Tensor:
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
lid = env.scene[lid_cfg.name]
    # 1. get the heights
    current_height = lid.data.root_pos_w[:, 2]
    # initial height taken automatically from the default root state
    initial_height = lid.data.default_root_state[:, 2]
    # 2. lift amount
    lift_height = torch.clamp(current_height - initial_height, min=0.0)
    # 3. velocity check (guards against the lid being knocked away)
    # if the lid moves faster than 1.0 m/s it is treated as knocked away and gets no reward
    lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
    is_stable = lid_vel < 1.0
    # 4. compute the reward
    # shaping: the higher the lift, the larger the reward
    shaping_reward = lift_height * 2.0
    # success bonus once the lift exceeds the threshold
    success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
    # combine: requires is_grasped AND is_stable
    reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
return torch.nan_to_num(reward, nan=0.0)
def lid_lift_success_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand_body",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    # key parameters
    settled_height: float = 0.75,  # settled height of the lid on the ultrasound unit (tune to the actual simulation)
    target_lift_height: float = 0.05,  # target lift height (5 cm)
    grasp_dist_threshold: float = 0.05,  # grasp-detection distance
    closed_pos: float = 0.03  # gripper closed-position threshold
) -> torch.Tensor:
    """
    Lift reward: a high score is only given when the lid is lifted by the target amount
    (relative to its settled height) while it is securely grasped.
    """
    # 1. fetch data
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    # current lid height
    lid_z = lid.data.root_pos_w[:, 2]
    lid_pos = lid.data.root_pos_w[:, :3]
    # gripper position
    body_ids, _ = robot.find_bodies([left_gripper_name])
    gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
    # 2. condition 1: is the lid securely grasped?
    # distance check
    distance = torch.norm(gripper_pos - lid_pos, dim=-1)
    is_close = distance < grasp_dist_threshold
    # closure check
    joint_ids, _ = robot.find_joints(joint_names)
    joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
    # treat > 80% of the closed position as a firm grip
    is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)
    is_grasped = is_close & is_closed
    # 3. condition 2: relative lift
    # current height minus the settled height
    current_lift = lid_z - settled_height
    # 4. compute the reward
    lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
    # shaping reward: proportional to lift progress
    lift_reward = lift_progress
    # success bonus once the target lift height is reached
    success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
    # combine: the lift reward is only paid while is_grasped is True
    total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))
    # 5. safe output
return torch.nan_to_num(total_reward, nan=0.0)
def lid_on_table_area_tanh_reward(
env: ManagerBasedRLEnv,
std_xy: float,
std_z: float,
table_cfg: SceneEntityCfg,
    table_dims: tuple[float, float],  # table-top size along x and y (x_size, y_size)
    surface_height: float,  # Z offset of the table top relative to the table's centre
    lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
    robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
    # grasp-detection parameters (consistent with the earlier grasp logic)
    left_gripper_name: str = "left_hand__l",
    right_gripper_name: str = "left_hand_r",
    joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    height_offset: float = 0.02,
    grasp_radius: float = 0.1,
    target_close_pos: float = 0.03,
) -> torch.Tensor:
    """
    Area-based tanh reward: high reward whenever the lid is above the table-top region
    and at the right height.
    """
    # 1. fetch data
    lid: RigidObject = env.scene[lid_cfg.name]
    table: RigidObject = env.scene[table_cfg.name]
    lid_pos_w = lid.data.root_pos_w[:, :3]
    table_pos_w = table.data.root_pos_w[:, :3]
    # 2. XY offset of the lid relative to the table centre
    # assumes the table is axis-aligned
    rel_pos_xy = lid_pos_w[:, :2] - table_pos_w[:, :2]
    # point-to-rectangle distance:
    # dx, dy are how far the lid centre overshoots the table edges
    half_width = table_dims[0] / 2.0
    half_depth = table_dims[1] / 2.0
    dx = torch.clamp(torch.abs(rel_pos_xy[:, 0]) - half_width, min=0.0)
    dy = torch.clamp(torch.abs(rel_pos_xy[:, 1]) - half_depth, min=0.0)
    dist_xy = torch.sqrt(dx**2 + dy**2)
    # 3. Z-height deviation
    # target height = table centre height + surface offset
    target_z = table_pos_w[:, 2] + surface_height
    dist_z = torch.abs(lid_pos_w[:, 2] - target_z)
    # 4. check the grasp
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )
    # 5. tanh rewards
    # anywhere inside the rectangle, dist_xy = 0 and reward_xy = 1.0
    reward_xy = 1.0 - torch.tanh(dist_xy / std_xy)
    reward_z = 1.0 - torch.tanh(dist_z / std_z)
    # combined reward: both XY and Z must be aligned
    total_reward = reward_xy * reward_z
    # the shaping reward is only paid while the lid is securely grasped
return torch.where(is_grasped, total_reward, torch.zeros_like(total_reward))
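# Example sketch (not from the original module): the XY term above is a point-to-rectangle distance.
# Clamp the per-axis overshoot beyond the table's half-extents at zero, then take the Euclidean
# norm; points over the table get distance 0 and therefore the full tanh reward. The half-extents
# below are placeholders -- in the reward they come from table_dims.
def _demo_dist_to_rect(rel_xy: torch.Tensor, half_extents: tuple[float, float] = (0.4, 0.3)) -> torch.Tensor:
    dx = torch.clamp(rel_xy[:, 0].abs() - half_extents[0], min=0.0)
    dy = torch.clamp(rel_xy[:, 1].abs() - half_extents[1], min=0.0)
    return torch.sqrt(dx**2 + dy**2)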
def lid_lift_progress_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
initial_height: float = 0.8,
target_height_lift: float = 0.15,
    height_offset: float = 0.02,  # must match the grasp reward
    grasp_radius: float = 0.1,  # slightly larger radius during lifting so jitter does not drop the reward
    target_close_pos: float = 0.03,
    std: float = 0.05  # standard deviation
) -> torch.Tensor:
    # 1. check the grasp
    is_grasped = _is_grasped(
        env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
        joint_names, height_offset, grasp_radius, target_close_pos
    )
    # 2. compute the lift height
    lid = env.scene[lid_cfg.name]
    current_height = lid.data.root_pos_w[:, 2]
    lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
    # print(current_height)
    # print(lift_height)
    # 3. compute the reward
    # the height reward is only paid while is_grasped is True,
    # which rules out scoring by knocking or launching the lid away
    progress = torch.tanh(lift_height / std)
    reward = torch.where(is_grasped, progress, 0.0)
return reward
# def lid_grasped_bonus(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg,
# robot_cfg: SceneEntityCfg,
# left_gripper_name: str,
# joint_names: list,
# distance_threshold: float = 0.05,
# closed_pos: float = 0.03
# ) -> torch.Tensor:
# # 1. 计算距离
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
# lid_pos = lid.data.root_pos_w[:, :3]
# distance = torch.norm(gripper_pos - lid_pos, dim=-1)
# # 2. 检查距离是否达标
# is_close = distance < distance_threshold
# # 3. 检查夹爪是否闭合
# joint_ids, _ = robot.find_joints(joint_names)
# joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# # 检查是否接近闭合 (比如 > 0.02)
# is_closed = torch.all(joint_pos > (closed_pos * 0.8), dim=-1)
# # 4. 给分
# # 只有同时满足才给 1.0
# reward = torch.where(is_close & is_closed, 1.0, 0.0)
# return torch.nan_to_num(reward)

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def lid_dropped(env: ManagerBasedRLEnv,
minimum_height: float = -0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
return lid.data.root_pos_w[:, 2] < minimum_height
def lid_successfully_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.03,
height_threshold: float = 0.2,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
return is_close & is_lifted
def gripper_at_lid_side(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                        left_gripper_name: str = "left_hand__l",  # note the double underscore
                        right_gripper_name: str = "left_hand_r",
                        side_distance: float = 0.05,
                        side_tolerance: float = 0.01,
                        alignment_tolerance: float = 0.02,
                        height_offset: float = 0.1,
                        height_tolerance: float = 0.02) -> torch.Tensor:
    """Terminate when gripper center is positioned on the side of the lid at specified height.

    Frame conventions:
    - X: direction along which the two fingers close towards each other
    - Y: direction of the gap between the fingers, aligned with the lid handle
    - Z: height
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]
    lid_pos_w = lid.data.root_pos_w[:, :3]
    # positions of both fingers
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
    # gripper centre
    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
    rel_pos = gripper_center - lid_pos_w
    # Y: should sit at side_distance from the lid on either side
    y_dist = torch.abs(rel_pos[:, 1])
    y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
    # X: should be aligned (close to 0)
    x_dist = torch.abs(rel_pos[:, 0])
    x_ok = x_dist < alignment_tolerance
    # Z: should be height_offset above the lid
    z_error = torch.abs(rel_pos[:, 2] - height_offset)
    z_ok = z_error < height_tolerance
    # all conditions must hold
return x_ok & y_ok & z_ok
def base_height_failure(env: ManagerBasedRLEnv,
minimum_height: float | None = None,
maximum_height: float | None = None,
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Terminate when the robot's base height is outside the specified range."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
root_pos_z = asset.data.root_pos_w[:, 2]
# check if height is outside the range
out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool)
if minimum_height is not None:
out_of_bounds |= root_pos_z < minimum_height
if maximum_height is not None:
out_of_bounds |= root_pos_z > maximum_height
return out_of_bounds

View File

@@ -0,0 +1,492 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
import torch
from isaaclab.actuators import ImplicitActuatorCfg
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
from . import mdp
##
# Pre-defined configs
##
from mindbot.robot.mindbot import MINDBOT_CFG
# ====== Additional object configurations ======
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
TABLE_CFG=RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Table",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.95, -0.3, 0.005],
rot=[0.7071, 0.0, 0.0, 0.7071],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/table/Table_C.usd",
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
LID_CFG = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Lid",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.2, 0.65, 0.9],
rot=[1.0, 0.0, 0.0, 0.0],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
# scale=(0.2, 0.2, 0.2),
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
max_angular_velocity=1000.0,
max_linear_velocity=1000.0,
max_depenetration_velocity=0.5,#original 5.0
linear_damping=5.0, #original 0.5
angular_damping=5.0, #original 0.5
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
ULTRASOUND_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,#
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
stabilization_threshold=1e-6,
# fix_root_link=True,
),
),
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)),
# actuators={}
actuators={
"passive_damper": ImplicitActuatorCfg(
            # ".*" matches every joint inside this USD file (wheels, screen hinge, etc.)
joint_names_expr=[".*"],
stiffness=0.0,
damping=1000.0,
effort_limit_sim=10000.0,
velocity_limit_sim=100.0,
),
}
)
##
# Scene definition
##
@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
    """Configuration for the Mindbot lid-grasping scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
##
# MDP settings
##
def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
return pos_w
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
    # Task-space controller: the policy outputs a 6-D end-effector pose increment (position + orientation)
    left_arm_ee = DifferentialInverseKinematicsActionCfg(
        asset_name="Mindbot",
        joint_names=["l_joint[1-6]"],  # the 6 joints of the left arm
        body_name="left_hand_body",  # end-effector body name
        controller=DifferentialIKControllerCfg(
            command_type="pose",  # control position + orientation
            use_relative_mode=True,  # relative mode: actions are increments
            ik_method="dls",  # damped least squares
        ),
        scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
)
# grippers_position = mdp.JointPositionActionCfg(
# asset_name="Mindbot",
# joint_names=["left_hand_joint_.*"],
# scale=1,
# clip={
# "left_hand_joint_left": (-0.03, 0.0),
# "left_hand_joint_right": (0.0, 0.03),
# },
# )
# grippers_position = mdp.CoupledJointPositionActionCfg(
# asset_name="Mindbot",
# joint_names=["left_hand_joint_left", "left_hand_joint_right"],
# scale=0.015, # (0.03 - 0) / 2
# offset=0.015, # (0.03 + 0) / 2
# clip=None
# )
grippers_position = mdp.BinaryJointPositionActionCfg(
asset_name="Mindbot",
joint_names=["left_hand_joint_left", "left_hand_joint_right"],
        # dictionary format: {"joint-name regex": value}
        open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
        close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
)
trunk_position = mdp.JointPositionActionCfg(
asset_name="Mindbot",
joint_names=["PrismaticJoint"],
scale=0.00,
offset=0.3,
clip=None
)
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class EventCfg:
"""Configuration for events."""
# reset_mindbot_root=EventTerm(
# func=mdp.reset_root_state_uniform,
# mode="reset",
# params={
# "asset_cfg":SceneEntityCfg("Mindbot"),
# "pose_range":{"x":(0.0, 0.0), "y":(0.0, 0.0)},
# "velocity_range":{"x":(0.0, 0.0), "y":(0.0, 0.0)},
# },
# )
    # reset all joints to their init_state (no offset)
reset_mindbot_all_joints = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
    # add a random offset only to the left-arm joints
reset_mindbot_left_arm = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
reset_ultrasound = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
params={"asset_cfg": SceneEntityCfg("ultrasound"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
reset_lid = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
params={"asset_cfg": SceneEntityCfg("lid"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
    # Stage 1: orientation alignment reward
# gripper_lid_orientation_alignment = RewTerm(
# func=mdp.gripper_lid_orientation_alignment,
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "robot_cfg": SceneEntityCfg("Mindbot"),
# "gripper_body_name": "left_hand_body",
# "gripper_forward_axis": (0.0, 0.0, 1.0),
# "gripper_width_axis": (0.0, 1.0, 0.0),
# "lid_handle_axis": (0.0, 1.0, 0.0)
# },
# weight=1.0
# )
gripper_lid_orientation_alignment = RewTerm(
func=mdp.gripper_lid_orientation_alignment,
        params={
            "lid_cfg": SceneEntityCfg("lid"),
            "robot_cfg": SceneEntityCfg("Mindbot"),
            "gripper_body_name": "left_hand_body",
            "gripper_forward_axis": (0.0, 0.0, 1.0),
            "gripper_width_axis": (0.0, 1.0, 0.0),
            "lid_handle_axis": (0.0, 1.0, 0.0),
            "max_angle_deg": 85.0,  # allow up to 85 degrees of deviation
        },
weight=5
)
    # Stage 2: position alignment reward, encouraging the gripper to straddle the lid
    # with the gripper center height_offset above it in Z
gripper_lid_position_alignment = RewTerm(
func=mdp.gripper_lid_position_alignment,
        params={
            "lid_cfg": SceneEntityCfg("lid"),
            "robot_cfg": SceneEntityCfg("Mindbot"),
            "left_gripper_name": "left_hand__l",
            "right_gripper_name": "left_hand_r",
            "height_offset": 0.07,  # Z direction: 7 cm above the lid
            "std": 0.3,  # tolerance of the position-alignment kernel
        },
weight=3.0
)
    # Fine alignment: guides the gripper into a tight radius around the grasp point
gripper_lid_fine_alignment = RewTerm(
func=mdp.gripper_lid_position_alignment,
        params={
            "lid_cfg": SceneEntityCfg("lid"),
            "robot_cfg": SceneEntityCfg("Mindbot"),
            "left_gripper_name": "left_hand__l",
            "right_gripper_name": "left_hand_r",
            "height_offset": 0.07,  # NOTE: must match the height used by the grasp-interaction check
            "std": 0.05,  # very steep gradient, effective only within roughly 5-10 cm
        },
        weight=10.0  # high weight
)
gripper_close_interaction = RewTerm(
func=mdp.gripper_close_when_near,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"joint_names": ["left_hand_joint_left", "left_hand_joint_right"],
"target_close_pos": 0.03,
"height_offset": 0.04,
"grasp_radius": 0.03,
},
weight=10.0
)
lid_on_table_area = RewTerm(
func=mdp.lid_on_table_area_tanh_reward,
        params={
            "table_cfg": SceneEntityCfg("table"),
            "table_dims": (1.2, 0.8),  # tabletop XY dimensions
            "surface_height": 0.82,  # tabletop Z height (relative to the table base)
            "std_xy": 0.5,  # XY guidance range, 0.5 m
            "std_z": 0.1,  # Z guidance precision, 0.1 m
            "height_offset": 0.07,  # grasp-check offset
        },
weight=20.0
)
# lid_lifted_reward = RewTerm(
# func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "minimal_height": 1.0, # 根据你的场景调整,比初始高度高 5cm 左右
# },
# weight=10.0 # 给一个足够大的权重
# )
# lid_lifting_reward = RewTerm(
# func=mdp.lid_lift_progress_reward,
# params={
# "lid_cfg": SceneEntityCfg("lid"),
# "initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
# "target_height_lift": 0.2,
# "height_offset": 0.07, # 与其他奖励保持一致
# "std": 0.1
# },
# # 权重设大一点,告诉它这是最终目标
# weight=10.0
# )
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
mindbot_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
)
lid_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
)
    # Lid-dropped termination:
    # triggered when the lid falls back toward the table (height close to its initial height)
lid_dropped = DoneTerm(
func=mdp.base_height_failure, # 复用高度判定函数
        params={
            "asset_cfg": SceneEntityCfg("lid"),
            "minimum_height": 0.79,  # the lid starts at ~0.9; falling below this means it dropped or was knocked down
        },
)
##
# Environment configuration
##
@configclass
class CurriculumCfg:
pass
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
# position_std_scheduler = CurrTerm(
# func=mdp.modify_term_cfg, # 直接使用 Isaac Lab 内置的类
# params={
# # 路径rewards管理器 -> 你的奖励项名 -> params字典 -> std键
# "address": "rewards.gripper_lid_position_alignment.params.std",
# # 修改逻辑:使用我们刚才写的函数
# "modify_fn": mdp.annealing_std,
# # 传给函数的参数
# "modify_params": {
# "start_step": 00, # 约 600 轮
# "end_step": 5_000, # 约 1200 轮
# "start_std": 0.3,
# "end_std": 0.05,
# }
# }
# )
@configclass
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=3.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# curriculum: CurriculumCfg = CurriculumCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
        self.decimation = 4  # original 2; with sim dt = 1/120 this gives a 30 Hz control rate
self.episode_length_s = 10
# viewer settings
self.viewer.eye = (3.5, 0.0, 3.2)
# simulation settings
self.sim.dt = 1 / 120 #original 1 / 60
self.sim.render_interval = self.decimation
        # base GPU heap memory
        self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024  # 256 MB
        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
        self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # ~16.8M contact points
        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # ~1M patches
        # aggregate pair capacities (for complex articulations)
self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Pull-Ultrasound-Lid-Up-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
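# --- Added usage sketch (not part of the original file) ---
# A minimal way to instantiate the registered task directly. This assumes the Isaac Sim app
# has already been started via isaaclab.app.AppLauncher (required before any env code runs)
# and that this package has been imported so the gym.register() call above executed.
def _example_make_env():
    from .mindbot_env_cfg import MindbotEnvCfg  # deferred import to avoid circular imports

    # gym.make forwards the cfg keyword to ManagerBasedRLEnv, matching the entry point above
    env = gym.make("Pull-Ultrasound-Lid-Up-v0", cfg=MindbotEnvCfg())
    obs, _ = env.reset()
    return env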

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,71 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
    num_steps_per_env = 48  # slightly more steps per rollout
    max_iterations = 5000  # more iterations, to give the policy enough time to learn
    save_interval = 100
    experiment_name = "mindbot_grasp"
    policy = RslRlPpoActorCriticCfg(
        init_noise_std=0.7,
        actor_obs_normalization=True,  # enable observation normalization
        critic_obs_normalization=True,  # enable observation normalization
        # larger network capacity: a three-layer MLP with sufficient width
actor_hidden_dims=[128, 64, 32],
critic_hidden_dims=[128, 64, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
        entropy_coef=0.005,  # keep a moderate amount of exploration
num_learning_epochs=5,
num_mini_batches=8,
        learning_rate=3e-4,  # consider lowering this if training becomes unstable with the larger network
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
# @configclass
# class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
# num_steps_per_env = 16
# max_iterations = 150
# save_interval = 50
# experiment_name = "cartpole_direct"
# policy = RslRlPpoActorCriticCfg(
# init_noise_std=1.0,
# actor_obs_normalization=False,
# critic_obs_normalization=False,
# actor_hidden_dims=[32, 32],
# critic_hidden_dims=[32, 32],
# activation="elu",
# )
# algorithm = RslRlPpoAlgorithmCfg(
# value_loss_coef=1.0,
# use_clipped_value_loss=True,
# clip_param=0.2,
# entropy_coef=0.005,
# num_learning_epochs=5,
# num_mini_batches=4,
# learning_rate=1.0e-3,
# schedule="adaptive",
# gamma=0.99,
# lam=0.95,
# desired_kl=0.01,
# max_grad_norm=1.0,
# )

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,13 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the environment."""
from isaaclab.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403
from .gripper import * # noqa: F401, F403
from .curriculums import * # noqa: F401, F403

View File

@@ -0,0 +1,50 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
if TYPE_CHECKING:
    from isaaclab.envs import ManagerBasedRLEnv
from isaaclab.envs.mdp import modify_term_cfg
def annealing_std(
env: ManagerBasedRLEnv,
env_ids: torch.Tensor,
current_value: float,
start_step: int,
end_step: int,
start_std: float,
end_std: float
):
    """Linearly anneal the std value based on the step count.

    Args:
        current_value: the current parameter value (passed in by the curriculum manager)
        start_step: step at which annealing starts
        end_step: step at which annealing ends
        start_std: the initial std
        end_std: the final std
    """
    current_step = env.common_step_counter
    # 1. before the start step -> keep the initial value
    if current_step < start_step:
        return start_std
    # 2. past the end step -> keep the final value
    elif current_step >= end_step:
        return end_std
    # 3. in between -> linear interpolation
else:
ratio = (current_step - start_step) / (end_step - start_step)
new_std = start_std + (end_std - start_std) * ratio
return new_std
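# --- Added sanity sketch (not part of the original file) ---
# Checks the linear schedule with a stub that only mimics the ``common_step_counter``
# attribute of ManagerBasedRLEnv; the numbers mirror the commented-out curriculum term
# in mindbot_env_cfg.py (std annealed from 0.3 to 0.05 over 5000 steps).
if __name__ == "__main__":
    class _StubEnv:
        common_step_counter = 0

    _env = _StubEnv()
    for _step in (0, 2500, 5000, 8000):
        _env.common_step_counter = _step
        print(_step, annealing_std(_env, None, 0.3, start_step=0, end_step=5000,
                                    start_std=0.3, end_std=0.05))
    # expected: 0.3, 0.175, 0.05, 0.05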

View File

@@ -0,0 +1,54 @@
# Assumed to live alongside the task's other mdp modules.
from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg
from isaaclab.envs.mdp.actions.joint_actions import JointPositionAction
from isaaclab.utils import configclass
import torch
class CoupledJointPositionAction(JointPositionAction):
    """Runtime action term that couples two mirrored gripper joints to a single 1-D action."""

    def __init__(self, cfg: 'CoupledJointPositionActionCfg', env):
        super().__init__(cfg, env)

    @property
    def action_dim(self) -> int:
        """Make the ActionManager expect a single 1-D input."""
        return 1

    def process_actions(self, actions: torch.Tensor):
scale = self.cfg.scale
offset = self.cfg.offset
# store the raw actions
self._raw_actions[:] = torch.clamp(actions, -1, 1)
# apply the affine transformations
target_position_interval = self._raw_actions * scale + offset
right_cmd = target_position_interval
left_cmd = -target_position_interval
# import pdb; pdb.set_trace()
self._processed_actions = torch.stack((left_cmd, right_cmd), dim=1).squeeze(-1)
# print(f"[DEBUG] In: {actions[0]}, {self._processed_actions[0]}")
# clip actions
if self.cfg.clip is not None:
self._processed_actions = torch.clamp(
self._processed_actions, min=self._clip[:, :, 0], max=self._clip[:, :, 1]
)
@configclass
class CoupledJointPositionActionCfg(JointPositionActionCfg):
    """Configuration class. The key point is that ``class_type`` points to the implementation class above."""

    # Tell the ActionManager to instantiate CoupledJointPositionAction when it sees this config.
    class_type = CoupledJointPositionAction
def __post_init__(self) -> None:
        # the ActionManager calls this during initialization, so the check runs before the term is used
if len(self.joint_names) != 2:
raise ValueError("Requires exactly two joint names.")
super().__post_init__()
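# --- Added usage sketch (not part of the original file) ---
# Shows how the single coupled action maps onto the two mirrored finger joints. The
# scale/offset values are taken from the commented-out cfg in mindbot_env_cfg.py
# (scale=0.015, offset=0.015) and are assumptions here, not values this class enforces.
if __name__ == "__main__":
    scale, offset = 0.015, 0.015
    for a in (-1.0, 0.0, 1.0):
        interval = max(-1.0, min(1.0, a)) * scale + offset
        left_cmd, right_cmd = -interval, interval
        print(a, (left_cmd, right_cmd))
    # -1.0 -> ( 0.0,   0.0)   fully open
    #  0.0 -> (-0.015, 0.015)
    # +1.0 -> (-0.03,  0.03)  fully closed, matching the BinaryJointPositionActionCfg commands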

View File

@@ -0,0 +1,808 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi, quat_error_magnitude, quat_apply, normalize
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
def lid_is_lifted(env: ManagerBasedRLEnv, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
"""Reward the agent for lifting the lid above the minimal height."""
lid: RigidObject = env.scene[lid_cfg.name]
# root_pos_w shape: (num_envs, 3) - extract z-coordinate: (num_envs,)
height = lid.data.root_pos_w[:, 2]
return torch.where(height > minimal_height, 1.0, 0.0)
def gripper_lid_distance(env: ManagerBasedRLEnv, std: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for reaching the lid using tanh-kernel."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
    # coerce std to a float so it cannot silently broadcast into a (3,) vector
if not isinstance(std, float):
if torch.is_tensor(std):
std = std.item()
else:
std = float(std)
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
# body_idx = robot.find_bodies(gripper_body_name)[0]
# gripper_pos_w = robot.data.body_pos_w[:, body_idx, :].squeeze(dim=1)
body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1) + 1e-6
return 1 - torch.tanh(distance / std)
def lid_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg ("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Reward the agent for grasping the lid (close and lifted)."""
# extract the used quantities (to enable type-hinting)
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
# Target lid position: (num_envs, 3)
lid_pos_w = lid.data.root_pos_w[:, :3]
# Gripper position: (num_envs, 3)
body_idx = robot.find_bodies(gripper_body_name)[0]
gripper_pos_w = robot.data.body_pos_w[:, body_idx, :]
# Distance of the gripper to the lid: (num_envs,)
distance = torch.norm(lid_pos_w - gripper_pos_w, dim=1)
# Check if close and lifted
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > 0.1
return torch.where(is_close & is_lifted, 1.0, 0.0)
def gripper_lid_distance_combined(env: ManagerBasedRLEnv, std: float, minimal_height: float,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
"""Combined reward for reaching the lid AND lifting it."""
# Get reaching reward
reach_reward = gripper_lid_distance(env, std, lid_cfg, robot_cfg, gripper_body_name)
# Get lifting reward
lift_reward = lid_is_lifted(env, minimal_height, lid_cfg)
# Combine rewards multiplicatively
return reach_reward * lift_reward
# def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand__l",
# right_gripper_name: str = "left_hand_r",
# height_offset: float = 0.1,
# std: float = 0.1) -> torch.Tensor:
# """奖励函数夹爪相对于lid的位置对齐。
# Args:
# env: 环境实例
# lid_cfg: lid的配置
# robot_cfg: 机器人的配置
# left_gripper_name: 左侧夹爪body名称
# right_gripper_name: 右侧夹爪body名称
# side_distance: Y方向的期望距离
# height_offset: Z方向的期望高度偏移
# std: tanh核函数的标准差
# Returns:
# 位置对齐奖励 (num_envs,)
# """
# lid: RigidObject = env.scene[lid_cfg.name]
# robot: Articulation = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3]
# # 获取两个夹爪的位置
# left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
# right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
# left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :] # (num_envs, 3)
# right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :] # (num_envs, 3)
# # 计算夹爪中心位置
# gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0 # (num_envs, 3)
# # 计算相对位置(夹爪中心相对于 lid 中心)
# rel_pos = gripper_center - lid_pos_w # (num_envs, 3)
# # X 方向位置:应该对齐(接近 0
# x_dist = torch.abs(rel_pos[:, 0]) + 1e-6
# x_reward = 1.0 - torch.tanh(x_dist / std)
# # Y 方向位置:应该在 lid 的 Y 方向两侧(距离 side_distance
# y_dist = torch.abs(rel_pos[:, 1]) + 1e-6
# # y_error = torch.abs(y_dist - side_distance)
# # y_reward = 1.0 - torch.tanh(y_error / std)
# y_reward = 1.0 - torch.tanh(y_dist / std)
# # Z 方向位置:应该在 lid 上方 height_offset 处
# # z_error = torch.clamp(torch.abs(rel_pos[:, 2] - height_offset),-3,3)+ 1e-6
# z_error = torch.abs(rel_pos[:, 2] - height_offset) + 1e-6
# z_reward = 1.0 - torch.tanh(z_error / std)
# # 组合位置奖励
# position_reward = x_reward * y_reward * z_reward
# return position_reward
def gripper_lid_position_alignment(env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
height_offset: float = 0.02,
                                   std: float = 0.1) -> torch.Tensor:
    """Reward for gripper-to-lid position alignment (simplified version).

    Computes a distance reward between the gripper center and a target point located
    ``height_offset`` above the lid.

    Args:
        env: the environment instance
        lid_cfg: scene-entity config of the lid
        robot_cfg: scene-entity config of the robot
        left_gripper_name: body name of the left gripper finger
        right_gripper_name: body name of the right gripper finger
        height_offset: desired height offset in Z (target point above the lid)
        std: standard deviation of the tanh kernel

    Returns:
        Position-alignment reward of shape (num_envs,).
    """
    lid: RigidObject = env.scene[lid_cfg.name]
    robot: Articulation = env.scene[robot_cfg.name]
    lid_pos_w = lid.data.root_pos_w[:, :3]
    # target point: lid position + height_offset along Z
    target_pos = lid_pos_w.clone()
    target_pos[:, 2] += height_offset
    # positions of the two gripper fingers
    left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
    right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
    left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]  # (num_envs, 3)
    right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]  # (num_envs, 3)
    # gripper center position
    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0  # (num_envs, 3)
    # 3D distance from the gripper center to the target point
    distance = torch.norm(gripper_center - target_pos, dim=1) + 1e-6
    # distance reward: the smaller the distance, the larger the reward
    position_reward = 1.0 - torch.tanh(distance / std)
return position_reward
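# Added note: the kernel above is r = 1 - tanh(d / std). With the std values used in
# RewardsCfg this gives roughly r ~= 0.97 at d = 1 cm and r ~= 0.68 at d = 10 cm for
# std = 0.3, but r ~= 0.80 at 1 cm and only ~= 0.04 at 10 cm for the "fine alignment"
# term with std = 0.05, which is why that term only acts very close to the grasp point.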
# def gripper_lid_orientation_alignment(
# env,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# gripper_body_name: str = "left_hand_body",
# gripper_forward_axis: tuple = (0.0, 0.0, 1.0), # 修正为 Z
# gripper_width_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y (用于与把手平行)
# lid_handle_axis: tuple = (0.0, 1.0, 0.0), # 修正为 Y
# ) -> torch.Tensor:
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 获取姿态
# lid_quat_w = lid.data.root_quat_w
# body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
# gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
# # 3. 垂直对齐 (Vertical Alignment)
# # 目标:夹爪 +Y 指向 World -Z
# grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
# grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
# # print(grip_fwd_world[0])
# world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
# dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
# dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0) #add
# # 映射 [-1, 1] -> [0, 1],且全程有梯度
# # 即使是朝天 (+1),只要稍微往下转一点,分数就会增加
# # rew_vertical = (dot_vertical + 1.0) / 2.0
# val_vertical = torch.clamp((dot_vertical + 1.0) / 2.0, 0.0, 1.0)
# rew_vertical = torch.pow(val_vertical, 2) + 1e-4
# # nan -> 0
# rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
# # 4. 偏航对齐 (Yaw Alignment)
# # 目标:夹爪 +Z 平行于 Lid +Y
# grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
# grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
# lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
# lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
# dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
# rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
# # nan -> 0
# rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
# # 5. 组合
# total_reward = rew_vertical * rew_yaw
# total_reward=torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
# # debug
# if not torch.isfinite(total_reward).all():
# print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
# print(f"rew_vertical: {rew_vertical}")
# print(f"rew_yaw: {rew_yaw}")
# return total_reward
def gripper_lid_orientation_alignment(
env,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body",
    gripper_forward_axis: tuple = (0.0, 0.0, 1.0),  # gripper approach axis (local Z)
    gripper_width_axis: tuple = (0.0, 1.0, 0.0),  # gripper width axis (local Y), should end up parallel to the handle
    lid_handle_axis: tuple = (0.0, 1.0, 0.0),  # lid handle axis (local Y)
    max_angle_deg: float = 60.0,  # maximum allowed angular deviation (degrees)
) -> torch.Tensor:
    # 1. fetch the assets
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    # 2. fetch the orientations
    lid_quat_w = lid.data.root_quat_w
    body_ids, _ = robot.find_bodies([gripper_body_name], preserve_order=True)
    gripper_quat_w = robot.data.body_quat_w[:, body_ids[0], :]
    # 3. vertical alignment
    # goal: the gripper forward axis points along world -Z (downwards)
    grip_fwd_local = torch.tensor(gripper_forward_axis, device=env.device).repeat(env.num_envs, 1)
    grip_fwd_world = quat_apply(gripper_quat_w, grip_fwd_local)
    world_down = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
    # dot product (for unit vectors, dot = cos(angle))
    dot_vertical = torch.sum(grip_fwd_world * world_down, dim=1)
    dot_vertical = torch.clamp(dot_vertical, -1.0, 1.0)
    # angle in radians
    angle_rad = torch.acos(torch.clamp(dot_vertical, -1.0, 1.0))
    max_angle_rad = torch.tensor(max_angle_deg * 3.14159265359 / 180.0, device=env.device)
    # reward only when the angle is within max_angle_deg,
    # decaying smoothly from 1.0 at angle = 0 to 0.0 at angle = max_angle
    angle_normalized = torch.clamp(angle_rad / max_angle_rad, 0.0, 1.0)
    rew_vertical = 1.0 - angle_normalized  # smaller angle -> larger reward
    # zero reward beyond max_angle_deg
    rew_vertical = torch.where(angle_rad <= max_angle_rad, rew_vertical, torch.zeros_like(rew_vertical))
    # nan -> 0
    rew_vertical = torch.where(torch.isnan(rew_vertical), torch.zeros_like(rew_vertical), rew_vertical)
    # 4. yaw alignment
    # goal: the gripper width axis is parallel to the lid handle axis
    grip_width_local = torch.tensor(gripper_width_axis, device=env.device).repeat(env.num_envs, 1)
    grip_width_world = quat_apply(gripper_quat_w, grip_width_local)
    lid_handle_local = torch.tensor(lid_handle_axis, device=env.device).repeat(env.num_envs, 1)
    lid_handle_world = quat_apply(lid_quat_w, lid_handle_local)
    dot_yaw = torch.sum(grip_width_world * lid_handle_world, dim=1)
    rew_yaw = torch.square(torch.abs(dot_yaw)) + 1e-4
    # nan -> 0
    rew_yaw = torch.where(torch.isnan(rew_yaw), torch.zeros_like(rew_yaw), rew_yaw)
    # 5. combine
    total_reward = rew_vertical * rew_yaw
    total_reward = torch.nan_to_num(total_reward, nan=0.0, posinf=0.0, neginf=0.0)
    # debug
if not torch.isfinite(total_reward).all():
print(f"[DEBUG] Non-finite reward detected in gripper_lid_orientation_alignment: {total_reward}")
print(f"rew_vertical: {rew_vertical}")
print(f"rew_yaw: {rew_yaw}")
return total_reward
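# Added note: the term above effectively computes
#   rew_vertical = max(0, 1 - theta_tilt / theta_max)   with theta_tilt the angle between the
#                                                        gripper forward axis and world -Z
#   rew_yaw      = cos^2(theta_yaw) + 1e-4               with theta_yaw between the gripper width
#                                                        axis and the lid handle axis
#   total        = rew_vertical * rew_yaw
# so the reward survives only when the gripper both points downward and has its width axis
# parallel to the handle.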
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body", # 用于计算距离的 body
# # 注意:确保 joint_names 只包含那两个夹爪关节
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05, # 5cm 以内视为“近”,需要闭合
# target_open_pos: float = 0.0, # 【修正】张开是 0.0
# target_close_pos: float = 0.03 # 【修正】闭合是 0.03
# ) -> torch.Tensor:
# """
# 逻辑很简单:
# 1. 如果 距离 < close_threshold目标关节位置 = 0.03 (闭合)
# 2. 如果 距离 >= close_threshold目标关节位置 = 0.0 (张开)
# 3. 返回 -abs(当前关节 - 目标关节),即惩罚误差。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算距离 (末端 vs Lid中心)
# lid_pos_w = lid.data.root_pos_w[:, :3]
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # distance: (num_envs,)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 3. 动态确定目标关节值
# # 如果距离小于阈值,目标设为 close_pos否则设为 open_pos
# # target_val: (num_envs, 1)
# # print(distance)
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 4. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# # joint_pos: (num_envs, 2)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# # 取绝对值防止左指是负数的情况虽然你的配置里范围看起来都是正的加上abs更保险
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 5. 计算误差
# # error: (num_envs,) -> 两个手指误差的平均值
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # # 6. 返回负奖励 (惩罚)
# # # 误差越大,惩罚越大。完全符合要求时奖励为 0。
# # return -error
# # =====================================================
# # 🚀 核心修复:安全输出
# # =====================================================
# # 1. 使用 tanh 限制数值范围在 [-1, 0]
# reward = -torch.tanh(error / 0.01)
# # 2. 过滤 NaN这一步能救命
# # 如果物理引擎算出 NaN这里会把它变成 0.0,防止训练崩溃
# reward = torch.nan_to_num(reward, nan=0.0, posinf=0.0, neginf=-1.0)
# return reward
# def gripper_open_close_reward(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# left_gripper_name: str = "left_hand_body",
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# close_threshold: float = 0.05,
# target_open_pos: float = 0.0,
# target_close_pos: float = 0.03,
# height_offset: float = 0.06, # 新增:与位置奖励保持一致
# ) -> torch.Tensor:
# """
# 逻辑:
# 1. 计算夹爪到【目标抓取点】(Lid上方 height_offset 处) 的距离。
# 2. 如果距离 < close_threshold目标关节位置 = 闭合。
# 3. 否则:目标关节位置 = 张开。
# """
# # 1. 获取对象
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# # 2. 计算目标抓取点位置 (Lid位置 + Z轴偏移)
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 修正为抓取点位置
# # 获取夹爪位置
# body_ids, _ = robot.find_bodies([left_gripper_name])
# gripper_pos_w = robot.data.body_pos_w[:, body_ids[0], :3]
# # 3. 计算距离 (夹爪 vs 目标抓取点)
# distance = torch.norm(gripper_pos_w - lid_pos_w, dim=-1)
# # 4. 动态确定目标关节值
# # 靠近抓取点 -> 闭合;远离 -> 张开
# target_val = torch.where(
# distance < close_threshold,
# torch.tensor(target_close_pos, device=env.device),
# torch.tensor(target_open_pos, device=env.device)
# ).unsqueeze(1)
# # 5. 获取当前关节位置
# joint_ids, _ = robot.find_joints(joint_names)
# current_joint_pos = robot.data.joint_pos[:, joint_ids]
# current_joint_pos_abs = torch.abs(current_joint_pos)
# # 6. 计算误差并返回奖励
# error = torch.mean(torch.abs(current_joint_pos_abs - target_val), dim=1)
# # 使用 tanh 限制数值范围
# reward = 1.0 - torch.tanh(error / 0.01) # 误差越小奖励越大
# return torch.nan_to_num(reward, nan=0.0)
def _is_grasped(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg,
robot_cfg: SceneEntityCfg,
left_gripper_name: str,
right_gripper_name: str,
joint_names: list,
height_offset: float,
grasp_radius: float,
target_close_pos: float
) -> torch.Tensor:
    """Internal helper: decide whether the lid is successfully grasped.

    Conditions:
    1. The gripper center is near the handle target point (sphere check).
    2. The gripper is actively closing (joint effort check).
    3. The gripper height is valid, i.e. it is not pressing down on top of the lid (Z-axis check).
    """
    # 1. fetch the assets
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    # 2. target point position (handle center)
    lid_pos_w = lid.data.root_pos_w[:, :3].clone()
    lid_pos_w[:, 2] += height_offset
    # 3. gripper finger positions
    l_ids, _ = robot.find_bodies([left_gripper_name])
    r_ids, _ = robot.find_bodies([right_gripper_name])
    pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
    pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
    gripper_center = (pos_L + pos_R) / 2.0
    # 4. condition one: distance check (inside a sphere around the handle)
    dist_to_handle = torch.norm(gripper_center - lid_pos_w, dim=-1)
    is_near = dist_to_handle < grasp_radius
    # 5. condition two: Z-height check (prevents pressing down on top of the lid)
    # the gripper center must stay near the handle target point; small vertical deviations
    # are allowed, assuming height_offset marks the handle's geometric center
    z_diff = gripper_center[:, 2] - lid_pos_w[:, 2]
    is_z_valid = torch.abs(z_diff) < 0.03  # 3 cm tolerance
    # 6. condition three: closing check (the fingers are actively closing)
    joint_ids, _ = robot.find_joints(joint_names)
    # joint positions (absolute values)
    joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
    # effort check: only require that the fingers have moved toward closing; no upper
    # bound, so a firm grip is not misclassified
    is_effort_closing = torch.all(joint_pos > 0.005, dim=1)
    # geometry check: if something is actually grasped, the handle keeps the fingertips
    # apart, so their distance cannot collapse to zero
    dist_fingertips = torch.norm(pos_L - pos_R, dim=-1)
    # with a handle roughly 1 cm thick, fingertips closer than 5 mm means an empty grasp
    is_not_empty = dist_fingertips > 0.005
    # combined check: near the handle (is_near), valid height (is_z_valid),
    # actively closing (is_effort_closing), fingertips not collapsed (is_not_empty)
return is_near & is_z_valid & is_effort_closing & is_not_empty
def gripper_close_when_near(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
target_close_pos: float = 0.03,
height_offset: float = 0.02,
grasp_radius: float = 0.05
) -> torch.Tensor:
    # 1. check whether the basic grasp conditions are satisfied
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
    # 2. compute the clamping intensity
    robot = env.scene[robot_cfg.name]
    joint_ids, _ = robot.find_joints(joint_names)
    # current absolute joint positions, shape (num_envs, num_joints)
    current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
    # mean closing depth over the two finger joints
    mean_joint_pos = torch.mean(current_joint_pos, dim=1)
    # reward factor: current position / target closed position, so the reward approaches 1.0
    # as the joints approach 0.03, pushing the agent to close fully
    clamping_factor = torch.clamp(mean_joint_pos / target_close_pos, max=1.0)
    # 3. the weighted clamping reward is paid only when the grasp check passes;
    # if the gripper is misaligned or empty, the reward stays 0
return torch.where(is_grasped, clamping_factor, 0.0)
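# Added note (worked numbers with the defaults above): with target_close_pos = 0.03 a mean
# finger position of 0.015 gives clamping_factor = 0.5, and anything at or beyond 0.03
# saturates at 1.0; because the result is gated by _is_grasped, a fully closed but empty or
# misaligned gripper still earns 0.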
# def gripper_close_when_near(
# env: ManagerBasedRLEnv,
# lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
# robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
# # 注意:这里我们需要具体的指尖 body 名字来做几何判定
# left_gripper_name: str = "left_hand__l", # 请确认你的USD里左指尖body名
# right_gripper_name: str = "left_hand_r", # 请确认你的USD里右指尖body名
# joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
# target_close_pos: float = 0.03,
# height_offset: float = 0.03,
# grasp_radius: float = 0.02 # 球面半径 2cm
# ) -> torch.Tensor:
# # 1. 获取位置
# lid = env.scene[lid_cfg.name]
# robot = env.scene[robot_cfg.name]
# lid_pos_w = lid.data.root_pos_w[:, :3].clone()
# lid_pos_w[:, 2] += height_offset # 把手中心
# # 获取左右指尖位置
# l_ids, _ = robot.find_bodies([left_gripper_name])
# r_ids, _ = robot.find_bodies([right_gripper_name])
# pos_L = robot.data.body_pos_w[:, l_ids[0], :3]
# pos_R = robot.data.body_pos_w[:, r_ids[0], :3]
# # 计算夹爪中心
# pos_center = (pos_L + pos_R) / 2.0
# # 2. 距离判定 (Is Inside Sphere?)
# dist_center = torch.norm(pos_center - lid_pos_w, dim=-1)
# is_in_sphere = (dist_center < grasp_radius).float()
# # 3. "在中间"判定 (Is Between?)
# # 投影逻辑:物体投影在 LR 连线上,且位于 L 和 R 之间
# vec_LR = pos_R - pos_L # 左指指向右指
# vec_LO = lid_pos_w - pos_L # 左指指向物体
# # 计算投影比例 t
# # P_proj = P_L + t * (P_R - P_L)
# # t = (vec_LO . vec_LR) / |vec_LR|^2
# # 如果 0 < t < 1说明投影在两个指尖之间
# len_sq = torch.sum(vec_LR * vec_LR, dim=-1) + 1e-6
# dot = torch.sum(vec_LO * vec_LR, dim=-1)
# t = dot / len_sq
# is_between = (t > 0.0) & (t < 1.0)
# is_between = is_between.float()
# # 4. 闭合判定
# joint_ids, _ = robot.find_joints(joint_names)
# # 注意:如果是 Binary Action可能很难拿到连续的 0~0.03 控制值,如果是仿真状态则没问题
# current_joint_pos = torch.abs(robot.data.joint_pos[:, joint_ids])
# close_error = torch.mean(torch.abs(current_joint_pos - target_close_pos), dim=1)
# # 只有当非常接近闭合目标时才给分
# is_closing = (close_error < 0.005).float() # 允许 5mm 误差
# # 5. 最终奖励
# # 只有三者同时满足才给 1.0 分
# reward = is_in_sphere * is_between * is_closing
# return torch.nan_to_num(reward, nan=0.0)
# def lid_is_lifted(
# env:ManagerBasedRLEnv,
# minimal_height:float,
# lid_cfg:SceneEntityCfg = SceneEntityCfg("lid")
# ) -> torch.Tensor:
# lid = env.scene[lid_cfg.name]
# lid_height = lid.data.root_pos_w[:, 2]
# reward=torch.where(lid_height > minimal_height, 1.0, 0.0)
# reward=torch.nan_to_num(reward, nan=0.0)
# return reward
def lid_is_lifted(
env: ManagerBasedRLEnv,
    minimal_height: float = 0.05,  # relative lift threshold
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
height_offset: float = 0.03,
grasp_radius: float = 0.1,
target_close_pos: float = 0.03,
) -> torch.Tensor:
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
lid = env.scene[lid_cfg.name]
    # 1. current height
    current_height = lid.data.root_pos_w[:, 2]
    # initial height, read automatically from the default root state
    initial_height = lid.data.default_root_state[:, 2]
    # 2. lift amount
    lift_height = torch.clamp(current_height - initial_height, min=0.0)
    # 3. velocity check (guards against the lid being knocked flying)
    # if the lid moves faster than 1.0 m/s it is treated as knocked away and earns nothing
    lid_vel = torch.norm(lid.data.root_lin_vel_w, dim=1)
    is_stable = lid_vel < 1.0
    # 4. reward
    # shaping term: higher lift -> higher reward
    shaping_reward = lift_height * 2.0
    # success bonus: lift exceeds the threshold
    success_bonus = torch.where(lift_height > minimal_height, 1.0, 0.0)
    # combine: requires is_grasped AND is_stable
    reward = torch.where(is_stable & is_grasped, shaping_reward + success_bonus, torch.zeros_like(lift_height))
return torch.nan_to_num(reward, nan=0.0)
def lid_lift_success_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand_body",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
    # key parameters
    settled_height: float = 0.75,  # resting height of the lid on the ultrasound unit (tune to the simulation)
    target_lift_height: float = 0.05,  # target lift height (5 cm)
    grasp_dist_threshold: float = 0.05,  # grasp check distance
    closed_pos: float = 0.03  # gripper closed threshold
) -> torch.Tensor:
    """Lift reward: a high score is paid only when the lid is held securely and lifted by
    the target amount relative to its settled height."""
    # 1. fetch data
    lid = env.scene[lid_cfg.name]
    robot = env.scene[robot_cfg.name]
    # current lid height
    lid_z = lid.data.root_pos_w[:, 2]
    lid_pos = lid.data.root_pos_w[:, :3]
    # gripper position
    body_ids, _ = robot.find_bodies([left_gripper_name])
    gripper_pos = robot.data.body_pos_w[:, body_ids[0], :3]
    # 2. condition one: is the lid grasped securely?
    # distance check
    distance = torch.norm(gripper_pos - lid_pos, dim=-1)
    is_close = distance < grasp_dist_threshold
    # closing check
    joint_ids, _ = robot.find_joints(joint_names)
    joint_pos_abs = torch.abs(robot.data.joint_pos[:, joint_ids])
    # treat more than 80% of the closed position as a firm grip
    is_closed = torch.all(joint_pos_abs > (closed_pos * 0.8), dim=-1)
    is_grasped = is_close & is_closed
    # 3. condition two: relative lift
    # current height - settled height
    current_lift = lid_z - settled_height
    # 4. reward
    lift_progress = torch.clamp(current_lift / target_lift_height, min=0.0, max=1.0)
    # shaping reward
    lift_reward = lift_progress
    # success bonus once the lift reaches the target height
    success_bonus = torch.where(current_lift >= target_lift_height, 2.0, 0.0)
    # combine: the lift reward is paid only while is_grasped is True
    total_reward = torch.where(is_grasped, lift_reward + success_bonus, torch.zeros_like(lift_reward))
    # 5. safe output
    return torch.nan_to_num(total_reward, nan=0.0)
def lid_lift_progress_reward(
env: ManagerBasedRLEnv,
lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
left_gripper_name: str = "left_hand__l",
right_gripper_name: str = "left_hand_r",
joint_names: list = ["left_hand_joint_left", "left_hand_joint_right"],
initial_height: float = 0.8,
target_height_lift: float = 0.15,
    height_offset: float = 0.02,  # must stay consistent with the grasp reward
    grasp_radius: float = 0.1,  # slightly relaxed radius while lifting, so jitter does not drop the reward
    target_close_pos: float = 0.03,
    std: float = 0.05  # standard deviation of the tanh kernel
) -> torch.Tensor:
    # 1. check whether the lid is grasped
is_grasped = _is_grasped(
env, lid_cfg, robot_cfg, left_gripper_name, right_gripper_name,
joint_names, height_offset, grasp_radius, target_close_pos
)
    # 2. compute the lift height
    lid = env.scene[lid_cfg.name]
    current_height = lid.data.root_pos_w[:, 2]
    lift_height = torch.clamp(current_height - initial_height, min=0.0, max=target_height_lift)
    # 3. compute the reward
    # the height reward is paid only while is_grasped is True, which rules out scoring
    # by knocking the lid into the air
progress = torch.tanh(lift_height / std)
reward = torch.where(is_grasped, progress, 0.0)
return reward
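# Added note (worked numbers with the defaults above): progress = tanh(lift / std) with
# std = 0.05 and the lift clamped to target_height_lift = 0.15, so a 2.5 cm lift scores
# about 0.46, 5 cm about 0.76, and the capped 15 cm about 1.0, always gated by _is_grasped.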

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def lid_dropped(env: ManagerBasedRLEnv,
minimum_height: float = -0.05,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube")) -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
return lid.data.root_pos_w[:, 2] < minimum_height
def lid_successfully_grasped(env: ManagerBasedRLEnv,
distance_threshold: float = 0.03,
height_threshold: float = 0.2,
lid_cfg: SceneEntityCfg = SceneEntityCfg("DexCube"),
robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
gripper_body_name: str = "left_hand_body") -> torch.Tensor:
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
body_idx = robot.find_bodies(gripper_body_name)[0]
gripper_pos_w = robot.data.body_pos_w[:, body_idx, :]
distance = torch.norm(lid.data.root_pos_w[:, :3] - gripper_pos_w, dim=1)
is_close = distance < distance_threshold
is_lifted = lid.data.root_pos_w[:, 2] > height_threshold
return is_close & is_lifted
def gripper_at_lid_side(env: ManagerBasedRLEnv,
                        lid_cfg: SceneEntityCfg = SceneEntityCfg("lid"),
                        robot_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot"),
                        left_gripper_name: str = "left_hand__l",  # note the double underscore in the body name
                        right_gripper_name: str = "left_hand_r",
                        side_distance: float = 0.05,
                        side_tolerance: float = 0.01,
                        alignment_tolerance: float = 0.02,
                        height_offset: float = 0.1,
                        height_tolerance: float = 0.02) -> torch.Tensor:
    """Terminate when the gripper center sits beside the lid at the specified height.

    Coordinate conventions:
    - X direction: the direction along which the two gripper fingers close toward each other
    - Y direction: the direction of the gap between the fingers, aligned with the lid handle
    - Z direction: height
"""
lid: RigidObject = env.scene[lid_cfg.name]
robot: Articulation = env.scene[robot_cfg.name]
lid_pos_w = lid.data.root_pos_w[:, :3]
    # positions of the two gripper fingers
left_body_ids, _ = robot.find_bodies([left_gripper_name], preserve_order=True)
right_body_ids, _ = robot.find_bodies([right_gripper_name], preserve_order=True)
left_gripper_pos = robot.data.body_pos_w[:, left_body_ids[0], :]
right_gripper_pos = robot.data.body_pos_w[:, right_body_ids[0], :]
    # compute the gripper center position
    gripper_center = (left_gripper_pos + right_gripper_pos) / 2.0
    rel_pos = gripper_center - lid_pos_w
    # Y direction: the fingers should straddle the lid (distance ~ side_distance)
    y_dist = torch.abs(rel_pos[:, 1])
    y_ok = (y_dist >= side_distance - side_tolerance) & (y_dist <= side_distance + side_tolerance)
    # X direction: should be aligned (close to 0)
    x_dist = torch.abs(rel_pos[:, 0])
    x_ok = x_dist < alignment_tolerance
    # Z direction: should sit height_offset above the lid
    z_error = torch.abs(rel_pos[:, 2] - height_offset)
    z_ok = z_error < height_tolerance
    # all conditions must hold
return x_ok & y_ok & z_ok
def base_height_failure(env: ManagerBasedRLEnv,
minimum_height: float | None = None,
maximum_height: float | None = None,
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Terminate when the robot's base height is outside the specified range."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
root_pos_z = asset.data.root_pos_w[:, 2]
# check if height is outside the range
out_of_bounds = torch.zeros_like(root_pos_z, dtype=torch.bool)
if minimum_height is not None:
out_of_bounds |= root_pos_z < minimum_height
if maximum_height is not None:
out_of_bounds |= root_pos_z > maximum_height
return out_of_bounds

View File

@@ -0,0 +1,488 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
import torch
from isaaclab.actuators import ImplicitActuatorCfg
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
from . import mdp
##
# Pre-defined configs
##
from mindbot.robot.mindbot import MINDBOT_CFG
# ====== Additional object configurations ======
from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
TABLE_CFG=RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Table",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.95, -0.3, 0.005],
rot=[0.7071, 0.0, 0.0, 0.7071],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/table/Table_C.usd",
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
LID_CFG = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Lid",
init_state=RigidObjectCfg.InitialStateCfg(
pos=[0.2, 0.65, 0.9],
rot=[1.0, 0.0, 0.0, 0.0],
lin_vel=[0.0, 0.0, 0.0],
ang_vel=[0.0, 0.0, 0.0],
),
spawn=UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
# scale=(0.2, 0.2, 0.2),
rigid_props=RigidBodyPropertiesCfg(
rigid_body_enabled= True,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
max_angular_velocity=1000.0,
max_linear_velocity=1000.0,
max_depenetration_velocity=0.5,#original 5.0
linear_damping=5.0, #original 0.5
angular_damping=5.0, #original 0.5
disable_gravity=False,
),
collision_props=CollisionPropertiesCfg(
collision_enabled=True,
contact_offset=0.0005,#original 0.02
rest_offset=0
),
),
)
ULTRASOUND_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path="/home/ubuntu/50T/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=1.0
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,
solver_position_iteration_count=32,
solver_velocity_iteration_count=16,
stabilization_threshold=1e-6,
# fix_root_link=True,
),
),
init_state=ArticulationCfg.InitialStateCfg(pos=(0.25, 0.65, 0.1)),
# actuators={}
actuators={
"passive_damper": ImplicitActuatorCfg(
# ".*" 表示匹配该USD文件内的所有关节无论是轮子、屏幕转轴还是其他
joint_names_expr=[".*"],
stiffness=0.0,
damping=1000.0,
effort_limit_sim=10000.0,
velocity_limit_sim=100.0,
),
}
)
##
# Scene definition
##
@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
"""Configuration for a cart-pole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
##
# MDP settings
##
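# Helper observation: world-frame position of the left gripper body, expressed relative to
# each environment's origin so that cloned environments report comparable values.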
def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
asset = env.scene[asset_cfg.name]
body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
return pos_w
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
# Task-space controller: the policy outputs a 6-D end-effector pose delta (position + orientation)
left_arm_ee = DifferentialInverseKinematicsActionCfg(
asset_name="Mindbot",
joint_names=["l_joint[1-6]"], # 左臂的6个关节
body_name="left_hand_body", # 末端执行器body名称
controller=DifferentialIKControllerCfg(
command_type="pose", # 控制位置+姿态
use_relative_mode=True, # 相对模式:动作是增量
ik_method="dls", # Damped Least Squares方法
),
scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
)
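# Note on the action semantics (a sketch, assuming Isaac Lab's standard relative-mode IK action):
# the 6-D policy action is multiplied element-wise by `scale`, so an action of 1.0 on a translation
# channel requests roughly a 0.01 m end-effector offset, and 1.0 on a rotation channel roughly
# 0.025 rad, per control step.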
# right_arm_fixed = mdp.JointPositionActionCfg(
# asset_name="Mindbot",
# joint_names=["r_joint[1-6]"], # 对应 l_joint1 到 l_joint6
# # 1. 缩放设为 0这让 RL 策略无法控制它,它不会动
# scale=0.0,
# # 2. 偏移设为你的目标角度(弧度):这告诉电机“永远停在这里”
# # 对应 (135, -70, -90, 90, 90, -70)
# offset={
# "r_joint1": 2.3562,
# "r_joint2": -1.2217,
# "r_joint3": -1.5708,
# "r_joint4": 1.5708,
# "r_joint5": 1.5708,
# "r_joint6": -1.2217,
# },
# )
grippers_position = mdp.BinaryJointPositionActionCfg(
asset_name="Mindbot",
joint_names=["left_hand_joint_left", "left_hand_joint_right"],
# open/close commands use a dict of {joint-name regex: target position}
open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
)
trunk_position = mdp.JointPositionActionCfg(
asset_name="Mindbot",
joint_names=["PrismaticJoint"],
scale=0.00,
offset=0.3,
clip=None
)
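# Resulting action space (given the terms above): 6 (relative EE pose delta) + 1 (binary gripper
# open/close) + 1 (trunk prismatic joint; scale 0 keeps it pinned at the 0.3 offset) = 8 dims.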
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
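# With concatenate_terms=True the terms above are flattened and concatenated in declaration
# order into a single "policy" observation vector.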
@configclass
class EventCfg:
"""Configuration for events."""
# reset all joints to their init_state (no offset); required
reset_mindbot_all_joints = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"position_range": (0.0, 0.0),
"velocity_range": (0.0, 0.0),
},
)
# 2. base mounting error (consider shrinking to about 2 cm)
reset_mindbot_base = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot"),
"pose_range": {
"x": (-0.05, 0.05),
"y": (-0.05, 0.05),
"z": (0.74, 0.75),
"yaw": (-0.1, 0.1),
},
"velocity_range": {"x": (0.0, 0.0), "y": (0.0, 0.0)},
},
)
# 1. small arm-joint offsets (0.01 rad, about 0.6 deg, is a reasonable amount)
reset_mindbot_left_arm = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("Mindbot", joint_names=["l_joint[1-6]"]),
"position_range": (-0.01, 0.01),
"velocity_range": (0.0, 0.0),
},
)
reset_table = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}},
)
reset_ultrasound = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("ultrasound"),
"pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)},
"velocity_range": {"x": (0.0, 0.0)}
}
)
reset_lid = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("lid"),
"pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)},
"velocity_range": {"x": (0.0, 0.0)}
}
)
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
gripper_lid_orientation_alignment = RewTerm(
func=mdp.gripper_lid_orientation_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"gripper_body_name": "left_hand_body",
"gripper_forward_axis": (0.0, 0.0, 1.0),
"gripper_width_axis": (0.0, 1.0, 0.0),
"lid_handle_axis": (0.0, 1.0, 0.0),
"max_angle_deg": 85.0, # 允许60度内的偏差
},
weight=5
)
# stage 2
# position alignment reward: encourage the gripper to sit at the sides of the lid, with Z at height_offset above it
gripper_lid_position_alignment = RewTerm(
func=mdp.gripper_lid_position_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"height_offset": 0.07, # Z方向lid 上方 0.1m
"std": 0.3, # 位置对齐的容忍度
},
weight=3.0
)
# [new] fine alignment (guides the gripper into the ~2 cm zone)
gripper_lid_fine_alignment = RewTerm(
func=mdp.gripper_lid_position_alignment,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"height_offset": 0.07, # 【注意】这个高度必须非常准,和 interaction 判定高度一致
"std": 0.05, # 非常陡峭的梯度,只在 5-10cm 内有效
},
weight=10.0 # high weight
)
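# Rough intuition for the two std values (assuming a Gaussian-like kernel such as exp(-(d/std)^2)
# inside mdp.gripper_lid_position_alignment; the actual kernel lives in that function and is not
# shown here): at d = 0.10 m the coarse term (std 0.3) still yields about 0.90, while the fine
# term (std 0.05) is already down to about 0.02, so it only pays off very close to the target.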
gripper_close_interaction = RewTerm(
func=mdp.gripper_close_when_near,
params={
"lid_cfg": SceneEntityCfg("lid"),
"robot_cfg": SceneEntityCfg("Mindbot"),
"left_gripper_name": "left_hand__l",
"right_gripper_name": "left_hand_r",
"joint_names": ["left_hand_joint_left", "left_hand_joint_right"],
"target_close_pos": 0.03,
"height_offset": 0.04,
"grasp_radius": 0.03,
},
weight=10.0
)
lid_lifted_reward = RewTerm(
func=mdp.lid_is_lifted, # reuse the existing helper in rewards.py
params={
"lid_cfg": SceneEntityCfg("lid"),
"minimal_height": 1.0, # tune for your scene; should sit clearly above the initial lid height
},
weight=10.0 # use a sufficiently large weight
)
lid_lifting_reward = RewTerm(
func=mdp.lid_lift_progress_reward,
params={
"lid_cfg": SceneEntityCfg("lid"),
"initial_height": 0.8, # 请务必核实这个值!在 print 里看一下 lid 的 z 坐标
"target_height_lift": 0.2,
"height_offset": 0.07, # 与其他奖励保持一致
"std": 0.1
},
# 权重设大一点,告诉它这是最终目标
weight=10.0
)
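# Each step the reward manager evaluates every term above, multiplies it by its weight (and, in
# Isaac Lab's manager-based workflow, by the environment step dt), and sums the results.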
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
mindbot_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
)
lid_fly_away = DoneTerm(
func=mdp.base_height_failure,
params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
)
# new: lid-drop termination
# fires if the lid falls back toward the table (height close to its initial value); intended to avoid triggering right after a reset
lid_dropped = DoneTerm(
func=mdp.base_height_failure, # reuse the height-check function
params={
"asset_cfg": SceneEntityCfg("lid"),
"minimum_height": 0.79, # initial lid height is about 0.9; dropping below this means the lid fell or was knocked down
},
)
##
# Environment configuration
##
@configclass
class CurriculumCfg:
pass
# action_rate = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
# joint_vel = CurrTerm(func=mdp.modify_reward_weight,
# params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
# position_std_scheduler = CurrTerm(
# func=mdp.modify_term_cfg, # use the built-in Isaac Lab helper directly
# params={
# # path: rewards manager -> reward term name -> params dict -> std key
# "address": "rewards.gripper_lid_position_alignment.params.std",
# # modification logic: use the annealing function defined in mdp
# "modify_fn": mdp.annealing_std,
# # arguments passed to that function
# "modify_params": {
# "start_step": 00, # roughly iteration 600
# "end_step": 5_000, # roughly iteration 1200
# "start_std": 0.3,
# "end_std": 0.05,
# }
# }
# )
@configclass
class PullUltraSoundLidUpEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=5, env_spacing=3.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# curriculum: CurriculumCfg = CurriculumCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
self.decimation = 4 # original 2; raised to 4 to lower the control frequency (see note below)
self.episode_length_s = 10
# viewer settings
self.viewer.eye = (3.5, 0.0, 3.2)
# simulation settings
self.sim.dt = 1 / 120 #original 1 / 60
self.sim.render_interval = self.decimation
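# Effective control period = decimation * sim.dt = 4 * (1/120) s, about 33 ms (~30 Hz),
# so a 10 s episode corresponds to roughly 300 policy steps.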
# 1. base GPU heap memory
self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024 # 256 MB
self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024 # ~16.8 million contact points
self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024 # ~1 million contact patches
# 5. aggregate-pair capacities (for complex articulations)
self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 * 1024