diff --git a/.vscode/extensions.json b/.vscode/extensions.json
deleted file mode 100644
index 6306e43..0000000
--- a/.vscode/extensions.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-	// See http://go.microsoft.com/fwlink/?LinkId=827846
-	// for the documentation about the extensions.json format
-	"recommendations": [
-		"ms-python.python",
-		"ms-python.vscode-pylance",
-		"ban.spellright",
-		"ms-iot.vscode-ros",
-		"ms-python.black-formatter",
-		"ms-python.flake8",
-	]
-}
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
deleted file mode 100644
index 93b4218..0000000
--- a/.vscode/tasks.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-    "version": "2.0.0",
-    "tasks": [
-        {
-            "label": "setup_python_env",
-            "type": "shell",
-            "linux": {
-                "command": "${input:isaac_path}/python.sh ${workspaceFolder}/.vscode/tools/setup_vscode.py --isaac_path ${input:isaac_path}"
-            },
-            "windows": {
-                "command": "${input:isaac_path}/python.bat ${workspaceFolder}/.vscode/tools/setup_vscode.py --isaac_path ${input:isaac_path}"
-            }
-        }
-    ],
-    "inputs": [
-        {
-            "id": "isaac_path",
-            "description": "Absolute path to the current Isaac Sim installation. If you installed IsaacSim from pip, the import of it failed. Please make sure you run the task with the correct python environment. As fallback, you can directly execute the python script by running: ``python.sh <path-to-your-project>/.vscode/tools/setup_vscode.py``",
-            "default": "C:/isaacsim",
-            "type": "promptString"
-        },
-    ]
-}
\ No newline at end of file
diff --git a/.vscode/tools/launch.template.json b/.vscode/tools/launch.template.json
index a0b1a0c..a44f114 100644
--- a/.vscode/tools/launch.template.json
+++ b/.vscode/tools/launch.template.json
@@ -4,822 +4,54 @@
     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
     "version": "0.2.0",
     "configurations": [
-        // For standalone script execution
         {
             "name": "Python: Current File",
-            "type": "debugpy",
+            "type": "python",
             "request": "launch",
             "program": "${file}",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Direct-v0 with rl_games (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/rl_games/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Direct-v0 with rl_games (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/rl_games/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/rsl_rl/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/rsl_rl/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Direct-v0 with skrl (AMP)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Direct-v0 with skrl (AMP)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Direct-v0 with skrl (IPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Direct-v0 with skrl (IPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Direct-v0 with skrl (MAPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Direct-v0 with skrl (MAPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Direct-v0 with skrl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Direct-v0 with skrl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Direct-v0 with sb3 (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/sb3/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Direct-v0 with sb3 (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/sb3/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/rl_games/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/rl_games/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/rsl_rl/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/rsl_rl/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/sb3/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/sb3/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-v0 with rl_games (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/rl_games/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-v0 with rl_games (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/rl_games/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-v0 with rsl_rl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/rsl_rl/train.py",
-            "console": "integratedTerminal",
+            "console": "integratedTerminal"
         },
         {
-            "name": "Python: Play Template-Mindbot-v0 with rsl_rl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/rsl_rl/play.py",
-            "console": "integratedTerminal",
+            "name": "Python: Attach (windows-x86_64/linux-x86_64)",
+            "type": "python",
+            "request": "attach",
+            "port": 3000,
+            "host": "localhost"
         },
         {
-            "name": "Python: Train Template-Mindbot-v0 with skrl (AMP)",
-            "type": "debugpy",
+            "name": "Python: Train Environment",
+            "type": "python",
             "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
+            "args" : ["--task", "Isaac-Reach-Franka-v0", "--headless"],
+            "program": "${workspaceFolder}/scripts/reinforcement_learning/rsl_rl/train.py",
+            "console": "integratedTerminal"
         },
         {
-            "name": "Python: Play Template-Mindbot-v0 with skrl (AMP)",
-            "type": "debugpy",
+            "name": "Python: Play Environment",
+            "type": "python",
             "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "AMP"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
+            "args" : ["--task", "Isaac-Reach-Franka-v0", "--num_envs", "32"],
+            "program": "${workspaceFolder}/scripts/reinforcement_learning/rsl_rl/play.py",
+            "console": "integratedTerminal"
         },
         {
-            "name": "Python: Train Template-Mindbot-v0 with skrl (IPPO)",
-            "type": "debugpy",
+            "name": "Python: SinglePytest",
+            "type": "python",
             "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-v0 with skrl (IPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "IPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
+            "module": "pytest",
+            "args": [
+              "${file}"
+            ],
+            "console": "integratedTerminal"
         },
         {
-            "name": "Python: Train Template-Mindbot-v0 with skrl (MAPPO)",
-            "type": "debugpy",
+            "name": "Python: ALL Pytest",
+            "type": "python",
             "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
+            "module": "pytest",
+            "args": ["source/isaaclab/test"],
             "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-v0 with skrl (MAPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-v0 with skrl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-v0 with skrl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "PPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Train Template-Mindbot-v0 with sb3 (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/sb3/train.py",
-            "console": "integratedTerminal",
-        },
-        {
-            "name": "Python: Play Template-Mindbot-v0 with sb3 (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/sb3/play.py",
-            "console": "integratedTerminal",
-        },
-        // For script execution inside a Docker
-        {
-            "name": "Docker: Current File",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "${file}",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            }
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Direct-v0 with rl_games (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/rl_games/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Direct-v0 with rl_games (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/rl_games/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/rsl_rl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/rsl_rl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (AMP)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (AMP)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (IPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (IPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (MAPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (MAPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Direct-v0 with sb3 (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/sb3/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Direct-v0 with sb3 (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/sb3/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/rl_games/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/rl_games/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/rsl_rl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/rsl_rl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/sb3/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/sb3/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-v0 with rl_games (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/rl_games/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-v0 with rl_games (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/rl_games/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-v0 with rsl_rl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/rsl_rl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-v0 with rsl_rl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/rsl_rl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-v0 with skrl (AMP)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-v0 with skrl (AMP)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "AMP"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-v0 with skrl (IPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-v0 with skrl (IPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "IPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-v0 with skrl (MAPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-v0 with skrl (MAPPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-v0 with skrl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
-            "program": "${workspaceFolder}/scripts/skrl/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-v0 with skrl (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "PPO"],
-            "program": "${workspaceFolder}/scripts/skrl/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Train Template-Mindbot-v0 with sb3 (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
-            "program": "${workspaceFolder}/scripts/sb3/train.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
-        {
-            "name": "Docker: Play Template-Mindbot-v0 with sb3 (PPO)",
-            "type": "debugpy",
-            "request": "launch",
-            "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
-            "program": "${workspaceFolder}/scripts/sb3/play.py",
-            "console": "integratedTerminal",
-            "env": {
-                "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
-            },
-        },
+            "justMyCode": false
+        }
     ]
-}
\ No newline at end of file
+}
diff --git a/.vscode/tools/settings.template.json b/.vscode/tools/settings.template.json
index 5b97ac2..4b07a6a 100644
--- a/.vscode/tools/settings.template.json
+++ b/.vscode/tools/settings.template.json
@@ -1,4 +1,9 @@
 {
+    "files.exclude": {
+        "**/.mypy_cache": true,
+        "**/__pycache__": true,
+        "**/*.egg-info": true
+    },
     "files.associations": {
         "*.tpp": "cpp",
         "*.kit": "toml",
@@ -40,7 +45,6 @@
         "teleoperation",
         "xform",
         "numpy",
-        "tensordict",
         "flatcache",
         "physx",
         "dpad",
@@ -51,26 +55,21 @@
         "arange",
         "discretization",
         "trimesh",
-        "uninstanceable"
+        "uninstanceable",
+        "coeff",
+        "prestartup"
     ],
     // This enables python language server. Seems to work slightly better than jedi:
     "python.languageServer": "Pylance",
-    // We use "black" as a formatter:
-    "python.formatting.provider": "black",
-    "python.formatting.blackArgs": ["--line-length", "120"],
-    // Use flake8 for linting
-    "python.linting.pylintEnabled": false,
-    "python.linting.flake8Enabled": true,
-    "python.linting.flake8Args": [
-        "--max-line-length=120"
-    ],
+    // Use ruff as a formatter and linter
+    "ruff.configuration": "${workspaceFolder}/pyproject.toml",
     // Use docstring generator
     "autoDocstring.docstringFormat": "google",
     "autoDocstring.guessTypes": true,
     // Python environment path
     // note: the default interpreter is overridden when user selects a workspace interpreter
     //     in the status bar. For example, the virtual environment python interpreter
-    "python.defaultInterpreterPath": "",
+    "python.defaultInterpreterPath": "${workspaceFolder}/_isaac_sim/python.sh",
     // ROS distribution
     "ros.distro": "noetic",
     // Language specific settings
@@ -81,6 +80,6 @@
         "editor.tabSize": 2
     },
     // Python extra paths
-    // Note: this is filled up when vscode is set up for the first time
+    // Note: this is filled up when "./isaaclab.sh -i" is run
     "python.analysis.extraPaths": []
 }
diff --git a/.vscode/tools/setup_vscode.py b/.vscode/tools/setup_vscode.py
index f8c61b7..9e2c637 100644
--- a/.vscode/tools/setup_vscode.py
+++ b/.vscode/tools/setup_vscode.py
@@ -12,34 +12,21 @@ This is necessary because Isaac Sim 2022.2.1 onwards does not add the necessary
 when the "setup_python_env.sh" is run as part of the vs-code launch configuration.
 """
 
-import argparse
-import os
-import pathlib
-import platform
 import re
 import sys
+import os
+import pathlib
 
-PROJECT_DIR = pathlib.Path(__file__).parents[2]
-"""Path to the the project directory."""
+
+ISAACLAB_DIR = pathlib.Path(__file__).parents[2]
+"""Path to the Isaac Lab directory."""
 
 try:
     import isaacsim  # noqa: F401
 
     isaacsim_dir = os.environ.get("ISAAC_PATH", "")
 except ModuleNotFoundError or ImportError:
-    # Create a parser to get the isaac-sim path
-    parser = argparse.ArgumentParser(description="Setup the VSCode settings for the project.")
-    parser.add_argument("--isaac_path", type=str, help="The absolute path to the Isaac Sim installation.")
-    args = parser.parse_args()
-
-    # parse the isaac-sim directory
-    isaacsim_dir = args.isaac_path
-    # check if the isaac-sim directory is provided
-    if not os.path.exists(isaacsim_dir):
-        raise FileNotFoundError(
-            f"Could not find the isaac-sim directory: {isaacsim_dir}. Please provide the correct path to the Isaac Sim"
-            " installation."
-        )
+    isaacsim_dir = os.path.join(ISAACLAB_DIR, "_isaac_sim")
 except EOFError:
     print("Unable to trigger EULA acceptance. This is likely due to the script being run in a non-interactive shell.")
     print("Please run the script in an interactive shell to accept the EULA.")
@@ -49,11 +36,11 @@ except EOFError:
 # check if the isaac-sim directory exists
 if not os.path.exists(isaacsim_dir):
     raise FileNotFoundError(
-        f"Could not find the isaac-sim directory: {isaacsim_dir}. There are two possible reasons for this:\n\t1. The"
-        " Isaac Sim directory does not exist as provided CLI path.\n\t2. The script couldn't import the 'isaacsim'"
-        " package. This could be due to the 'isaacsim' package not being installed in the Python"
-        " environment.\n\nPlease make sure that the Isaac Sim directory exists or that the 'isaacsim' package is"
-        " installed."
+        f"Could not find the isaac-sim directory: {isaacsim_dir}. There are two possible reasons for this:"
+        f"\n\t1. The Isaac Sim directory does not exist as a symlink at: {os.path.join(ISAACLAB_DIR, '_isaac_sim')}"
+        "\n\t2. The script could not import the 'isaacsim' package. This could be due to the 'isaacsim' package not "
+        "being installed in the Python environment.\n"
+        "\nPlease make sure that the Isaac Sim directory exists or that the 'isaacsim' package is installed."
     )
 
 ISAACSIM_DIR = isaacsim_dir
@@ -98,7 +85,7 @@ def overwrite_python_analysis_extra_paths(isaaclab_settings: str) -> str:
         path_names = [path_name for path_name in path_names if len(path_name) > 0]
 
         # change the path names to be relative to the Isaac Lab directory
-        rel_path = os.path.relpath(ISAACSIM_DIR, PROJECT_DIR)
+        rel_path = os.path.relpath(ISAACSIM_DIR, ISAACLAB_DIR)
         path_names = ['"${workspaceFolder}/' + rel_path + "/" + path_name + '"' for path_name in path_names]
     else:
         path_names = []
@@ -111,7 +98,7 @@ def overwrite_python_analysis_extra_paths(isaaclab_settings: str) -> str:
         )
 
     # add the path names that are in the Isaac Lab extensions directory
-    isaaclab_extensions = os.listdir(os.path.join(PROJECT_DIR, "source"))
+    isaaclab_extensions = os.listdir(os.path.join(ISAACLAB_DIR, "source"))
     path_names.extend(['"${workspaceFolder}/source/' + ext + '"' for ext in isaaclab_extensions])
 
     # combine them into a single string
@@ -144,17 +131,15 @@ def overwrite_default_python_interpreter(isaaclab_settings: str) -> str:
         The settings string with overwritten default python interpreter.
     """
     # read executable name
-    python_exe = os.path.normpath(sys.executable)
-
-    # replace with Isaac Sim's python.sh or python.bat scripts to make sure python with correct
-    # source paths is set as default
-    if f"kit{os.sep}python{os.sep}bin{os.sep}python" in python_exe:
-        # Check if the OS is Windows or Linux to use appropriate shell file
-        if platform.system() == "Windows":
-            python_exe = python_exe.replace(f"kit{os.sep}python{os.sep}bin{os.sep}python3", "python.bat")
-        else:
-            python_exe = python_exe.replace(f"kit{os.sep}python{os.sep}bin{os.sep}python3", "python.sh")
+    python_exe = sys.executable.replace("\\", "/")
 
+    # We make an exception for replacing the default interpreter if the
+    # path (/kit/python/bin/python3) indicates that we are using a local/container
+    # installation of IsaacSim. We will preserve the calling script as the default, python.sh.
+    # We want to use python.sh because it modifies LD_LIBRARY_PATH and PYTHONPATH
+    # (among other envars) that we need for all of our dependencies to be accessible.
+    if "kit/python/bin/python3" in python_exe:
+        return isaaclab_settings
     # replace the default python interpreter in the Isaac Lab settings file with the path to the
     # python interpreter in the Isaac Lab directory
     isaaclab_settings = re.sub(
@@ -169,7 +154,7 @@ def overwrite_default_python_interpreter(isaaclab_settings: str) -> str:
 
 def main():
     # Isaac Lab template settings
-    isaaclab_vscode_template_filename = os.path.join(PROJECT_DIR, ".vscode", "tools", "settings.template.json")
+    isaaclab_vscode_template_filename = os.path.join(ISAACLAB_DIR, ".vscode", "tools", "settings.template.json")
     # make sure the Isaac Lab template settings file exists
     if not os.path.exists(isaaclab_vscode_template_filename):
         raise FileNotFoundError(
@@ -195,13 +180,13 @@ def main():
     isaaclab_settings = header_message + isaaclab_settings
 
     # write the Isaac Lab settings file
-    isaaclab_vscode_filename = os.path.join(PROJECT_DIR, ".vscode", "settings.json")
+    isaaclab_vscode_filename = os.path.join(ISAACLAB_DIR, ".vscode", "settings.json")
     with open(isaaclab_vscode_filename, "w") as f:
         f.write(isaaclab_settings)
 
     # copy the launch.json file if it doesn't exist
-    isaaclab_vscode_launch_filename = os.path.join(PROJECT_DIR, ".vscode", "launch.json")
-    isaaclab_vscode_template_launch_filename = os.path.join(PROJECT_DIR, ".vscode", "tools", "launch.template.json")
+    isaaclab_vscode_launch_filename = os.path.join(ISAACLAB_DIR, ".vscode", "launch.json")
+    isaaclab_vscode_template_launch_filename = os.path.join(ISAACLAB_DIR, ".vscode", "tools", "launch.template.json")
     if not os.path.exists(isaaclab_vscode_launch_filename):
         # read template launch settings
         with open(isaaclab_vscode_template_launch_filename) as f:
diff --git a/scripts/environments/export_IODescriptors.py b/scripts/environments/export_IODescriptors.py
new file mode 100644
index 0000000..3f515a1
--- /dev/null
+++ b/scripts/environments/export_IODescriptors.py
@@ -0,0 +1,101 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Script to an environment with random action agent."""
+
+"""Launch Isaac Sim Simulator first."""
+
+import argparse
+import os
+
+from isaaclab.app import AppLauncher
+
+# add argparse arguments
+parser = argparse.ArgumentParser(description="Random agent for Isaac Lab environments.")
+parser.add_argument("--task", type=str, default=None, help="Name of the task.")
+parser.add_argument("--output_dir", type=str, default=None, help="Path to the output directory.")
+# append AppLauncher cli args
+AppLauncher.add_app_launcher_args(parser)
+# parse the arguments
+args_cli = parser.parse_args()
+args_cli.headless = True
+
+# launch omniverse app
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+
+"""Rest everything follows."""
+
+import gymnasium as gym
+import torch
+
+import isaaclab_tasks  # noqa: F401
+from isaaclab_tasks.utils import parse_env_cfg
+
+# PLACEHOLDER: Extension template (do not remove this comment)
+
+
+def main():
+    """Random actions agent with Isaac Lab environment."""
+    # create environment configuration
+    env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=1, use_fabric=True)
+    # create environment
+    env = gym.make(args_cli.task, cfg=env_cfg)
+
+    # print info (this is vectorized environment)
+    print(f"[INFO]: Gym observation space: {env.observation_space}")
+    print(f"[INFO]: Gym action space: {env.action_space}")
+    # reset environment
+    env.reset()
+
+    outs = env.unwrapped.get_IO_descriptors
+    out_observations = outs["observations"]
+    out_actions = outs["actions"]
+    out_articulations = outs["articulations"]
+    out_scene = outs["scene"]
+    # Make a yaml file with the output
+    import yaml
+
+    name = args_cli.task.lower().replace("-", "_")
+    name = name.replace(" ", "_")
+
+    if not os.path.exists(args_cli.output_dir):
+        os.makedirs(args_cli.output_dir)
+
+    with open(os.path.join(args_cli.output_dir, f"{name}_IO_descriptors.yaml"), "w") as f:
+        print(f"[INFO]: Exporting IO descriptors to {os.path.join(args_cli.output_dir, f'{name}_IO_descriptors.yaml')}")
+        yaml.safe_dump(outs, f)
+
+    for k in out_actions:
+        print(f"--- Action term: {k['name']} ---")
+        k.pop("name")
+        for k1, v1 in k.items():
+            print(f"{k1}: {v1}")
+
+    for obs_group_name, obs_group in out_observations.items():
+        print(f"--- Obs group: {obs_group_name} ---")
+        for k in obs_group:
+            print(f"--- Obs term: {k['name']} ---")
+            k.pop("name")
+            for k1, v1 in k.items():
+                print(f"{k1}: {v1}")
+
+    for articulation_name, articulation_data in out_articulations.items():
+        print(f"--- Articulation: {articulation_name} ---")
+        for k1, v1 in articulation_data.items():
+            print(f"{k1}: {v1}")
+
+    for k1, v1 in out_scene.items():
+        print(f"{k1}: {v1}")
+
+    env.step(torch.zeros(env.action_space.shape, device=env.unwrapped.device))
+    env.close()
+
+
+if __name__ == "__main__":
+    # run the main function
+    main()
+    # close sim app
+    simulation_app.close()
diff --git a/scripts/environments/list_envs.py b/scripts/environments/list_envs.py
new file mode 100644
index 0000000..0beb83e
--- /dev/null
+++ b/scripts/environments/list_envs.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Script to print all the available environments in Isaac Lab.
+
+The script iterates over all registered environments and stores the details in a table.
+It prints the name of the environment, the entry point and the config file.
+
+All the environments are registered in the `isaaclab_tasks` extension. They start
+with `Isaac` in their name.
+"""
+
+"""Launch Isaac Sim Simulator first."""
+
+import argparse
+
+from isaaclab.app import AppLauncher
+
+# add argparse arguments
+parser = argparse.ArgumentParser(description="List Isaac Lab environments.")
+parser.add_argument("--keyword", type=str, default=None, help="Keyword to filter environments.")
+# parse the arguments
+args_cli = parser.parse_args()
+
+# launch omniverse app
+app_launcher = AppLauncher(headless=True)
+simulation_app = app_launcher.app
+
+
+"""Rest everything follows."""
+
+import gymnasium as gym
+from prettytable import PrettyTable
+
+import isaaclab_tasks  # noqa: F401
+
+
+def main():
+    """Print all environments registered in `isaaclab_tasks` extension."""
+    # print all the available environments
+    table = PrettyTable(["S. No.", "Task Name", "Entry Point", "Config"])
+    table.title = "Available Environments in Isaac Lab"
+    # set alignment of table columns
+    table.align["Task Name"] = "l"
+    table.align["Entry Point"] = "l"
+    table.align["Config"] = "l"
+
+    # count of environments
+    index = 0
+    # acquire all Isaac environments names
+    for task_spec in gym.registry.values():
+        if "Isaac" in task_spec.id and (args_cli.keyword is None or args_cli.keyword in task_spec.id):
+            # add details to table
+            table.add_row([index + 1, task_spec.id, task_spec.entry_point, task_spec.kwargs["env_cfg_entry_point"]])
+            # increment count
+            index += 1
+
+    print(table)
+
+
+if __name__ == "__main__":
+    try:
+        # run the main function
+        main()
+    except Exception as e:
+        raise e
+    finally:
+        # close the app
+        simulation_app.close()
diff --git a/scripts/environments/random_agent.py b/scripts/environments/random_agent.py
new file mode 100644
index 0000000..6a40060
--- /dev/null
+++ b/scripts/environments/random_agent.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Script to an environment with random action agent."""
+
+"""Launch Isaac Sim Simulator first."""
+
+import argparse
+
+from isaaclab.app import AppLauncher
+
+# add argparse arguments
+parser = argparse.ArgumentParser(description="Random agent for Isaac Lab environments.")
+parser.add_argument(
+    "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
+)
+parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
+parser.add_argument("--task", type=str, default=None, help="Name of the task.")
+# append AppLauncher cli args
+AppLauncher.add_app_launcher_args(parser)
+# parse the arguments
+args_cli = parser.parse_args()
+
+# launch omniverse app
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+
+"""Rest everything follows."""
+
+import gymnasium as gym
+import torch
+
+import isaaclab_tasks  # noqa: F401
+from isaaclab_tasks.utils import parse_env_cfg
+
+# PLACEHOLDER: Extension template (do not remove this comment)
+
+
+def main():
+    """Random actions agent with Isaac Lab environment."""
+    # create environment configuration
+    env_cfg = parse_env_cfg(
+        args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
+    )
+    # create environment
+    env = gym.make(args_cli.task, cfg=env_cfg)
+
+    # print info (this is vectorized environment)
+    print(f"[INFO]: Gym observation space: {env.observation_space}")
+    print(f"[INFO]: Gym action space: {env.action_space}")
+    # reset environment
+    env.reset()
+    # simulate environment
+    while simulation_app.is_running():
+        # run everything in inference mode
+        with torch.inference_mode():
+            # sample actions from -1 to 1
+            actions = 2 * torch.rand(env.action_space.shape, device=env.unwrapped.device) - 1
+            # apply actions
+            env.step(actions)
+
+    # close the simulator
+    env.close()
+
+
+if __name__ == "__main__":
+    # run the main function
+    main()
+    # close sim app
+    simulation_app.close()
diff --git a/scripts/environments/state_machine/lift_cube_sm.py b/scripts/environments/state_machine/lift_cube_sm.py
new file mode 100644
index 0000000..6136e2e
--- /dev/null
+++ b/scripts/environments/state_machine/lift_cube_sm.py
@@ -0,0 +1,319 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Script to run an environment with a pick and lift state machine.
+
+The state machine is implemented in the kernel function `infer_state_machine`.
+It uses the `warp` library to run the state machine in parallel on the GPU.
+
+.. code-block:: bash
+
+    ./isaaclab.sh -p scripts/environments/state_machine/lift_cube_sm.py --num_envs 32
+
+"""
+
+"""Launch Omniverse Toolkit first."""
+
+import argparse
+
+from isaaclab.app import AppLauncher
+
+# add argparse arguments
+parser = argparse.ArgumentParser(description="Pick and lift state machine for lift environments.")
+parser.add_argument(
+    "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
+)
+parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
+# append AppLauncher cli args
+AppLauncher.add_app_launcher_args(parser)
+# parse the arguments
+args_cli = parser.parse_args()
+
+# launch omniverse app
+app_launcher = AppLauncher(headless=args_cli.headless)
+simulation_app = app_launcher.app
+
+"""Rest everything else."""
+
+from collections.abc import Sequence
+
+import gymnasium as gym
+import torch
+import warp as wp
+
+from isaaclab.assets.rigid_object.rigid_object_data import RigidObjectData
+
+import isaaclab_tasks  # noqa: F401
+from isaaclab_tasks.manager_based.manipulation.lift.lift_env_cfg import LiftEnvCfg
+from isaaclab_tasks.utils.parse_cfg import parse_env_cfg
+
+# initialize warp
+wp.init()
+
+
+class GripperState:
+    """States for the gripper."""
+
+    OPEN = wp.constant(1.0)
+    CLOSE = wp.constant(-1.0)
+
+
+class PickSmState:
+    """States for the pick state machine."""
+
+    REST = wp.constant(0)
+    APPROACH_ABOVE_OBJECT = wp.constant(1)
+    APPROACH_OBJECT = wp.constant(2)
+    GRASP_OBJECT = wp.constant(3)
+    LIFT_OBJECT = wp.constant(4)
+
+
+class PickSmWaitTime:
+    """Additional wait times (in s) for states for before switching."""
+
+    REST = wp.constant(0.2)
+    APPROACH_ABOVE_OBJECT = wp.constant(0.5)
+    APPROACH_OBJECT = wp.constant(0.6)
+    GRASP_OBJECT = wp.constant(0.3)
+    LIFT_OBJECT = wp.constant(1.0)
+
+
+@wp.func
+def distance_below_threshold(current_pos: wp.vec3, desired_pos: wp.vec3, threshold: float) -> bool:
+    return wp.length(current_pos - desired_pos) < threshold
+
+
+@wp.kernel
+def infer_state_machine(
+    dt: wp.array(dtype=float),
+    sm_state: wp.array(dtype=int),
+    sm_wait_time: wp.array(dtype=float),
+    ee_pose: wp.array(dtype=wp.transform),
+    object_pose: wp.array(dtype=wp.transform),
+    des_object_pose: wp.array(dtype=wp.transform),
+    des_ee_pose: wp.array(dtype=wp.transform),
+    gripper_state: wp.array(dtype=float),
+    offset: wp.array(dtype=wp.transform),
+    position_threshold: float,
+):
+    # retrieve thread id
+    tid = wp.tid()
+    # retrieve state machine state
+    state = sm_state[tid]
+    # decide next state
+    if state == PickSmState.REST:
+        des_ee_pose[tid] = ee_pose[tid]
+        gripper_state[tid] = GripperState.OPEN
+        # wait for a while
+        if sm_wait_time[tid] >= PickSmWaitTime.REST:
+            # move to next state and reset wait time
+            sm_state[tid] = PickSmState.APPROACH_ABOVE_OBJECT
+            sm_wait_time[tid] = 0.0
+    elif state == PickSmState.APPROACH_ABOVE_OBJECT:
+        des_ee_pose[tid] = wp.transform_multiply(offset[tid], object_pose[tid])
+        gripper_state[tid] = GripperState.OPEN
+        if distance_below_threshold(
+            wp.transform_get_translation(ee_pose[tid]),
+            wp.transform_get_translation(des_ee_pose[tid]),
+            position_threshold,
+        ):
+            # wait for a while
+            if sm_wait_time[tid] >= PickSmWaitTime.APPROACH_OBJECT:
+                # move to next state and reset wait time
+                sm_state[tid] = PickSmState.APPROACH_OBJECT
+                sm_wait_time[tid] = 0.0
+    elif state == PickSmState.APPROACH_OBJECT:
+        des_ee_pose[tid] = object_pose[tid]
+        gripper_state[tid] = GripperState.OPEN
+        if distance_below_threshold(
+            wp.transform_get_translation(ee_pose[tid]),
+            wp.transform_get_translation(des_ee_pose[tid]),
+            position_threshold,
+        ):
+            if sm_wait_time[tid] >= PickSmWaitTime.APPROACH_OBJECT:
+                # move to next state and reset wait time
+                sm_state[tid] = PickSmState.GRASP_OBJECT
+                sm_wait_time[tid] = 0.0
+    elif state == PickSmState.GRASP_OBJECT:
+        des_ee_pose[tid] = object_pose[tid]
+        gripper_state[tid] = GripperState.CLOSE
+        # wait for a while
+        if sm_wait_time[tid] >= PickSmWaitTime.GRASP_OBJECT:
+            # move to next state and reset wait time
+            sm_state[tid] = PickSmState.LIFT_OBJECT
+            sm_wait_time[tid] = 0.0
+    elif state == PickSmState.LIFT_OBJECT:
+        des_ee_pose[tid] = des_object_pose[tid]
+        gripper_state[tid] = GripperState.CLOSE
+        if distance_below_threshold(
+            wp.transform_get_translation(ee_pose[tid]),
+            wp.transform_get_translation(des_ee_pose[tid]),
+            position_threshold,
+        ):
+            # wait for a while
+            if sm_wait_time[tid] >= PickSmWaitTime.LIFT_OBJECT:
+                # move to next state and reset wait time
+                sm_state[tid] = PickSmState.LIFT_OBJECT
+                sm_wait_time[tid] = 0.0
+    # increment wait time
+    sm_wait_time[tid] = sm_wait_time[tid] + dt[tid]
+
+
+class PickAndLiftSm:
+    """A simple state machine in a robot's task space to pick and lift an object.
+
+    The state machine is implemented as a warp kernel. It takes in the current state of
+    the robot's end-effector and the object, and outputs the desired state of the robot's
+    end-effector and the gripper. The state machine is implemented as a finite state
+    machine with the following states:
+
+    1. REST: The robot is at rest.
+    2. APPROACH_ABOVE_OBJECT: The robot moves above the object.
+    3. APPROACH_OBJECT: The robot moves to the object.
+    4. GRASP_OBJECT: The robot grasps the object.
+    5. LIFT_OBJECT: The robot lifts the object to the desired pose. This is the final state.
+    """
+
+    def __init__(self, dt: float, num_envs: int, device: torch.device | str = "cpu", position_threshold=0.01):
+        """Initialize the state machine.
+
+        Args:
+            dt: The environment time step.
+            num_envs: The number of environments to simulate.
+            device: The device to run the state machine on.
+        """
+        # save parameters
+        self.dt = float(dt)
+        self.num_envs = num_envs
+        self.device = device
+        self.position_threshold = position_threshold
+        # initialize state machine
+        self.sm_dt = torch.full((self.num_envs,), self.dt, device=self.device)
+        self.sm_state = torch.full((self.num_envs,), 0, dtype=torch.int32, device=self.device)
+        self.sm_wait_time = torch.zeros((self.num_envs,), device=self.device)
+
+        # desired state
+        self.des_ee_pose = torch.zeros((self.num_envs, 7), device=self.device)
+        self.des_gripper_state = torch.full((self.num_envs,), 0.0, device=self.device)
+
+        # approach above object offset
+        self.offset = torch.zeros((self.num_envs, 7), device=self.device)
+        self.offset[:, 2] = 0.1
+        self.offset[:, -1] = 1.0  # warp expects quaternion as (x, y, z, w)
+
+        # convert to warp
+        self.sm_dt_wp = wp.from_torch(self.sm_dt, wp.float32)
+        self.sm_state_wp = wp.from_torch(self.sm_state, wp.int32)
+        self.sm_wait_time_wp = wp.from_torch(self.sm_wait_time, wp.float32)
+        self.des_ee_pose_wp = wp.from_torch(self.des_ee_pose, wp.transform)
+        self.des_gripper_state_wp = wp.from_torch(self.des_gripper_state, wp.float32)
+        self.offset_wp = wp.from_torch(self.offset, wp.transform)
+
+    def reset_idx(self, env_ids: Sequence[int] = None):
+        """Reset the state machine."""
+        if env_ids is None:
+            env_ids = slice(None)
+        self.sm_state[env_ids] = 0
+        self.sm_wait_time[env_ids] = 0.0
+
+    def compute(self, ee_pose: torch.Tensor, object_pose: torch.Tensor, des_object_pose: torch.Tensor) -> torch.Tensor:
+        """Compute the desired state of the robot's end-effector and the gripper."""
+        # convert all transformations from (w, x, y, z) to (x, y, z, w)
+        ee_pose = ee_pose[:, [0, 1, 2, 4, 5, 6, 3]]
+        object_pose = object_pose[:, [0, 1, 2, 4, 5, 6, 3]]
+        des_object_pose = des_object_pose[:, [0, 1, 2, 4, 5, 6, 3]]
+
+        # convert to warp
+        ee_pose_wp = wp.from_torch(ee_pose.contiguous(), wp.transform)
+        object_pose_wp = wp.from_torch(object_pose.contiguous(), wp.transform)
+        des_object_pose_wp = wp.from_torch(des_object_pose.contiguous(), wp.transform)
+
+        # run state machine
+        wp.launch(
+            kernel=infer_state_machine,
+            dim=self.num_envs,
+            inputs=[
+                self.sm_dt_wp,
+                self.sm_state_wp,
+                self.sm_wait_time_wp,
+                ee_pose_wp,
+                object_pose_wp,
+                des_object_pose_wp,
+                self.des_ee_pose_wp,
+                self.des_gripper_state_wp,
+                self.offset_wp,
+                self.position_threshold,
+            ],
+            device=self.device,
+        )
+
+        # convert transformations back to (w, x, y, z)
+        des_ee_pose = self.des_ee_pose[:, [0, 1, 2, 6, 3, 4, 5]]
+        # convert to torch
+        return torch.cat([des_ee_pose, self.des_gripper_state.unsqueeze(-1)], dim=-1)
+
+
+def main():
+    # parse configuration
+    env_cfg: LiftEnvCfg = parse_env_cfg(
+        "Isaac-Lift-Cube-Franka-IK-Abs-v0",
+        device=args_cli.device,
+        num_envs=args_cli.num_envs,
+        use_fabric=not args_cli.disable_fabric,
+    )
+    # create environment
+    env = gym.make("Isaac-Lift-Cube-Franka-IK-Abs-v0", cfg=env_cfg)
+    # reset environment at start
+    env.reset()
+
+    # create action buffers (position + quaternion)
+    actions = torch.zeros(env.unwrapped.action_space.shape, device=env.unwrapped.device)
+    actions[:, 3] = 1.0
+    # desired object orientation (we only do position control of object)
+    desired_orientation = torch.zeros((env.unwrapped.num_envs, 4), device=env.unwrapped.device)
+    desired_orientation[:, 1] = 1.0
+    # create state machine
+    pick_sm = PickAndLiftSm(
+        env_cfg.sim.dt * env_cfg.decimation, env.unwrapped.num_envs, env.unwrapped.device, position_threshold=0.01
+    )
+
+    while simulation_app.is_running():
+        # run everything in inference mode
+        with torch.inference_mode():
+            # step environment
+            dones = env.step(actions)[-2]
+
+            # observations
+            # -- end-effector frame
+            ee_frame_sensor = env.unwrapped.scene["ee_frame"]
+            tcp_rest_position = ee_frame_sensor.data.target_pos_w[..., 0, :].clone() - env.unwrapped.scene.env_origins
+            tcp_rest_orientation = ee_frame_sensor.data.target_quat_w[..., 0, :].clone()
+            # -- object frame
+            object_data: RigidObjectData = env.unwrapped.scene["object"].data
+            object_position = object_data.root_pos_w - env.unwrapped.scene.env_origins
+            # -- target object frame
+            desired_position = env.unwrapped.command_manager.get_command("object_pose")[..., :3]
+
+            # advance state machine
+            actions = pick_sm.compute(
+                torch.cat([tcp_rest_position, tcp_rest_orientation], dim=-1),
+                torch.cat([object_position, desired_orientation], dim=-1),
+                torch.cat([desired_position, desired_orientation], dim=-1),
+            )
+
+            # reset state machine
+            if dones.any():
+                pick_sm.reset_idx(dones.nonzero(as_tuple=False).squeeze(-1))
+
+    # close the environment
+    env.close()
+
+
+if __name__ == "__main__":
+    # run the main function
+    main()
+    # close sim app
+    simulation_app.close()
diff --git a/scripts/environments/state_machine/lift_teddy_bear.py b/scripts/environments/state_machine/lift_teddy_bear.py
new file mode 100644
index 0000000..2eb4ae7
--- /dev/null
+++ b/scripts/environments/state_machine/lift_teddy_bear.py
@@ -0,0 +1,341 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Script to demonstrate lifting a deformable object with a robotic arm.
+
+The state machine is implemented in the kernel function `infer_state_machine`.
+It uses the `warp` library to run the state machine in parallel on the GPU.
+
+.. code-block:: bash
+
+    ./isaaclab.sh -p scripts/environments/state_machine/lift_teddy_bear.py
+
+"""
+
+"""Launch Omniverse Toolkit first."""
+
+import argparse
+
+from isaaclab.app import AppLauncher
+
+# add argparse arguments
+parser = argparse.ArgumentParser(description="Pick and lift a teddy bear with a robotic arm.")
+parser.add_argument("--num_envs", type=int, default=1, help="Number of environments to simulate.")
+# append AppLauncher cli args
+AppLauncher.add_app_launcher_args(parser)
+# parse the arguments
+args_cli = parser.parse_args()
+
+# launch omniverse app
+app_launcher = AppLauncher(headless=args_cli.headless)
+simulation_app = app_launcher.app
+
+# disable metrics assembler due to scene graph instancing
+from isaacsim.core.utils.extensions import disable_extension
+
+disable_extension("omni.usd.metrics.assembler.ui")
+
+"""Rest everything else."""
+
+from collections.abc import Sequence
+
+import gymnasium as gym
+import torch
+import warp as wp
+
+from isaaclab.assets.rigid_object.rigid_object_data import RigidObjectData
+
+import isaaclab_tasks  # noqa: F401
+from isaaclab_tasks.manager_based.manipulation.lift.lift_env_cfg import LiftEnvCfg
+from isaaclab_tasks.utils.parse_cfg import parse_env_cfg
+
+# initialize warp
+wp.init()
+
+
+class GripperState:
+    """States for the gripper."""
+
+    OPEN = wp.constant(1.0)
+    CLOSE = wp.constant(-1.0)
+
+
+class PickSmState:
+    """States for the pick state machine."""
+
+    REST = wp.constant(0)
+    APPROACH_ABOVE_OBJECT = wp.constant(1)
+    APPROACH_OBJECT = wp.constant(2)
+    GRASP_OBJECT = wp.constant(3)
+    LIFT_OBJECT = wp.constant(4)
+    OPEN_GRIPPER = wp.constant(5)
+
+
+class PickSmWaitTime:
+    """Additional wait times (in s) for states for before switching."""
+
+    REST = wp.constant(0.2)
+    APPROACH_ABOVE_OBJECT = wp.constant(0.5)
+    APPROACH_OBJECT = wp.constant(0.6)
+    GRASP_OBJECT = wp.constant(0.6)
+    LIFT_OBJECT = wp.constant(1.0)
+    OPEN_GRIPPER = wp.constant(0.0)
+
+
+@wp.func
+def distance_below_threshold(current_pos: wp.vec3, desired_pos: wp.vec3, threshold: float) -> bool:
+    return wp.length(current_pos - desired_pos) < threshold
+
+
+@wp.kernel
+def infer_state_machine(
+    dt: wp.array(dtype=float),
+    sm_state: wp.array(dtype=int),
+    sm_wait_time: wp.array(dtype=float),
+    ee_pose: wp.array(dtype=wp.transform),
+    object_pose: wp.array(dtype=wp.transform),
+    des_object_pose: wp.array(dtype=wp.transform),
+    des_ee_pose: wp.array(dtype=wp.transform),
+    gripper_state: wp.array(dtype=float),
+    offset: wp.array(dtype=wp.transform),
+    position_threshold: float,
+):
+    # retrieve thread id
+    tid = wp.tid()
+    # retrieve state machine state
+    state = sm_state[tid]
+    # decide next state
+    if state == PickSmState.REST:
+        des_ee_pose[tid] = ee_pose[tid]
+        gripper_state[tid] = GripperState.OPEN
+        # wait for a while
+        if sm_wait_time[tid] >= PickSmWaitTime.REST:
+            # move to next state and reset wait time
+            sm_state[tid] = PickSmState.APPROACH_ABOVE_OBJECT
+            sm_wait_time[tid] = 0.0
+    elif state == PickSmState.APPROACH_ABOVE_OBJECT:
+        des_ee_pose[tid] = wp.transform_multiply(offset[tid], object_pose[tid])
+        gripper_state[tid] = GripperState.OPEN
+        if distance_below_threshold(
+            wp.transform_get_translation(ee_pose[tid]),
+            wp.transform_get_translation(des_ee_pose[tid]),
+            position_threshold,
+        ):
+            # wait for a while
+            if sm_wait_time[tid] >= PickSmWaitTime.APPROACH_OBJECT:
+                # move to next state and reset wait time
+                sm_state[tid] = PickSmState.APPROACH_OBJECT
+                sm_wait_time[tid] = 0.0
+    elif state == PickSmState.APPROACH_OBJECT:
+        des_ee_pose[tid] = object_pose[tid]
+        gripper_state[tid] = GripperState.OPEN
+        if distance_below_threshold(
+            wp.transform_get_translation(ee_pose[tid]),
+            wp.transform_get_translation(des_ee_pose[tid]),
+            position_threshold,
+        ):
+            # wait for a while
+            if sm_wait_time[tid] >= PickSmWaitTime.APPROACH_OBJECT:
+                # move to next state and reset wait time
+                sm_state[tid] = PickSmState.GRASP_OBJECT
+                sm_wait_time[tid] = 0.0
+    elif state == PickSmState.GRASP_OBJECT:
+        des_ee_pose[tid] = object_pose[tid]
+        gripper_state[tid] = GripperState.CLOSE
+        # wait for a while
+        if sm_wait_time[tid] >= PickSmWaitTime.GRASP_OBJECT:
+            # move to next state and reset wait time
+            sm_state[tid] = PickSmState.LIFT_OBJECT
+            sm_wait_time[tid] = 0.0
+    elif state == PickSmState.LIFT_OBJECT:
+        des_ee_pose[tid] = des_object_pose[tid]
+        gripper_state[tid] = GripperState.CLOSE
+        if distance_below_threshold(
+            wp.transform_get_translation(ee_pose[tid]),
+            wp.transform_get_translation(des_ee_pose[tid]),
+            position_threshold,
+        ):
+            # wait for a while
+            if sm_wait_time[tid] >= PickSmWaitTime.LIFT_OBJECT:
+                # move to next state and reset wait time
+                sm_state[tid] = PickSmState.OPEN_GRIPPER
+                sm_wait_time[tid] = 0.0
+    elif state == PickSmState.OPEN_GRIPPER:
+        # des_ee_pose[tid] = object_pose[tid]
+        gripper_state[tid] = GripperState.OPEN
+        # wait for a while
+        if sm_wait_time[tid] >= PickSmWaitTime.OPEN_GRIPPER:
+            # move to next state and reset wait time
+            sm_state[tid] = PickSmState.OPEN_GRIPPER
+            sm_wait_time[tid] = 0.0
+    # increment wait time
+    sm_wait_time[tid] = sm_wait_time[tid] + dt[tid]
+
+
+class PickAndLiftSm:
+    """A simple state machine in a robot's task space to pick and lift an object.
+
+    The state machine is implemented as a warp kernel. It takes in the current state of
+    the robot's end-effector and the object, and outputs the desired state of the robot's
+    end-effector and the gripper. The state machine is implemented as a finite state
+    machine with the following states:
+
+    1. REST: The robot is at rest.
+    2. APPROACH_ABOVE_OBJECT: The robot moves above the object.
+    3. APPROACH_OBJECT: The robot moves to the object.
+    4. GRASP_OBJECT: The robot grasps the object.
+    5. LIFT_OBJECT: The robot lifts the object to the desired pose. This is the final state.
+    """
+
+    def __init__(self, dt: float, num_envs: int, device: torch.device | str = "cpu", position_threshold=0.01):
+        """Initialize the state machine.
+
+        Args:
+            dt: The environment time step.
+            num_envs: The number of environments to simulate.
+            device: The device to run the state machine on.
+        """
+        # save parameters
+        self.dt = float(dt)
+        self.num_envs = num_envs
+        self.device = device
+        self.position_threshold = position_threshold
+        # initialize state machine
+        self.sm_dt = torch.full((self.num_envs,), self.dt, device=self.device)
+        self.sm_state = torch.full((self.num_envs,), 0, dtype=torch.int32, device=self.device)
+        self.sm_wait_time = torch.zeros((self.num_envs,), device=self.device)
+
+        # desired state
+        self.des_ee_pose = torch.zeros((self.num_envs, 7), device=self.device)
+        self.des_gripper_state = torch.full((self.num_envs,), 0.0, device=self.device)
+
+        # approach above object offset
+        self.offset = torch.zeros((self.num_envs, 7), device=self.device)
+        self.offset[:, 2] = 0.2
+        self.offset[:, -1] = 1.0  # warp expects quaternion as (x, y, z, w)
+
+        # convert to warp
+        self.sm_dt_wp = wp.from_torch(self.sm_dt, wp.float32)
+        self.sm_state_wp = wp.from_torch(self.sm_state, wp.int32)
+        self.sm_wait_time_wp = wp.from_torch(self.sm_wait_time, wp.float32)
+        self.des_ee_pose_wp = wp.from_torch(self.des_ee_pose, wp.transform)
+        self.des_gripper_state_wp = wp.from_torch(self.des_gripper_state, wp.float32)
+        self.offset_wp = wp.from_torch(self.offset, wp.transform)
+
+    def reset_idx(self, env_ids: Sequence[int] = None):
+        """Reset the state machine."""
+        if env_ids is None:
+            env_ids = slice(None)
+        self.sm_state[env_ids] = 0
+        self.sm_wait_time[env_ids] = 0.0
+
+    def compute(self, ee_pose: torch.Tensor, object_pose: torch.Tensor, des_object_pose: torch.Tensor):
+        """Compute the desired state of the robot's end-effector and the gripper."""
+        # convert all transformations from (w, x, y, z) to (x, y, z, w)
+        ee_pose = ee_pose[:, [0, 1, 2, 4, 5, 6, 3]]
+        object_pose = object_pose[:, [0, 1, 2, 4, 5, 6, 3]]
+        des_object_pose = des_object_pose[:, [0, 1, 2, 4, 5, 6, 3]]
+
+        # convert to warp
+        ee_pose_wp = wp.from_torch(ee_pose.contiguous(), wp.transform)
+        object_pose_wp = wp.from_torch(object_pose.contiguous(), wp.transform)
+        des_object_pose_wp = wp.from_torch(des_object_pose.contiguous(), wp.transform)
+
+        # run state machine
+        wp.launch(
+            kernel=infer_state_machine,
+            dim=self.num_envs,
+            inputs=[
+                self.sm_dt_wp,
+                self.sm_state_wp,
+                self.sm_wait_time_wp,
+                ee_pose_wp,
+                object_pose_wp,
+                des_object_pose_wp,
+                self.des_ee_pose_wp,
+                self.des_gripper_state_wp,
+                self.offset_wp,
+                self.position_threshold,
+            ],
+            device=self.device,
+        )
+
+        # convert transformations back to (w, x, y, z)
+        des_ee_pose = self.des_ee_pose[:, [0, 1, 2, 6, 3, 4, 5]]
+        # convert to torch
+        return torch.cat([des_ee_pose, self.des_gripper_state.unsqueeze(-1)], dim=-1)
+
+
+def main():
+    # parse configuration
+    env_cfg: LiftEnvCfg = parse_env_cfg(
+        "Isaac-Lift-Teddy-Bear-Franka-IK-Abs-v0",
+        device=args_cli.device,
+        num_envs=args_cli.num_envs,
+    )
+
+    env_cfg.viewer.eye = (2.1, 1.0, 1.3)
+
+    # create environment
+    env = gym.make("Isaac-Lift-Teddy-Bear-Franka-IK-Abs-v0", cfg=env_cfg)
+    # reset environment at start
+    env.reset()
+
+    # create action buffers (position + quaternion)
+    actions = torch.zeros(env.unwrapped.action_space.shape, device=env.unwrapped.device)
+    actions[:, 3] = 1.0
+    # desired rotation after grasping
+    desired_orientation = torch.zeros((env.unwrapped.num_envs, 4), device=env.unwrapped.device)
+    desired_orientation[:, 1] = 1.0
+
+    object_grasp_orientation = torch.zeros((env.unwrapped.num_envs, 4), device=env.unwrapped.device)
+    # z-axis pointing down and 45 degrees rotation
+    object_grasp_orientation[:, 1] = 0.9238795
+    object_grasp_orientation[:, 2] = -0.3826834
+    object_local_grasp_position = torch.tensor([0.02, -0.08, 0.0], device=env.unwrapped.device)
+
+    # create state machine
+    pick_sm = PickAndLiftSm(env_cfg.sim.dt * env_cfg.decimation, env.unwrapped.num_envs, env.unwrapped.device)
+
+    while simulation_app.is_running():
+        # run everything in inference mode
+        with torch.inference_mode():
+            # step environment
+            dones = env.step(actions)[-2]
+
+            # observations
+            # -- end-effector frame
+            ee_frame_sensor = env.unwrapped.scene["ee_frame"]
+            tcp_rest_position = ee_frame_sensor.data.target_pos_w[..., 0, :].clone() - env.unwrapped.scene.env_origins
+            tcp_rest_orientation = ee_frame_sensor.data.target_quat_w[..., 0, :].clone()
+            # -- object frame
+            object_data: RigidObjectData = env.unwrapped.scene["object"].data
+            object_position = object_data.root_pos_w - env.unwrapped.scene.env_origins
+            object_position += object_local_grasp_position
+
+            # -- target object frame
+            desired_position = env.unwrapped.command_manager.get_command("object_pose")[..., :3]
+
+            # advance state machine
+            actions = pick_sm.compute(
+                torch.cat([tcp_rest_position, tcp_rest_orientation], dim=-1),
+                torch.cat([object_position, object_grasp_orientation], dim=-1),
+                torch.cat([desired_position, desired_orientation], dim=-1),
+            )
+
+            # reset state machine
+            if dones.any():
+                pick_sm.reset_idx(dones.nonzero(as_tuple=False).squeeze(-1))
+
+    # close the environment
+    env.close()
+
+
+if __name__ == "__main__":
+    # run the main function
+    main()
+    # close sim app
+    simulation_app.close()
diff --git a/scripts/environments/state_machine/open_cabinet_sm.py b/scripts/environments/state_machine/open_cabinet_sm.py
new file mode 100644
index 0000000..3cb88d3
--- /dev/null
+++ b/scripts/environments/state_machine/open_cabinet_sm.py
@@ -0,0 +1,333 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Script to run an environment with a cabinet opening state machine.
+
+The state machine is implemented in the kernel function `infer_state_machine`.
+It uses the `warp` library to run the state machine in parallel on the GPU.
+
+.. code-block:: bash
+
+    ./isaaclab.sh -p scripts/environments/state_machine/open_cabinet_sm.py --num_envs 32
+
+"""
+
+"""Launch Omniverse Toolkit first."""
+
+import argparse
+
+from isaaclab.app import AppLauncher
+
+# add argparse arguments
+parser = argparse.ArgumentParser(description="Pick and lift state machine for cabinet environments.")
+parser.add_argument(
+    "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
+)
+parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
+# append AppLauncher cli args
+AppLauncher.add_app_launcher_args(parser)
+# parse the arguments
+args_cli = parser.parse_args()
+
+# launch omniverse app
+app_launcher = AppLauncher(headless=args_cli.headless)
+simulation_app = app_launcher.app
+
+"""Rest everything else."""
+
+from collections.abc import Sequence
+
+import gymnasium as gym
+import torch
+import warp as wp
+
+from isaaclab.sensors import FrameTransformer
+
+import isaaclab_tasks  # noqa: F401
+from isaaclab_tasks.manager_based.manipulation.cabinet.cabinet_env_cfg import CabinetEnvCfg
+from isaaclab_tasks.utils.parse_cfg import parse_env_cfg
+
+# initialize warp
+wp.init()
+
+
+class GripperState:
+    """States for the gripper."""
+
+    OPEN = wp.constant(1.0)
+    CLOSE = wp.constant(-1.0)
+
+
+class OpenDrawerSmState:
+    """States for the cabinet drawer opening state machine."""
+
+    REST = wp.constant(0)
+    APPROACH_INFRONT_HANDLE = wp.constant(1)
+    APPROACH_HANDLE = wp.constant(2)
+    GRASP_HANDLE = wp.constant(3)
+    OPEN_DRAWER = wp.constant(4)
+    RELEASE_HANDLE = wp.constant(5)
+
+
+class OpenDrawerSmWaitTime:
+    """Additional wait times (in s) for states for before switching."""
+
+    REST = wp.constant(0.5)
+    APPROACH_INFRONT_HANDLE = wp.constant(1.25)
+    APPROACH_HANDLE = wp.constant(1.0)
+    GRASP_HANDLE = wp.constant(1.0)
+    OPEN_DRAWER = wp.constant(3.0)
+    RELEASE_HANDLE = wp.constant(0.2)
+
+
+@wp.func
+def distance_below_threshold(current_pos: wp.vec3, desired_pos: wp.vec3, threshold: float) -> bool:
+    return wp.length(current_pos - desired_pos) < threshold
+
+
+@wp.kernel
+def infer_state_machine(
+    dt: wp.array(dtype=float),
+    sm_state: wp.array(dtype=int),
+    sm_wait_time: wp.array(dtype=float),
+    ee_pose: wp.array(dtype=wp.transform),
+    handle_pose: wp.array(dtype=wp.transform),
+    des_ee_pose: wp.array(dtype=wp.transform),
+    gripper_state: wp.array(dtype=float),
+    handle_approach_offset: wp.array(dtype=wp.transform),
+    handle_grasp_offset: wp.array(dtype=wp.transform),
+    drawer_opening_rate: wp.array(dtype=wp.transform),
+    position_threshold: float,
+):
+    # retrieve thread id
+    tid = wp.tid()
+    # retrieve state machine state
+    state = sm_state[tid]
+    # decide next state
+    if state == OpenDrawerSmState.REST:
+        des_ee_pose[tid] = ee_pose[tid]
+        gripper_state[tid] = GripperState.OPEN
+        # wait for a while
+        if sm_wait_time[tid] >= OpenDrawerSmWaitTime.REST:
+            # move to next state and reset wait time
+            sm_state[tid] = OpenDrawerSmState.APPROACH_INFRONT_HANDLE
+            sm_wait_time[tid] = 0.0
+    elif state == OpenDrawerSmState.APPROACH_INFRONT_HANDLE:
+        des_ee_pose[tid] = wp.transform_multiply(handle_approach_offset[tid], handle_pose[tid])
+        gripper_state[tid] = GripperState.OPEN
+        if distance_below_threshold(
+            wp.transform_get_translation(ee_pose[tid]),
+            wp.transform_get_translation(des_ee_pose[tid]),
+            position_threshold,
+        ):
+            # wait for a while
+            if sm_wait_time[tid] >= OpenDrawerSmWaitTime.APPROACH_INFRONT_HANDLE:
+                # move to next state and reset wait time
+                sm_state[tid] = OpenDrawerSmState.APPROACH_HANDLE
+                sm_wait_time[tid] = 0.0
+    elif state == OpenDrawerSmState.APPROACH_HANDLE:
+        des_ee_pose[tid] = handle_pose[tid]
+        gripper_state[tid] = GripperState.OPEN
+        if distance_below_threshold(
+            wp.transform_get_translation(ee_pose[tid]),
+            wp.transform_get_translation(des_ee_pose[tid]),
+            position_threshold,
+        ):
+            # wait for a while
+            if sm_wait_time[tid] >= OpenDrawerSmWaitTime.APPROACH_HANDLE:
+                # move to next state and reset wait time
+                sm_state[tid] = OpenDrawerSmState.GRASP_HANDLE
+                sm_wait_time[tid] = 0.0
+    elif state == OpenDrawerSmState.GRASP_HANDLE:
+        des_ee_pose[tid] = wp.transform_multiply(handle_grasp_offset[tid], handle_pose[tid])
+        gripper_state[tid] = GripperState.CLOSE
+        # wait for a while
+        if sm_wait_time[tid] >= OpenDrawerSmWaitTime.GRASP_HANDLE:
+            # move to next state and reset wait time
+            sm_state[tid] = OpenDrawerSmState.OPEN_DRAWER
+            sm_wait_time[tid] = 0.0
+    elif state == OpenDrawerSmState.OPEN_DRAWER:
+        des_ee_pose[tid] = wp.transform_multiply(drawer_opening_rate[tid], handle_pose[tid])
+        gripper_state[tid] = GripperState.CLOSE
+        # wait for a while
+        if sm_wait_time[tid] >= OpenDrawerSmWaitTime.OPEN_DRAWER:
+            # move to next state and reset wait time
+            sm_state[tid] = OpenDrawerSmState.RELEASE_HANDLE
+            sm_wait_time[tid] = 0.0
+    elif state == OpenDrawerSmState.RELEASE_HANDLE:
+        des_ee_pose[tid] = ee_pose[tid]
+        gripper_state[tid] = GripperState.CLOSE
+        # wait for a while
+        if sm_wait_time[tid] >= OpenDrawerSmWaitTime.RELEASE_HANDLE:
+            # move to next state and reset wait time
+            sm_state[tid] = OpenDrawerSmState.RELEASE_HANDLE
+            sm_wait_time[tid] = 0.0
+    # increment wait time
+    sm_wait_time[tid] = sm_wait_time[tid] + dt[tid]
+
+
+class OpenDrawerSm:
+    """A simple state machine in a robot's task space to open a drawer in the cabinet.
+
+    The state machine is implemented as a warp kernel. It takes in the current state of
+    the robot's end-effector and the object, and outputs the desired state of the robot's
+    end-effector and the gripper. The state machine is implemented as a finite state
+    machine with the following states:
+
+    1. REST: The robot is at rest.
+    2. APPROACH_HANDLE: The robot moves towards the handle of the drawer.
+    3. GRASP_HANDLE: The robot grasps the handle of the drawer.
+    4. OPEN_DRAWER: The robot opens the drawer.
+    5. RELEASE_HANDLE: The robot releases the handle of the drawer. This is the final state.
+    """
+
+    def __init__(self, dt: float, num_envs: int, device: torch.device | str = "cpu", position_threshold=0.01):
+        """Initialize the state machine.
+
+        Args:
+            dt: The environment time step.
+            num_envs: The number of environments to simulate.
+            device: The device to run the state machine on.
+        """
+        # save parameters
+        self.dt = float(dt)
+        self.num_envs = num_envs
+        self.device = device
+        self.position_threshold = position_threshold
+        # initialize state machine
+        self.sm_dt = torch.full((self.num_envs,), self.dt, device=self.device)
+        self.sm_state = torch.full((self.num_envs,), 0, dtype=torch.int32, device=self.device)
+        self.sm_wait_time = torch.zeros((self.num_envs,), device=self.device)
+
+        # desired state
+        self.des_ee_pose = torch.zeros((self.num_envs, 7), device=self.device)
+        self.des_gripper_state = torch.full((self.num_envs,), 0.0, device=self.device)
+
+        # approach in front of the handle
+        self.handle_approach_offset = torch.zeros((self.num_envs, 7), device=self.device)
+        self.handle_approach_offset[:, 0] = -0.1
+        self.handle_approach_offset[:, -1] = 1.0  # warp expects quaternion as (x, y, z, w)
+
+        # handle grasp offset
+        self.handle_grasp_offset = torch.zeros((self.num_envs, 7), device=self.device)
+        self.handle_grasp_offset[:, 0] = 0.025
+        self.handle_grasp_offset[:, -1] = 1.0  # warp expects quaternion as (x, y, z, w)
+
+        # drawer opening rate
+        self.drawer_opening_rate = torch.zeros((self.num_envs, 7), device=self.device)
+        self.drawer_opening_rate[:, 0] = -0.015
+        self.drawer_opening_rate[:, -1] = 1.0  # warp expects quaternion as (x, y, z, w)
+
+        # convert to warp
+        self.sm_dt_wp = wp.from_torch(self.sm_dt, wp.float32)
+        self.sm_state_wp = wp.from_torch(self.sm_state, wp.int32)
+        self.sm_wait_time_wp = wp.from_torch(self.sm_wait_time, wp.float32)
+        self.des_ee_pose_wp = wp.from_torch(self.des_ee_pose, wp.transform)
+        self.des_gripper_state_wp = wp.from_torch(self.des_gripper_state, wp.float32)
+        self.handle_approach_offset_wp = wp.from_torch(self.handle_approach_offset, wp.transform)
+        self.handle_grasp_offset_wp = wp.from_torch(self.handle_grasp_offset, wp.transform)
+        self.drawer_opening_rate_wp = wp.from_torch(self.drawer_opening_rate, wp.transform)
+
+    def reset_idx(self, env_ids: Sequence[int] | None = None):
+        """Reset the state machine."""
+        if env_ids is None:
+            env_ids = slice(None)
+        # reset state machine
+        self.sm_state[env_ids] = 0
+        self.sm_wait_time[env_ids] = 0.0
+
+    def compute(self, ee_pose: torch.Tensor, handle_pose: torch.Tensor):
+        """Compute the desired state of the robot's end-effector and the gripper."""
+        # convert all transformations from (w, x, y, z) to (x, y, z, w)
+        ee_pose = ee_pose[:, [0, 1, 2, 4, 5, 6, 3]]
+        handle_pose = handle_pose[:, [0, 1, 2, 4, 5, 6, 3]]
+        # convert to warp
+        ee_pose_wp = wp.from_torch(ee_pose.contiguous(), wp.transform)
+        handle_pose_wp = wp.from_torch(handle_pose.contiguous(), wp.transform)
+
+        # run state machine
+        wp.launch(
+            kernel=infer_state_machine,
+            dim=self.num_envs,
+            inputs=[
+                self.sm_dt_wp,
+                self.sm_state_wp,
+                self.sm_wait_time_wp,
+                ee_pose_wp,
+                handle_pose_wp,
+                self.des_ee_pose_wp,
+                self.des_gripper_state_wp,
+                self.handle_approach_offset_wp,
+                self.handle_grasp_offset_wp,
+                self.drawer_opening_rate_wp,
+                self.position_threshold,
+            ],
+            device=self.device,
+        )
+
+        # convert transformations back to (w, x, y, z)
+        des_ee_pose = self.des_ee_pose[:, [0, 1, 2, 6, 3, 4, 5]]
+        # convert to torch
+        return torch.cat([des_ee_pose, self.des_gripper_state.unsqueeze(-1)], dim=-1)
+
+
+def main():
+    # parse configuration
+    env_cfg: CabinetEnvCfg = parse_env_cfg(
+        "Isaac-Open-Drawer-Franka-IK-Abs-v0",
+        device=args_cli.device,
+        num_envs=args_cli.num_envs,
+        use_fabric=not args_cli.disable_fabric,
+    )
+    # create environment
+    env = gym.make("Isaac-Open-Drawer-Franka-IK-Abs-v0", cfg=env_cfg)
+    # reset environment at start
+    env.reset()
+
+    # create action buffers (position + quaternion)
+    actions = torch.zeros(env.unwrapped.action_space.shape, device=env.unwrapped.device)
+    actions[:, 3] = 1.0
+    # desired object orientation (we only do position control of object)
+    desired_orientation = torch.zeros((env.unwrapped.num_envs, 4), device=env.unwrapped.device)
+    desired_orientation[:, 1] = 1.0
+    # create state machine
+    open_sm = OpenDrawerSm(env_cfg.sim.dt * env_cfg.decimation, env.unwrapped.num_envs, env.unwrapped.device)
+
+    while simulation_app.is_running():
+        # run everything in inference mode
+        with torch.inference_mode():
+            # step environment
+            dones = env.step(actions)[-2]
+
+            # observations
+            # -- end-effector frame
+            ee_frame_tf: FrameTransformer = env.unwrapped.scene["ee_frame"]
+            tcp_rest_position = ee_frame_tf.data.target_pos_w[..., 0, :].clone() - env.unwrapped.scene.env_origins
+            tcp_rest_orientation = ee_frame_tf.data.target_quat_w[..., 0, :].clone()
+            # -- handle frame
+            cabinet_frame_tf: FrameTransformer = env.unwrapped.scene["cabinet_frame"]
+            cabinet_position = cabinet_frame_tf.data.target_pos_w[..., 0, :].clone() - env.unwrapped.scene.env_origins
+            cabinet_orientation = cabinet_frame_tf.data.target_quat_w[..., 0, :].clone()
+
+            # advance state machine
+            actions = open_sm.compute(
+                torch.cat([tcp_rest_position, tcp_rest_orientation], dim=-1),
+                torch.cat([cabinet_position, cabinet_orientation], dim=-1),
+            )
+
+            # reset state machine
+            if dones.any():
+                open_sm.reset_idx(dones.nonzero(as_tuple=False).squeeze(-1))
+
+    # close the environment
+    env.close()
+
+
+if __name__ == "__main__":
+    # run the main execution
+    main()
+    # close sim app
+    simulation_app.close()
diff --git a/scripts/environments/teleoperation/teleop_se3_agent.py b/scripts/environments/teleoperation/teleop_se3_agent.py
new file mode 100644
index 0000000..26776ec
--- /dev/null
+++ b/scripts/environments/teleoperation/teleop_se3_agent.py
@@ -0,0 +1,314 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Script to run teleoperation with Isaac Lab manipulation environments.
+
+Supports multiple input devices (e.g., keyboard, spacemouse, gamepad) and devices
+configured within the environment (including OpenXR-based hand tracking or motion
+controllers)."""
+
+"""Launch Isaac Sim Simulator first."""
+
+import argparse
+from collections.abc import Callable
+
+from isaaclab.app import AppLauncher
+
+# add argparse arguments
+parser = argparse.ArgumentParser(
+    description="Teleoperation for Isaac Lab environments."
+)
+parser.add_argument(
+    "--num_envs", type=int, default=1, help="Number of environments to simulate."
+)
+parser.add_argument(
+    "--teleop_device",
+    type=str,
+    default="keyboard",
+    help=(
+        "Teleop device. Set here (legacy) or via the environment config. If using the environment config, pass the"
+        " device key/name defined under 'teleop_devices' (it can be a custom name, not necessarily 'handtracking')."
+        " Built-ins: keyboard, spacemouse, gamepad. Not all tasks support all built-ins."
+    ),
+)
+parser.add_argument("--task", type=str, default=None, help="Name of the task.")
+parser.add_argument(
+    "--sensitivity", type=float, default=1.0, help="Sensitivity factor."
+)
+parser.add_argument(
+    "--enable_pinocchio",
+    action="store_true",
+    default=False,
+    help="Enable Pinocchio.",
+)
+# append AppLauncher cli args
+AppLauncher.add_app_launcher_args(parser)
+# parse the arguments
+args_cli = parser.parse_args()
+
+app_launcher_args = vars(args_cli)
+
+if args_cli.enable_pinocchio:
+    # Import pinocchio before AppLauncher to force the use of the version installed by IsaacLab and
+    # not the one installed by Isaac Sim pinocchio is required by the Pink IK controllers and the
+    # GR1T2 retargeter
+    import pinocchio  # noqa: F401
+if "handtracking" in args_cli.teleop_device.lower():
+    app_launcher_args["xr"] = True
+
+# launch omniverse app
+app_launcher = AppLauncher(app_launcher_args)
+simulation_app = app_launcher.app
+
+"""Rest everything follows."""
+
+
+import logging
+
+import gymnasium as gym
+import torch
+
+from isaaclab.devices import (
+    Se3Gamepad,
+    Se3GamepadCfg,
+    Se3Keyboard,
+    Se3KeyboardCfg,
+    Se3SpaceMouse,
+    Se3SpaceMouseCfg,
+)
+from isaaclab.devices.openxr import remove_camera_configs
+from isaaclab.devices.teleop_device_factory import create_teleop_device
+from isaaclab.envs import ManagerBasedRLEnvCfg
+from isaaclab.managers import TerminationTermCfg as DoneTerm
+
+import isaaclab_tasks  # noqa: F401
+import mindbot.tasks  # noqa: F401
+from isaaclab_tasks.manager_based.manipulation.lift import mdp
+from isaaclab_tasks.utils import parse_env_cfg
+
+if args_cli.enable_pinocchio:
+    import isaaclab_tasks.manager_based.locomanipulation.pick_place  # noqa: F401
+    import isaaclab_tasks.manager_based.manipulation.pick_place  # noqa: F401
+
+# import logger
+logger = logging.getLogger(__name__)
+
+
+def main() -> None:
+    """
+    Run teleoperation with an Isaac Lab manipulation environment.
+
+    Creates the environment, sets up teleoperation interfaces and callbacks,
+    and runs the main simulation loop until the application is closed.
+
+    Returns:
+        None
+    """
+    # parse configuration
+    env_cfg = parse_env_cfg(
+        args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs
+    )
+    env_cfg.env_name = args_cli.task
+    if not isinstance(env_cfg, ManagerBasedRLEnvCfg):
+        raise ValueError(
+            "Teleoperation is only supported for ManagerBasedRLEnv environments. "
+            f"Received environment config type: {type(env_cfg).__name__}"
+        )
+    # modify configuration
+    env_cfg.terminations.time_out = None
+    if "Lift" in args_cli.task:
+        # set the resampling time range to large number to avoid resampling
+        env_cfg.commands.object_pose.resampling_time_range = (1.0e9, 1.0e9)
+        # add termination condition for reaching the goal otherwise the environment won't reset
+        env_cfg.terminations.object_reached_goal = DoneTerm(
+            func=mdp.object_reached_goal
+        )
+
+    if args_cli.xr:
+        env_cfg = remove_camera_configs(env_cfg)
+        env_cfg.sim.render.antialiasing_mode = "DLSS"
+
+    try:
+        # create environment
+        env = gym.make(args_cli.task, cfg=env_cfg).unwrapped
+        # check environment name (for reach , we don't allow the gripper)
+        if "Reach" in args_cli.task:
+            logger.warning(
+                f"The environment '{args_cli.task}' does not support gripper control. The device command will be"
+                " ignored."
+            )
+    except Exception as e:
+        logger.error(f"Failed to create environment: {e}")
+        simulation_app.close()
+        return
+
+    # Flags for controlling teleoperation flow
+    should_reset_recording_instance = False
+    teleoperation_active = True
+
+    # Callback handlers
+    def reset_recording_instance() -> None:
+        """
+        Reset the environment to its initial state.
+
+        Sets a flag to reset the environment on the next simulation step.
+
+        Returns:
+            None
+        """
+        nonlocal should_reset_recording_instance
+        should_reset_recording_instance = True
+        print("Reset triggered - Environment will reset on next step")
+
+    def start_teleoperation() -> None:
+        """
+        Activate teleoperation control of the robot.
+
+        Enables the application of teleoperation commands to the environment.
+
+        Returns:
+            None
+        """
+        nonlocal teleoperation_active
+        teleoperation_active = True
+        print("Teleoperation activated")
+
+    def stop_teleoperation() -> None:
+        """
+        Deactivate teleoperation control of the robot.
+
+        Disables the application of teleoperation commands to the environment.
+
+        Returns:
+            None
+        """
+        nonlocal teleoperation_active
+        teleoperation_active = False
+        print("Teleoperation deactivated")
+
+    # Create device config if not already in env_cfg
+    teleoperation_callbacks: dict[str, Callable[[], None]] = {
+        "R": reset_recording_instance,
+        "START": start_teleoperation,
+        "STOP": stop_teleoperation,
+        "RESET": reset_recording_instance,
+    }
+
+    # For hand tracking devices, add additional callbacks
+    if args_cli.xr:
+        # Default to inactive for hand tracking
+        teleoperation_active = False
+    else:
+        # Always active for other devices
+        teleoperation_active = True
+
+    # Create teleop device from config if present, otherwise create manually
+    teleop_interface = None
+    try:
+        if (
+            hasattr(env_cfg, "teleop_devices")
+            and args_cli.teleop_device in env_cfg.teleop_devices.devices
+        ):
+            teleop_interface = create_teleop_device(
+                args_cli.teleop_device,
+                env_cfg.teleop_devices.devices,
+                teleoperation_callbacks,
+            )
+        else:
+            logger.warning(
+                f"No teleop device '{args_cli.teleop_device}' found in environment config. Creating default."
+            )
+            # Create fallback teleop device
+            sensitivity = args_cli.sensitivity
+            if args_cli.teleop_device.lower() == "keyboard":
+                teleop_interface = Se3Keyboard(
+                    Se3KeyboardCfg(
+                        pos_sensitivity=0.05 * sensitivity,
+                        rot_sensitivity=0.05 * sensitivity,
+                    )
+                )
+            elif args_cli.teleop_device.lower() == "spacemouse":
+                teleop_interface = Se3SpaceMouse(
+                    Se3SpaceMouseCfg(
+                        pos_sensitivity=0.05 * sensitivity,
+                        rot_sensitivity=0.05 * sensitivity,
+                    )
+                )
+            elif args_cli.teleop_device.lower() == "gamepad":
+                teleop_interface = Se3Gamepad(
+                    Se3GamepadCfg(
+                        pos_sensitivity=0.1 * sensitivity,
+                        rot_sensitivity=0.1 * sensitivity,
+                    )
+                )
+            else:
+                logger.error(f"Unsupported teleop device: {args_cli.teleop_device}")
+                logger.error("Configure the teleop device in the environment config.")
+                env.close()
+                simulation_app.close()
+                return
+
+            # Add callbacks to fallback device
+            for key, callback in teleoperation_callbacks.items():
+                try:
+                    teleop_interface.add_callback(key, callback)
+                except (ValueError, TypeError) as e:
+                    logger.warning(f"Failed to add callback for key {key}: {e}")
+    except Exception as e:
+        logger.error(f"Failed to create teleop device: {e}")
+        env.close()
+        simulation_app.close()
+        return
+
+    if teleop_interface is None:
+        logger.error("Failed to create teleop interface")
+        env.close()
+        simulation_app.close()
+        return
+
+    print(f"Using teleop device: {teleop_interface}")
+
+    # reset environment
+    env.reset()
+    teleop_interface.reset()
+
+    print("Teleoperation started. Press 'R' to reset the environment.")
+
+    # simulate environment
+    while simulation_app.is_running():
+        try:
+            # run everything in inference mode
+            with torch.inference_mode():
+                # get device command
+                action = teleop_interface.advance()
+
+                # Only apply teleop commands when active
+                if teleoperation_active:
+                    # process actions
+                    actions = action.repeat(env.num_envs, 1)
+                    # apply actions
+                    env.step(actions)
+                else:
+                    env.sim.render()
+
+                if should_reset_recording_instance:
+                    env.reset()
+                    teleop_interface.reset()
+                    should_reset_recording_instance = False
+                    print("Environment reset complete")
+        except Exception as e:
+            logger.error(f"Error during simulation step: {e}")
+            break
+
+    # close the simulator
+    env.close()
+    print("Environment closed")
+
+
+if __name__ == "__main__":
+    # run the main function
+    main()
+    # close sim app
+    simulation_app.close()
diff --git a/scripts/environments/zero_agent.py b/scripts/environments/zero_agent.py
new file mode 100644
index 0000000..edd9317
--- /dev/null
+++ b/scripts/environments/zero_agent.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Script to run an environment with zero action agent."""
+
+"""Launch Isaac Sim Simulator first."""
+
+import argparse
+
+from isaaclab.app import AppLauncher
+
+# add argparse arguments
+parser = argparse.ArgumentParser(description="Zero agent for Isaac Lab environments.")
+parser.add_argument(
+    "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
+)
+parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
+parser.add_argument("--task", type=str, default=None, help="Name of the task.")
+# append AppLauncher cli args
+AppLauncher.add_app_launcher_args(parser)
+# parse the arguments
+args_cli = parser.parse_args()
+
+# launch omniverse app
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+
+"""Rest everything follows."""
+
+import gymnasium as gym
+import torch
+
+import isaaclab_tasks  # noqa: F401
+from isaaclab_tasks.utils import parse_env_cfg
+
+# PLACEHOLDER: Extension template (do not remove this comment)
+
+
+def main():
+    """Zero actions agent with Isaac Lab environment."""
+    # parse configuration
+    env_cfg = parse_env_cfg(
+        args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
+    )
+    # create environment
+    env = gym.make(args_cli.task, cfg=env_cfg)
+
+    # print info (this is vectorized environment)
+    print(f"[INFO]: Gym observation space: {env.observation_space}")
+    print(f"[INFO]: Gym action space: {env.action_space}")
+    # reset environment
+    env.reset()
+    # simulate environment
+    while simulation_app.is_running():
+        # run everything in inference mode
+        with torch.inference_mode():
+            # compute zero actions
+            actions = torch.zeros(env.action_space.shape, device=env.unwrapped.device)
+            # apply actions
+            env.step(actions)
+
+    # close the simulator
+    env.close()
+
+
+if __name__ == "__main__":
+    # run the main function
+    main()
+    # close sim app
+    simulation_app.close()
diff --git a/source/mindbot/mindbot/robot/mindbot.py b/source/mindbot/mindbot/robot/mindbot.py
index 821630a..fa2685c 100644
--- a/source/mindbot/mindbot/robot/mindbot.py
+++ b/source/mindbot/mindbot/robot/mindbot.py
@@ -19,8 +19,8 @@ MINDBOT_CFG = ArticulationCfg(
     spawn=sim_utils.UsdFileCfg(
         # 1. UPDATE THE USD PATH to your local file
         # C:\Users\PC\workpalce\maic_usd_assets\twinlab\MIC_sim-3.0\pupet_arm_sim_product\sim_data_acquisition\usds\Collected_robot_2_3\robot_2_4.usd
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot_cd2.usd",
-        # usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/pupet_arm_sim_product/sim_data_acquisition/usds/Collected_robot_2_3/robot_2_6.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot_cd2.usd",
+        # usd_path="/home/maic/LYT/maic_usd_assets/twinlab/MIC_sim-3.0/pupet_arm_sim_product/sim_data_acquisition/usds/Collected_robot_2_3/robot_2_6.usd",
         rigid_props=sim_utils.RigidBodyPropertiesCfg(
             disable_gravity=False,
             max_depenetration_velocity=1.0,
@@ -36,8 +36,8 @@ MINDBOT_CFG = ArticulationCfg(
         ),
         collision_props=sim_utils.CollisionPropertiesCfg(
             collision_enabled=True,
-            contact_offset=0.0005, # 尝试从默认值增大到 0.01 或 0.02
-            rest_offset=0,   # 保持略小于 contact_offset
+            contact_offset=0.0005,  # 尝试从默认值增大到 0.01 或 0.02
+            rest_offset=0,  # 保持略小于 contact_offset
         ),
     ),
     init_state=ArticulationCfg.InitialStateCfg(
@@ -45,7 +45,7 @@ MINDBOT_CFG = ArticulationCfg(
         # All are set to 0.0 here, but you should adjust them for a suitable "home" position.
         joint_pos={
             # Left arm joints
-# Left arm joints
+            # Left arm joints
             # 135° -> 2.3562
             "l_joint1": 2.3562,
             # -70° -> -1.2217
@@ -65,7 +65,7 @@ MINDBOT_CFG = ArticulationCfg(
             # "l_joint4": 0.0,
             # "l_joint5": 0.0,
             # "l_joint6": 0.0,
-                 # Right arm joints
+            # Right arm joints
             # # -45° -> -0.7854
             # "r_joint1": -0.7854,
             # # 70° -> 1.2217
@@ -95,15 +95,15 @@ MINDBOT_CFG = ArticulationCfg(
             # left gripper
             # 注意：如果希望初始状态为完全打开，应该设置为 left=0.0, right=0.0
             # 当前设置为 right=0.01 表示略微闭合状态
-            "left_hand_joint_left": 0.0,    # 范围：(-0.03, 0.0)，0.0=打开，-0.03=闭合
+            "left_hand_joint_left": 0.0,  # 范围：(-0.03, 0.0)，0.0=打开，-0.03=闭合
             "left_hand_joint_right": 0.0,  # 范围：(0.0, 0.03)，0.0=打开，0.03=闭合
             # right gripper
             "right_hand_joint_left": 0.0,
             "right_hand_joint_right": 0.0,
             # trunk
-            "PrismaticJoint": 0.23, # 0.30
+            "PrismaticJoint": 0.23,  # 0.30
             # head
-            "head_revoluteJoint": 0.0 #0.5236
+            "head_revoluteJoint": 0.0,  # 0.5236
         },
         # The initial (x, y, z) position of the robot's base in the world
         pos=(0, 0, 0.05),
@@ -112,40 +112,46 @@ MINDBOT_CFG = ArticulationCfg(
         # 3. DEFINE ACTUATOR GROUPS FOR THE ARMS
         # Group for the 6 left arm joints using a regular expression
         "left_arm": ImplicitActuatorCfg(
-            joint_names_expr=["l_joint[1-6]"], # This matches l_joint1, l_joint2, ..., l_joint6
-            effort_limit_sim=100.0,            # Note: Tune this based on your robot's specs
-            velocity_limit_sim=100.0,          # Note: Tune this based on your robot's specs
-            stiffness=0.0, #10000.0,                  # Note: Tune this for desired control performance
-            damping=1000.0, #10000.0,                     # Note: Tune this for desired control performance
+            joint_names_expr=[
+                "l_joint[1-6]"
+            ],  # This matches l_joint1, l_joint2, ..., l_joint6
+            effort_limit_sim=100.0,  # Note: Tune this based on your robot's specs
+            velocity_limit_sim=100.0,  # Note: Tune this based on your robot's specs
+            stiffness=0.0,  # 10000.0,                  # Note: Tune this for desired control performance
+            damping=1000.0,  # 10000.0,                     # Note: Tune this for desired control performance
         ),
         # Group for the 6 right arm joints using a regular expression
         "right_arm": ImplicitActuatorCfg(
-            joint_names_expr=["r_joint[1-6]"], # This matches r_joint1, r_joint2, ..., r_joint6
-            effort_limit_sim=100.0,            # Note: Tune this based on your robot's specs
-            velocity_limit_sim=100.0,          # Note: Tune this based on your robot's specs
-            stiffness=10000.0, #10000.0,                  # Note: Tune this for desired control performance1
-            damping=1000.0, #10000.0,                   # Note: Tune this for desired control performance
+            joint_names_expr=[
+                "r_joint[1-6]"
+            ],  # This matches r_joint1, r_joint2, ..., r_joint6
+            effort_limit_sim=100.0,  # Note: Tune this based on your robot's specs
+            velocity_limit_sim=100.0,  # Note: Tune this based on your robot's specs
+            stiffness=10000.0,  # 10000.0,                  # Note: Tune this for desired control performance1
+            damping=1000.0,  # 10000.0,                   # Note: Tune this for desired control performance
+        ),
+        "head": ImplicitActuatorCfg(
+            joint_names_expr=["head_revoluteJoint"], stiffness=10000.0, damping=1000.0
         ),
-        "head": ImplicitActuatorCfg(joint_names_expr=["head_revoluteJoint"], stiffness=10000.0, damping=1000.0),
         "trunk": ImplicitActuatorCfg(
             joint_names_expr=["PrismaticJoint"],
-            stiffness=40000.0,    # 从 10000 增：强持位
-            damping=4000.0,      # 从 1000 增：抗振荡
+            stiffness=40000.0,  # 从 10000 增：强持位
+            damping=4000.0,  # 从 1000 增：抗振荡
             effort_limit_sim=10000.0,
             velocity_limit_sim=0.2,
         ),
         "wheels": ImplicitActuatorCfg(
-            joint_names_expr=[".*_revolute_Joint"], 
-            stiffness=0.0, 
+            joint_names_expr=[".*_revolute_Joint"],
+            stiffness=0.0,
             damping=100.0,
             effort_limit_sim=200.0,  # <<<<<< 新增：力矩上限，足够驱动轮子
-            velocity_limit_sim=50.0, # <<<<<< 新增：速度上限，防止过快
+            velocity_limit_sim=50.0,  # <<<<<< 新增：速度上限，防止过快
         ),
         "grippers": ImplicitActuatorCfg(
-            joint_names_expr=[".*_hand_joint.*"], 
-            stiffness=10000.0, #10000.0
+            joint_names_expr=[".*_hand_joint.*"],
+            stiffness=10000.0,  # 10000.0
             damping=1000.0,
-            effort_limit_sim=50.0, # default is 10, gemini said it is too small, so I set it to 50.
+            effort_limit_sim=50.0,  # default is 10, gemini said it is too small, so I set it to 50.
             velocity_limit_sim=50.0,
         ),
     },
diff --git a/source/mindbot/mindbot/tasks/manager_based/il/__init__.py b/source/mindbot/mindbot/tasks/manager_based/il/__init__.py
new file mode 100644
index 0000000..4e712fa
--- /dev/null
+++ b/source/mindbot/mindbot/tasks/manager_based/il/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Imitation learning tasks."""
+
+import gymnasium as gym  # noqa: F401
diff --git a/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/__init__.py b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/__init__.py
new file mode 100644
index 0000000..804a0a4
--- /dev/null
+++ b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/__init__.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+import gymnasium as gym
+
+# from . import agents
+from .agents import *
+
+##
+# Register Gym environments.
+##
+
+##
+# MindRobot Left Arm IK-Rel (keyboard teleop)
+##
+
+# gym.register(
+#     id="Isaac-MindRobot-LeftArm-IK-Rel-v0",
+#     entry_point="isaaclab.envs:ManagerBasedRLEnv",
+#     kwargs={
+#         "env_cfg_entry_point": (
+#             "isaaclab_tasks.manager_based.manipulation.mindrobot."
+#             "mindrobot_left_arm_ik_env_cfg:MindRobotLeftArmIKRelEnvCfg"
+#         ),
+#         "robomimic_bc_cfg_entry_point": "isaaclab_tasks.manager_based.manipulation.mindrobot.agents:robomimic/bc_rnn_low_dim.json",
+#     },
+#     disable_env_checker=True,
+# )
+# ...existing code...
+gym.register(
+    id="Isaac-MindRobot-LeftArm-IK-Rel-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.mindrobot_left_arm_ik_env_cfg:MindRobotLeftArmIKEnvCfg",
+        "robomimic_bc_cfg_entry_point": f"{__name__}.agents:robomimic/bc_rnn_low_dim.json",
+    },
+    disable_env_checker=True,
+)
+# ...existing code...
\ No newline at end of file
diff --git a/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/agents/__init__.py b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/agents/__init__.py
new file mode 100644
index 0000000..b9bef45
--- /dev/null
+++ b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/agents/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/agents/robomimic/bc_rnn_image_200.json b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/agents/robomimic/bc_rnn_image_200.json
new file mode 100644
index 0000000..88a4aaa
--- /dev/null
+++ b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/agents/robomimic/bc_rnn_image_200.json
@@ -0,0 +1,219 @@
+{
+    "algo_name": "bc",
+    "experiment": {
+        "name": "bc_rnn_image_franka_stack",
+        "validate": false,
+        "logging": {
+            "terminal_output_to_txt": true,
+            "log_tb": true
+        },
+        "save": {
+            "enabled": true,
+            "every_n_seconds": null,
+            "every_n_epochs": 20,
+            "epochs": [],
+            "on_best_validation": false,
+            "on_best_rollout_return": false,
+            "on_best_rollout_success_rate": true
+        },
+        "epoch_every_n_steps": 500,
+        "env": null,
+        "additional_envs": null,
+        "render": false,
+        "render_video": false,
+        "rollout": {
+            "enabled": false
+        }
+    },
+    "train": {
+        "data": null,
+        "num_data_workers": 4,
+        "hdf5_cache_mode": "low_dim",
+        "hdf5_use_swmr": true,
+        "hdf5_load_next_obs": false,
+        "hdf5_normalize_obs": false,
+        "hdf5_filter_key": null,
+        "hdf5_validation_filter_key": null,
+        "seq_length": 10,
+        "pad_seq_length": true,
+        "frame_stack": 1,
+        "pad_frame_stack": true,
+        "dataset_keys": [
+            "actions",
+            "rewards",
+            "dones"
+        ],
+        "goal_mode": null,
+        "cuda": true,
+        "batch_size": 16,
+        "num_epochs": 600,
+        "seed": 101
+    },
+    "algo": {
+        "optim_params": {
+            "policy": {
+                "optimizer_type": "adam",
+                "learning_rate": {
+                    "initial": 0.0001,
+                    "decay_factor": 0.1,
+                    "epoch_schedule": [],
+                    "scheduler_type": "multistep"
+                },
+                "regularization": {
+                    "L2": 0.0
+                }
+            }
+        },
+        "loss": {
+            "l2_weight": 1.0,
+            "l1_weight": 0.0,
+            "cos_weight": 0.0
+        },
+        "actor_layer_dims": [],
+        "gaussian": {
+            "enabled": false,
+            "fixed_std": false,
+            "init_std": 0.1,
+            "min_std": 0.01,
+            "std_activation": "softplus",
+            "low_noise_eval": true
+        },
+        "gmm": {
+            "enabled": true,
+            "num_modes": 5,
+            "min_std": 0.0001,
+            "std_activation": "softplus",
+            "low_noise_eval": true
+        },
+        "vae": {
+            "enabled": false,
+            "latent_dim": 14,
+            "latent_clip": null,
+            "kl_weight": 1.0,
+            "decoder": {
+                "is_conditioned": true,
+                "reconstruction_sum_across_elements": false
+            },
+            "prior": {
+                "learn": false,
+                "is_conditioned": false,
+                "use_gmm": false,
+                "gmm_num_modes": 10,
+                "gmm_learn_weights": false,
+                "use_categorical": false,
+                "categorical_dim": 10,
+                "categorical_gumbel_softmax_hard": false,
+                "categorical_init_temp": 1.0,
+                "categorical_temp_anneal_step": 0.001,
+                "categorical_min_temp": 0.3
+            },
+            "encoder_layer_dims": [
+                300,
+                400
+            ],
+            "decoder_layer_dims": [
+                300,
+                400
+            ],
+            "prior_layer_dims": [
+                300,
+                400
+            ]
+        },
+        "rnn": {
+            "enabled": true,
+            "horizon": 10,
+            "hidden_dim": 1000,
+            "rnn_type": "LSTM",
+            "num_layers": 2,
+            "open_loop": false,
+            "kwargs": {
+                "bidirectional": false
+            }
+        },
+        "transformer": {
+            "enabled": false,
+            "context_length": 10,
+            "embed_dim": 512,
+            "num_layers": 6,
+            "num_heads": 8,
+            "emb_dropout": 0.1,
+            "attn_dropout": 0.1,
+            "block_output_dropout": 0.1,
+            "sinusoidal_embedding": false,
+            "activation": "gelu",
+            "supervise_all_steps": false,
+            "nn_parameter_for_timesteps": true
+        }
+    },
+    "observation": {
+        "modalities": {
+            "obs": {
+                "low_dim": [
+                    "eef_pos",
+                    "eef_quat",
+                    "gripper_pos"
+                ],
+                "rgb": [
+                    "table_cam",
+                    "wrist_cam"
+                ],
+                "depth": [],
+                "scan": []
+            },
+            "goal": {
+                "low_dim": [],
+                "rgb": [],
+                "depth": [],
+                "scan": []
+            }
+        },
+        "encoder": {
+            "low_dim": {
+                "core_class": null,
+                "core_kwargs": {},
+                "obs_randomizer_class": null,
+                "obs_randomizer_kwargs": {}
+            },
+            "rgb": {
+                "core_class": "VisualCore",
+                "core_kwargs": {
+                    "feature_dimension": 64,
+                    "flatten": true,
+                    "backbone_class": "ResNet18Conv",
+                    "backbone_kwargs": {
+                        "pretrained": false,
+                        "input_coord_conv": false
+                    },
+                    "pool_class": "SpatialSoftmax",
+                    "pool_kwargs": {
+                        "num_kp": 32,
+                        "learnable_temperature": false,
+                        "temperature": 1.0,
+                        "noise_std": 0.0,
+                        "output_variance": false
+                    }
+                },
+                "obs_randomizer_class": "CropRandomizer",
+                "obs_randomizer_kwargs": {
+                    "crop_height": 181,
+                    "crop_width": 181,
+                    "num_crops": 1,
+                    "pos_enc": false
+                }
+            },
+            "depth": {
+                "core_class": "VisualCore",
+                "core_kwargs": {},
+                "obs_randomizer_class": null,
+                "obs_randomizer_kwargs": {}
+            },
+            "scan": {
+                "core_class": "ScanCore",
+                "core_kwargs": {},
+                "obs_randomizer_class": null,
+                "obs_randomizer_kwargs": {}
+            }
+        }
+    }
+}
diff --git a/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/agents/robomimic/bc_rnn_image_cosmos.json b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/agents/robomimic/bc_rnn_image_cosmos.json
new file mode 100644
index 0000000..6aa8e94
--- /dev/null
+++ b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/agents/robomimic/bc_rnn_image_cosmos.json
@@ -0,0 +1,218 @@
+{
+    "algo_name": "bc",
+    "experiment": {
+        "name": "bc_rnn_image_franka_stack_cosmos",
+        "validate": false,
+        "logging": {
+            "terminal_output_to_txt": true,
+            "log_tb": true
+        },
+        "save": {
+            "enabled": true,
+            "every_n_seconds": null,
+            "every_n_epochs": 20,
+            "epochs": [],
+            "on_best_validation": false,
+            "on_best_rollout_return": false,
+            "on_best_rollout_success_rate": true
+        },
+        "epoch_every_n_steps": 500,
+        "env": null,
+        "additional_envs": null,
+        "render": false,
+        "render_video": false,
+        "rollout": {
+            "enabled": false
+        }
+    },
+    "train": {
+        "data": null,
+        "num_data_workers": 4,
+        "hdf5_cache_mode": "low_dim",
+        "hdf5_use_swmr": true,
+        "hdf5_load_next_obs": false,
+        "hdf5_normalize_obs": false,
+        "hdf5_filter_key": null,
+        "hdf5_validation_filter_key": null,
+        "seq_length": 10,
+        "pad_seq_length": true,
+        "frame_stack": 1,
+        "pad_frame_stack": true,
+        "dataset_keys": [
+            "actions",
+            "rewards",
+            "dones"
+        ],
+        "goal_mode": null,
+        "cuda": true,
+        "batch_size": 16,
+        "num_epochs": 600,
+        "seed": 101
+    },
+    "algo": {
+        "optim_params": {
+            "policy": {
+                "optimizer_type": "adam",
+                "learning_rate": {
+                    "initial": 0.0001,
+                    "decay_factor": 0.1,
+                    "epoch_schedule": [],
+                    "scheduler_type": "multistep"
+                },
+                "regularization": {
+                    "L2": 0.0
+                }
+            }
+        },
+        "loss": {
+            "l2_weight": 1.0,
+            "l1_weight": 0.0,
+            "cos_weight": 0.0
+        },
+        "actor_layer_dims": [],
+        "gaussian": {
+            "enabled": false,
+            "fixed_std": false,
+            "init_std": 0.1,
+            "min_std": 0.01,
+            "std_activation": "softplus",
+            "low_noise_eval": true
+        },
+        "gmm": {
+            "enabled": true,
+            "num_modes": 5,
+            "min_std": 0.0001,
+            "std_activation": "softplus",
+            "low_noise_eval": true
+        },
+        "vae": {
+            "enabled": false,
+            "latent_dim": 14,
+            "latent_clip": null,
+            "kl_weight": 1.0,
+            "decoder": {
+                "is_conditioned": true,
+                "reconstruction_sum_across_elements": false
+            },
+            "prior": {
+                "learn": false,
+                "is_conditioned": false,
+                "use_gmm": false,
+                "gmm_num_modes": 10,
+                "gmm_learn_weights": false,
+                "use_categorical": false,
+                "categorical_dim": 10,
+                "categorical_gumbel_softmax_hard": false,
+                "categorical_init_temp": 1.0,
+                "categorical_temp_anneal_step": 0.001,
+                "categorical_min_temp": 0.3
+            },
+            "encoder_layer_dims": [
+                300,
+                400
+            ],
+            "decoder_layer_dims": [
+                300,
+                400
+            ],
+            "prior_layer_dims": [
+                300,
+                400
+            ]
+        },
+        "rnn": {
+            "enabled": true,
+            "horizon": 10,
+            "hidden_dim": 1000,
+            "rnn_type": "LSTM",
+            "num_layers": 2,
+            "open_loop": false,
+            "kwargs": {
+                "bidirectional": false
+            }
+        },
+        "transformer": {
+            "enabled": false,
+            "context_length": 10,
+            "embed_dim": 512,
+            "num_layers": 6,
+            "num_heads": 8,
+            "emb_dropout": 0.1,
+            "attn_dropout": 0.1,
+            "block_output_dropout": 0.1,
+            "sinusoidal_embedding": false,
+            "activation": "gelu",
+            "supervise_all_steps": false,
+            "nn_parameter_for_timesteps": true
+        }
+    },
+    "observation": {
+        "modalities": {
+            "obs": {
+                "low_dim": [
+                    "eef_pos",
+                    "eef_quat",
+                    "gripper_pos"
+                ],
+                "rgb": [
+                    "table_cam"
+                ],
+                "depth": [],
+                "scan": []
+            },
+            "goal": {
+                "low_dim": [],
+                "rgb": [],
+                "depth": [],
+                "scan": []
+            }
+        },
+        "encoder": {
+            "low_dim": {
+                "core_class": null,
+                "core_kwargs": {},
+                "obs_randomizer_class": null,
+                "obs_randomizer_kwargs": {}
+            },
+            "rgb": {
+                "core_class": "VisualCore",
+                "core_kwargs": {
+                    "feature_dimension": 64,
+                    "flatten": true,
+                    "backbone_class": "ResNet18Conv",
+                    "backbone_kwargs": {
+                        "pretrained": false,
+                        "input_coord_conv": false
+                    },
+                    "pool_class": "SpatialSoftmax",
+                    "pool_kwargs": {
+                        "num_kp": 32,
+                        "learnable_temperature": false,
+                        "temperature": 1.0,
+                        "noise_std": 0.0,
+                        "output_variance": false
+                    }
+                },
+                "obs_randomizer_class": "CropRandomizer",
+                "obs_randomizer_kwargs": {
+                    "crop_height": 180,
+                    "crop_width": 180,
+                    "num_crops": 1,
+                    "pos_enc": false
+                }
+            },
+            "depth": {
+                "core_class": "VisualCore",
+                "core_kwargs": {},
+                "obs_randomizer_class": null,
+                "obs_randomizer_kwargs": {}
+            },
+            "scan": {
+                "core_class": "ScanCore",
+                "core_kwargs": {},
+                "obs_randomizer_class": null,
+                "obs_randomizer_kwargs": {}
+            }
+        }
+    }
+}
diff --git a/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/agents/robomimic/bc_rnn_low_dim.json b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/agents/robomimic/bc_rnn_low_dim.json
new file mode 100644
index 0000000..9e1edea
--- /dev/null
+++ b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/agents/robomimic/bc_rnn_low_dim.json
@@ -0,0 +1,101 @@
+{
+    "algo_name": "bc",
+    "experiment": {
+        "name": "bc_rnn_low_dim_mindbot_stack",
+        "validate": false,
+        "logging": {
+            "terminal_output_to_txt": true,
+            "log_tb": true
+        },
+        "save": {
+            "enabled": true,
+            "every_n_seconds": null,
+            "every_n_epochs": 100,
+            "epochs": [],
+            "on_best_validation": false,
+            "on_best_rollout_return": false,
+            "on_best_rollout_success_rate": true
+        },
+        "epoch_every_n_steps": 100,
+        "env": null,
+        "additional_envs": null,
+        "render": false,
+        "render_video": false,
+        "rollout": {
+            "enabled": false
+        }
+    },
+    "train": {
+        "data": null,
+        "num_data_workers": 4,
+        "hdf5_cache_mode": "all",
+        "hdf5_use_swmr": true,
+        "hdf5_normalize_obs": false,
+        "hdf5_filter_key": null,
+        "hdf5_validation_filter_key": null,
+        "seq_length": 10,
+        "dataset_keys": [
+            "actions"
+        ],
+        "goal_mode": null,
+        "cuda": true,
+        "batch_size": 100,
+        "num_epochs": 2000,
+        "seed": 101
+    },
+    "algo": {
+        "optim_params": {
+            "policy": {
+                "optimizer_type": "adam",
+                "learning_rate": {
+                    "initial": 0.001,
+                    "decay_factor": 0.1,
+                    "epoch_schedule": [],
+                    "scheduler_type": "multistep"
+                },
+                "regularization": {
+                    "L2": 0.0
+                }
+            }
+        },
+        "loss": {
+            "l2_weight": 1.0,
+            "l1_weight": 0.0,
+            "cos_weight": 0.0
+        },
+        "actor_layer_dims": [],
+        "gmm": {
+            "enabled": true,
+            "num_modes": 5,
+            "min_std": 0.0001,
+            "std_activation": "softplus",
+            "low_noise_eval": true
+        },
+        "rnn": {
+            "enabled": true,
+            "horizon": 10,
+            "hidden_dim": 400,
+            "rnn_type": "LSTM",
+            "num_layers": 2,
+            "open_loop": false,
+            "kwargs": {
+                "bidirectional": false
+            }
+        }
+    },
+    "observation": {
+        "modalities": {
+            "obs": {
+                "low_dim": [
+                    "eef_pos",
+                    "eef_quat",
+                    "gripper_pos",
+                    "object"
+                ],
+                "rgb": [],
+                "depth": [],
+                "scan": []
+            }
+        }
+    }
+}
diff --git a/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/mindrobot_cfg.py b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/mindrobot_cfg.py
new file mode 100644
index 0000000..bc908b0
--- /dev/null
+++ b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/mindrobot_cfg.py
@@ -0,0 +1,122 @@
+# Copyright (c) 2024, MindRobot Project
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Configuration for MindRobot dual-arm robot."""
+
+import isaaclab.sim as sim_utils
+from isaaclab.actuators import ImplicitActuatorCfg
+from isaaclab.assets import ArticulationCfg
+
+##
+# ✅ 使用你的本地 USD 绝对路径
+##
+
+MINDBOT_CFG = ArticulationCfg(
+    spawn=sim_utils.UsdFileCfg(
+        # =====================================================
+        # ✅ 你的本地 USD 文件路径
+        # =====================================================
+        usd_path="C:/Users/PC/workpalce/maic_usd_assets/robots/mindrobot-1105-lab/mindrobot_cd2.usd",
+        rigid_props=sim_utils.RigidBodyPropertiesCfg(
+            disable_gravity=False,
+            max_depenetration_velocity=1.0,
+            linear_damping=0.5,
+            angular_damping=0.5,
+        ),
+        articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+            # Allow root to be affected by physics (gravity) so robot won't hover
+            fix_root_link=False,
+            enabled_self_collisions=False,
+            solver_position_iteration_count=16,
+            solver_velocity_iteration_count=8,
+            stabilization_threshold=1e-6,
+        ),
+        collision_props=sim_utils.CollisionPropertiesCfg(
+            collision_enabled=True,
+            contact_offset=0.0005,
+            rest_offset=0,
+        ),
+    ),
+    init_state=ArticulationCfg.InitialStateCfg(
+        joint_pos={
+            # ====== 左臂关节（主控臂）======
+            "l_joint1": 2.3562,     # 135°
+            "l_joint2": -1.2217,    # -70°
+            "l_joint3": -1.5708,    # -90°
+            "l_joint4": 1.5708,     # 90°
+            "l_joint5": 1.5708,     # 90°
+            "l_joint6": -1.2217,    # -70°
+            # ====== 右臂关节（静止）======
+            "r_joint1": 0.0,
+            "r_joint2": 0.0,
+            "r_joint3": 0.0,
+            "r_joint4": 0.0,
+            "r_joint5": 0.0,
+            "r_joint6": 0.0,
+            # ====== 左手夹爪 ======
+            "left_hand_joint_left": 0.0,     # 0.0=打开, -0.03=闭合
+            "left_hand_joint_right": 0.0,    # 0.0=打开,  0.03=闭合
+            # ====== 右手夹爪 ======
+            "right_hand_joint_left": 0.0,
+            "right_hand_joint_right": 0.0,
+            # ====== 躯干 ======
+            "PrismaticJoint": 0.26,
+            # ====== 头部 ======
+            "head_revoluteJoint": 0.0,
+        },
+        # Lower the spawn height to sit on the ground (was 0.05m above ground)
+        pos=(0, 0, 0.0),
+    ),
+    actuators={
+        "left_arm": ImplicitActuatorCfg(
+            joint_names_expr=["l_joint[1-6]"],
+            effort_limit_sim=100.0,
+            velocity_limit_sim=100.0,
+            # ✅ IK 控制需要高刚度
+            stiffness=10000.0,
+            damping=1000.0,
+        ),
+        "right_arm": ImplicitActuatorCfg(
+            joint_names_expr=["r_joint[1-6]"],
+            effort_limit_sim=100.0,
+            velocity_limit_sim=100.0,
+            stiffness=10000.0,
+            damping=1000.0,
+        ),
+        "head": ImplicitActuatorCfg(
+            joint_names_expr=["head_revoluteJoint"],
+            stiffness=10000.0,
+            damping=1000.0,
+        ),
+        "trunk": ImplicitActuatorCfg(
+            joint_names_expr=["PrismaticJoint"],
+            stiffness=40000.0,
+            damping=4000.0,
+            effort_limit_sim=10000.0,
+            velocity_limit_sim=0.2,
+        ),
+        "wheels": ImplicitActuatorCfg(
+            joint_names_expr=[".*_revolute_Joint"],
+            stiffness=0.0,
+            damping=100.0,
+            effort_limit_sim=200.0,
+            velocity_limit_sim=50.0,
+        ),
+        # ✅ 夹爪：高刚度确保精准控制
+        "grippers": ImplicitActuatorCfg(
+            joint_names_expr=[".*_hand_joint.*"],
+            stiffness=10000.0,
+            damping=1000.0,
+            effort_limit_sim=50.0,
+            velocity_limit_sim=50.0,
+        ),
+    },
+)
+
+##
+# ✅ 专用于 IK 控制的高 PD 版本（可选但推荐）
+##
+
+MINDBOT_HIGH_PD_CFG = MINDBOT_CFG.copy()
+MINDBOT_HIGH_PD_CFG.actuators["left_arm"].stiffness = 40000.0
+MINDBOT_HIGH_PD_CFG.actuators["left_arm"].damping = 4000.0
diff --git a/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/mindrobot_left_arm_ik_env_cfg.py b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/mindrobot_left_arm_ik_env_cfg.py
new file mode 100644
index 0000000..4dc3947
--- /dev/null
+++ b/source/mindbot/mindbot/tasks/manager_based/il/open_drybox/mindrobot_left_arm_ik_env_cfg.py
@@ -0,0 +1,271 @@
+# Copyright (c) 2024, MindRobot Project
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""MindRobot left arm IK-Rel environment config for teleoperation."""
+
+from __future__ import annotations
+from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
+
+import isaaclab.sim as sim_utils
+from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
+from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
+from isaaclab.devices import DevicesCfg
+from isaaclab.devices.keyboard import Se3KeyboardCfg
+from isaaclab.devices.spacemouse import Se3SpaceMouseCfg
+from isaaclab.devices.gamepad import Se3GamepadCfg
+from isaaclab.envs import ManagerBasedRLEnvCfg
+from isaaclab.envs.mdp.actions.actions_cfg import (
+    BinaryJointPositionActionCfg,
+    DifferentialInverseKinematicsActionCfg,
+)
+from isaaclab.managers import ObservationGroupCfg as ObsGroup
+from isaaclab.managers import ObservationTermCfg as ObsTerm
+from isaaclab.managers import TerminationTermCfg as DoneTerm
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.sensors.frame_transformer.frame_transformer_cfg import FrameTransformerCfg
+from isaaclab.sensors.frame_transformer.frame_transformer_cfg import OffsetCfg
+from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg
+from isaaclab.sim.spawners.from_files.from_files_cfg import GroundPlaneCfg
+from isaaclab.utils import configclass
+from isaaclab.markers.config import FRAME_MARKER_CFG
+from isaaclab.devices.openxr import XrCfg
+
+# 导入基础配置和MDP函数
+from isaaclab_tasks.manager_based.manipulation.stack import mdp
+
+# 导入 MindRobot 资产
+from .mindrobot_cfg import MINDBOT_HIGH_PD_CFG
+
+# 在文件开头添加
+import isaaclab.utils.assets as assets_utils
+
+# # 然后在 scene 配置中使用
+# spawn=sim_utils.UsdFileCfg(
+#     usd_path=f"{assets_utils.ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd"
+# ),
+# =====================================================================
+# Scene Configuration
+# =====================================================================
+
+@configclass
+class MindRobotTeleopSceneCfg(InteractiveSceneCfg):
+    """Minimal scene for MindRobot teleoperation: robot + table + optional cube."""
+
+    # Ground plane
+    plane = AssetBaseCfg(
+        prim_path="/World/GroundPlane",
+        init_state=AssetBaseCfg.InitialStateCfg(pos=[0, 0, -1.05]),
+        spawn=GroundPlaneCfg(),
+    )
+
+    # # Table
+    # table = AssetBaseCfg(
+    #     prim_path="{ENV_REGEX_NS}/Table",
+    #     init_state=AssetBaseCfg.InitialStateCfg(pos=[0.5, 0, 0], rot=[0.707, 0, 0, 0.707]),
+    #     spawn=sim_utils.UsdFileCfg(
+    #         usd_path=f"{assets_utils.ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd"
+    #     ),
+    # )
+
+    # Optional: Single cube for testing (can be removed if not needed)
+    cube = RigidObjectCfg(
+        prim_path="{ENV_REGEX_NS}/Cube",
+        spawn=sim_utils.CuboidCfg(
+            size=(0.04, 0.04, 0.04),
+            rigid_props=RigidBodyPropertiesCfg(),
+            mass_props=sim_utils.MassPropertiesCfg(mass=0.1),
+            collision_props=sim_utils.CollisionPropertiesCfg(),
+            visual_material=sim_utils.PreviewSurfaceCfg(
+                diffuse_color=(1.0, 0.0, 0.0)
+            ),
+        ),
+        init_state=RigidObjectCfg.InitialStateCfg(pos=(0.5, 0.0, 0.45)),
+    )
+
+    # MindRobot
+    robot: ArticulationCfg = MINDBOT_HIGH_PD_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+
+    # Lighting
+    light = AssetBaseCfg(
+        prim_path="/World/light",
+        spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0),
+    )
+
+    # End-effector frame transformer
+    ee_frame: FrameTransformerCfg = None  # Will be set in __post_init__
+
+
+# =====================================================================
+# Actions Configuration
+# =====================================================================
+
+@configclass
+class MindRobotTeleopActionsCfg:
+    """Actions for MindRobot left arm IK teleoperation."""
+
+    # Left arm IK control
+    arm_action = DifferentialInverseKinematicsActionCfg(
+        asset_name="robot",
+        joint_names=["l_joint[1-6]"],
+        body_name="Link_6",  # Last link of left arm
+        controller=DifferentialIKControllerCfg(
+            command_type="pose",
+            use_relative_mode=True,
+            ik_method="dls",
+        ),
+        scale=0.5,
+        body_offset=DifferentialInverseKinematicsActionCfg.OffsetCfg(
+            pos=[0.0, 0.0, 0.0]
+        ),
+    )
+
+    # Left gripper control (binary: open/close)
+    gripper_action = BinaryJointPositionActionCfg(
+        asset_name="robot",
+        joint_names=["left_hand_joint_left", "left_hand_joint_right"],
+        open_command_expr={
+            "left_hand_joint_left": 0.0,
+            "left_hand_joint_right": 0.0,
+        },
+        close_command_expr={
+            "left_hand_joint_left": -0.03,
+            "left_hand_joint_right": 0.03,
+        },
+    )
+
+
+# =====================================================================
+# Observations Configuration
+# =====================================================================
+
+@configclass
+class MindRobotTeleopObsCfg:
+    """Minimal observations for MindRobot teleoperation."""
+
+    @configclass
+    class PolicyCfg(ObsGroup):
+        """Observations for policy group - only robot state, no cube dependencies."""
+
+        actions = ObsTerm(func=mdp.last_action)
+        joint_pos = ObsTerm(func=mdp.joint_pos_rel)
+        joint_vel = ObsTerm(func=mdp.joint_vel_rel)
+        eef_pos = ObsTerm(func=mdp.ee_frame_pos)
+        eef_quat = ObsTerm(func=mdp.ee_frame_quat)
+        gripper_pos = ObsTerm(func=mdp.gripper_pos)
+
+        def __post_init__(self):
+            self.enable_corruption = False
+            self.concatenate_terms = False
+
+    # observation groups
+    policy: PolicyCfg = PolicyCfg()
+
+
+# =====================================================================
+# Terminations Configuration
+# =====================================================================
+
+@configclass
+class MindRobotTeleopTerminationsCfg:
+    """Minimal terminations for teleoperation - only time_out or none."""
+
+    # Optional: Keep time_out for safety, or remove entirely
+    time_out = DoneTerm(func=mdp.time_out, time_out=True)
+
+
+# =====================================================================
+# Main Environment Configuration
+# =====================================================================
+
+@configclass
+class EmptyCfg:
+    pass
+
+@configclass
+class MindRobotLeftArmIKEnvCfg(ManagerBasedRLEnvCfg):
+    """MindRobot 左臂 IK-Rel 遥操作环境配置（最小化版本）。"""
+
+    # Scene settings
+    scene: MindRobotTeleopSceneCfg = MindRobotTeleopSceneCfg(
+        num_envs=1,
+        env_spacing=2.5,
+    )
+
+    # Basic settings
+    observations: MindRobotTeleopObsCfg = MindRobotTeleopObsCfg()
+    actions: MindRobotTeleopActionsCfg = MindRobotTeleopActionsCfg()
+
+    # MDP settings
+    terminations: MindRobotTeleopTerminationsCfg = MindRobotTeleopTerminationsCfg()
+
+    # Unused managers
+    commands: EmptyCfg = EmptyCfg()
+    rewards: EmptyCfg = EmptyCfg()
+    events: EmptyCfg = EmptyCfg()
+    curriculum: EmptyCfg = EmptyCfg()
+
+    # XR configuration for hand tracking (if needed)
+    xr: XrCfg = XrCfg(
+        anchor_pos=(-0.1, -0.5, -1.05),
+        anchor_rot=(0.866, 0, 0, -0.5),
+    )
+
+    def __post_init__(self):
+        """Post initialization."""
+        super().__post_init__()
+
+        # General settings
+        self.decimation = 2
+        self.episode_length_s = 50.0
+
+        # Simulation settings
+        self.sim.dt = 0.01  # 100Hz
+        self.sim.render_interval = 2
+
+        # Set MindRobot
+        self.scene.robot = MINDBOT_HIGH_PD_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+
+        # Configure end-effector frame transformer
+        marker_cfg = FRAME_MARKER_CFG.copy()
+        marker_cfg.markers["frame"].scale = (0.1, 0.1, 0.1)
+        marker_cfg.prim_path = "/Visuals/FrameTransformer"
+        self.scene.ee_frame = FrameTransformerCfg(
+            prim_path="{ENV_REGEX_NS}/Robot/rm_65_fb_left/base_link",
+            debug_vis=False,
+            visualizer_cfg=marker_cfg,
+            target_frames=[
+                FrameTransformerCfg.FrameCfg(
+                    prim_path="{ENV_REGEX_NS}/Robot/rm_65_fb_left/Link_6",
+                    name="end_effector",
+                    offset=OffsetCfg(
+                        pos=[0.0, 0.0, 0.0],
+                    ),
+                ),
+            ],
+        )
+
+        # Configure teleoperation devices
+        self.teleop_devices = DevicesCfg(
+            devices={
+                "keyboard": Se3KeyboardCfg(
+                    pos_sensitivity=0.05,
+                    rot_sensitivity=0.05,
+                    sim_device=self.sim.device,
+                ),
+                "spacemouse": Se3SpaceMouseCfg(
+                    pos_sensitivity=0.05,
+                    rot_sensitivity=0.05,
+                    sim_device=self.sim.device,
+                ),
+                "gamepad": Se3GamepadCfg(
+                    pos_sensitivity=0.1,
+                    rot_sensitivity=0.1,
+                    sim_device=self.sim.device,
+                ),
+            }
+        )
+
+        # Gripper parameters for status checking
+        self.gripper_joint_names = ["left_hand_joint_left", "left_hand_joint_right"]
+        self.gripper_open_val = 0.0
+        self.gripper_threshold = 0.005
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/__init__.py b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/__init__.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/__init__.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/__init__.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/__init__.py b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/__init__.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/__init__.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/__init__.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/rl_games_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/rl_games_ppo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/rl_games_ppo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/rl_games_ppo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/rsl_rl_ppo_cfg.py b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/rsl_rl_ppo_cfg.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/rsl_rl_ppo_cfg.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/rsl_rl_ppo_cfg.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/sb3_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/sb3_ppo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/sb3_ppo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/sb3_ppo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_amp_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/skrl_amp_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_amp_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/skrl_amp_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_ippo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/skrl_ippo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_ippo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/skrl_ippo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_mappo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/skrl_mappo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_mappo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/skrl_mappo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/skrl_ppo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/agents/skrl_ppo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/agents/skrl_ppo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/centrifuge_env_cfg.py b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/centrifuge_env_cfg.py
similarity index 80%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/centrifuge_env_cfg.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/centrifuge_env_cfg.py
index 6e24fe5..ffe86b9 100644
--- a/source/mindbot/mindbot/tasks/manager_based/centrifuge/centrifuge_env_cfg.py
+++ b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/centrifuge_env_cfg.py
@@ -21,7 +21,10 @@ from isaaclab.managers import CurriculumTermCfg as CurrTerm
 from isaaclab.scene import InteractiveSceneCfg
 from isaaclab.utils import configclass
 from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
-from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
+from isaaclab.envs.mdp.actions.actions_cfg import (
+    DifferentialInverseKinematicsActionCfg,
+    BinaryJointPositionActionCfg,
+)
 from isaaclab.sensors import CameraCfg
 
 from . import mdp
@@ -34,10 +37,13 @@ from mindbot.robot.mindbot import MINDBOT_CFG
 
 # ====== 其他物体配置 ======
 from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
-from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
+from isaaclab.sim.schemas.schemas_cfg import (
+    RigidBodyPropertiesCfg,
+    CollisionPropertiesCfg,
+)
 
 
-TABLE_CFG=RigidObjectCfg(
+TABLE_CFG = RigidObjectCfg(
     prim_path="{ENV_REGEX_NS}/Table",
     init_state=RigidObjectCfg.InitialStateCfg(
         pos=[0.95, -0.3, 0.005],
@@ -46,17 +52,17 @@ TABLE_CFG=RigidObjectCfg(
         ang_vel=[0.0, 0.0, 0.0],
     ),
     spawn=UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/equipment/table/Table_C.usd",
         rigid_props=RigidBodyPropertiesCfg(
-            rigid_body_enabled= True,
+            rigid_body_enabled=True,
             solver_position_iteration_count=32,
             solver_velocity_iteration_count=16,
             disable_gravity=False,
         ),
         collision_props=CollisionPropertiesCfg(
             collision_enabled=True,
-            contact_offset=0.0005,#original 0.02
-            rest_offset=0
+            contact_offset=0.0005,  # original 0.02
+            rest_offset=0,
         ),
     ),
 )
@@ -65,30 +71,30 @@ LID_CFG = RigidObjectCfg(
     prim_path="{ENV_REGEX_NS}/Lid",
     init_state=RigidObjectCfg.InitialStateCfg(
         # pos=[0.9523,-0.2512,1.0923],
-        pos=[0.803 , -0.25, 1.0282],#(0.9988, -0.2977, 1.0498633)
+        pos=[0.803, -0.25, 1.0282],  # (0.9988, -0.2977, 1.0498633)
         # rot=[-0.7071, 0.0, 0.0, 0.7071],
         rot=[0.0, 0.0, 0.0, 1.0],
         lin_vel=[0.0, 0.0, 0.0],
         ang_vel=[0.0, 0.0, 0.0],
     ),
     spawn=UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/assets/Collected_equipment001/Equipment/tube1.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/assets/Collected_equipment001/Equipment/tube1.usd",
         # scale=(0.2, 0.2, 0.2),
         rigid_props=RigidBodyPropertiesCfg(
-            rigid_body_enabled= True,
+            rigid_body_enabled=True,
             solver_position_iteration_count=32,
             solver_velocity_iteration_count=16,
             max_angular_velocity=1000.0,
             max_linear_velocity=1000.0,
-            max_depenetration_velocity=0.5,#original 5.0
-            linear_damping=5.0, #original 0.5
-            angular_damping=5.0, #original 0.5
+            max_depenetration_velocity=0.5,  # original 5.0
+            linear_damping=5.0,  # original 0.5
+            angular_damping=5.0,  # original 0.5
             disable_gravity=False,
         ),
         collision_props=CollisionPropertiesCfg(
             collision_enabled=True,
-            contact_offset=0.0005,#original 0.02
-            rest_offset=0
+            contact_offset=0.0005,  # original 0.02
+            rest_offset=0,
         ),
     ),
 )
@@ -104,7 +110,7 @@ LID_CFG = RigidObjectCfg(
 #         ang_vel=[0.0, 0.0, 0.0],
 #     ),
 #     spawn=UsdFileCfg(
-#         usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/Lid_B.usd",
+#         usd_path="/home/maic/LYT/maic_usd_assets/equipment/centrifuge/Lid_B.usd",
 #         # scale=(0.2, 0.2, 0.2),
 #         rigid_props=RigidBodyPropertiesCfg(
 #             rigid_body_enabled= True,
@@ -127,9 +133,9 @@ LID_CFG = RigidObjectCfg(
 
 CENTRIFUGE_CFG = ArticulationCfg(
     spawn=sim_utils.UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/centrifuge/centrifuge.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/equipment/centrifuge/centrifuge.usd",
         rigid_props=sim_utils.RigidBodyPropertiesCfg(
-            disable_gravity=False, 
+            disable_gravity=False,
             max_depenetration_velocity=1.0,
             linear_damping=0.5,
             angular_damping=0.5,
@@ -140,45 +146,41 @@ CENTRIFUGE_CFG = ArticulationCfg(
             solver_velocity_iteration_count=16,
             stabilization_threshold=1e-6,
             # 【重要】必须要固定底座，否则机器人按盖子时，离心机会翻倒
-            # fix_root_link=True, 
+            # fix_root_link=True,
         ),
         collision_props=CollisionPropertiesCfg(
             collision_enabled=True,
-            contact_offset=0.0005,#original 0.02
-            rest_offset=0
+            contact_offset=0.0005,  # original 0.02
+            rest_offset=0,
         ),
     ),
     init_state=ArticulationCfg.InitialStateCfg(
         # 1. 参照机器人配置，在这里定义初始关节角度
         joint_pos={
-            "r1": math.radians(0.0), # 转子位置（无关紧要）
-            
+            "r1": math.radians(0.0),  # 转子位置（无关紧要）
             # 【关键修改】：初始让盖子处于打开状态
             # 您的 USD 限位是 (-100, 0)，-100度为最大开启
-            "r2": math.radians(-100.0), 
+            "r2": math.radians(-100.0),
         },
-        pos=(0.80, -0.25,  0.8085),#(0.95, -0.3,  0.8085)
+        pos=(0.80, -0.25, 0.8085),  # (0.95, -0.3,  0.8085)
         rot=[-0.7071, 0.0, 0.0, 0.7071],
         # rot=[0.0, 0.0, 0.0, 1.0],
     ),
     actuators={
         "lid_passive_mechanism": ImplicitActuatorCfg(
             joint_names_expr=["r2"],
-            
             # 【关键差异说明】
             # 机器人的手臂 stiffness 是 10000.0，因为机器人需要精准定位且不晃动。
-            # 
+            #
             # 但是！离心机盖子如果设为 10000，它就会像焊死在空中一样，机器人根本按不动。
             # 这里设为 200.0 左右：
             # 1. 力度足以克服重力，让盖子初始保持打开。
             # 2. 力度又足够软，当机器人用力下压时，盖子会顺从地闭合。
-            stiffness=200.0, 
-            
+            stiffness=200.0,
             # 阻尼：给一点阻力，模拟液压杆手感，防止像弹簧一样乱晃
-            damping=20.0,    
-            
+            damping=20.0,
             # 确保力矩上限不要太小，否则托不住盖子
-            effort_limit_sim=1000.0, 
+            effort_limit_sim=1000.0,
             velocity_limit_sim=100.0,
         ),
         # 转子可以硬一点，不需要被机器人按动
@@ -187,14 +189,14 @@ CENTRIFUGE_CFG = ArticulationCfg(
             stiffness=0.0,
             damping=10.0,
         ),
-    }
+    },
 )
 
 
 ROOM_CFG = AssetBaseCfg(
     prim_path="{ENV_REGEX_NS}/Room",
     spawn=UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
     ),
 )
 
@@ -217,7 +219,9 @@ class MindbotSceneCfg(InteractiveSceneCfg):
     # Room: AssetBaseCfg = ROOM_CFG.replace(prim_path="{ENV_REGEX_NS}/Room")
     Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
     table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
-    centrifuge: ArticulationCfg = CENTRIFUGE_CFG.replace(prim_path="{ENV_REGEX_NS}/centrifuge")
+    centrifuge: ArticulationCfg = CENTRIFUGE_CFG.replace(
+        prim_path="{ENV_REGEX_NS}/centrifuge"
+    )
     lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
     # lights
     dome_light = AssetBaseCfg(
@@ -236,7 +240,7 @@ class MindbotSceneCfg(InteractiveSceneCfg):
     # )
     """
     # left_hand_camera=CameraCfg(
-    #     prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand", 
+    #     prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
     #     update_period=1/120,
     #     height=480,
     #     width=640,
@@ -244,7 +248,7 @@ class MindbotSceneCfg(InteractiveSceneCfg):
     #     spawn=None,
     # )
     # right_hand_camera=CameraCfg(
-    #     prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand", 
+    #     prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
     #     update_period=1/120,
     #     height=480,
     #     width=640,
@@ -252,7 +256,7 @@ class MindbotSceneCfg(InteractiveSceneCfg):
     #     spawn=None,
     # )
     # head_camera=CameraCfg(
-    #     prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head", 
+    #     prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
     #     update_period=1/120,
     #     height=480,
     #     width=640,
@@ -260,18 +264,23 @@ class MindbotSceneCfg(InteractiveSceneCfg):
     #     spawn=None,
     # )
     # chest_camera=CameraCfg(
-    #     prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest", 
+    #     prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
     #     update_period=1/120,
     #     height=480,
     #     width=640,
     #     data_types=["rgb"],
     #     spawn=None,
     # )
+
+
 ##
 # MDP settings
 ##
 
-def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
+
+def left_gripper_pos_w(
+    env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")
+) -> torch.Tensor:
     asset = env.scene[asset_cfg.name]
     body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
     pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
@@ -294,14 +303,12 @@ class ActionsCfg:
     #     ),
     #     scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
     # )
-    
+
     right_arm_fixed = mdp.JointPositionActionCfg(
         asset_name="Mindbot",
-        joint_names=["r_joint[1-6]"], # 对应 l_joint1 到 l_joint6
-        
+        joint_names=["r_joint[1-6]"],  # 对应 l_joint1 到 l_joint6
         # 1. 缩放设为 0：这让 RL 策略无法控制它，它不会动
-        scale=0.0, 
-        
+        scale=0.0,
         # 2. 偏移设为你的目标角度（弧度）：这告诉电机“永远停在这里”
         # 对应 (135, -70, -90, 90, 90, -70)
         offset={
@@ -317,34 +324,35 @@ class ActionsCfg:
     grippers_position = mdp.BinaryJointPositionActionCfg(
         asset_name="Mindbot",
         joint_names=["left_hand_joint_left", "left_hand_joint_right"],
-        
         # 修正：使用字典格式
         # open_command_expr={"关节名正则": 值}
         open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
-        
         # close_command_expr={"关节名正则": 值}
-        close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
+        close_command_expr={
+            "left_hand_joint_left": -0.03,
+            "left_hand_joint_right": 0.03,
+        },
     )
 
-
     trunk_position = mdp.JointPositionActionCfg(
         asset_name="Mindbot",
         joint_names=["PrismaticJoint"],
         scale=0.00,
-        offset=0.24, # 0.3
-        clip=None
+        offset=0.24,  # 0.3
+        clip=None,
     )
 
     centrifuge_lid_passive = mdp.JointPositionActionCfg(
-        asset_name="centrifuge", # 对应场景中的名称
+        asset_name="centrifuge",  # 对应场景中的名称
         joint_names=["r2"],
         # 将 scale 设为 0，意味着 RL 算法输出的任何值都会被乘 0，即无法干扰它
-        scale=0.00, 
+        scale=0.00,
         # 将 offset 设为目标角度，这会成为 PD 控制器的恒定 Target
-        offset= -1.7453,
-        clip=None
+        offset=-1.7453,
+        clip=None,
     )
 
+
 @configclass
 class ObservationsCfg:
     """Observation specifications for the MDP."""
@@ -353,15 +361,29 @@ class ObservationsCfg:
     class PolicyCfg(ObsGroup):
         """Observations for policy group."""
 
-        mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
-        mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
-        mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
-        
-        left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
-                                   params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
-        lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
-        centrifuge_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("centrifuge")})
-        
+        mindbot_joint_pos_rel = ObsTerm(
+            func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")}
+        )
+        mindbot_root_pos = ObsTerm(
+            func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}
+        )
+        mindbot_root_quat = ObsTerm(
+            func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}
+        )
+
+        left_gripper_pos = ObsTerm(
+            func=left_gripper_pos_w,
+            params={
+                "asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])
+            },
+        )
+        lid_pos_abs = ObsTerm(
+            func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")}
+        )
+        centrifuge_pos_abs = ObsTerm(
+            func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("centrifuge")}
+        )
+
         def __post_init__(self) -> None:
             self.enable_corruption = False
             self.concatenate_terms = True
@@ -409,29 +431,36 @@ class EventCfg:
             "velocity_range": (0.0, 0.0),
         },
     )
-    
-    reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
-        params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
+
+    reset_table = EventTerm(
+        func=mdp.reset_root_state_uniform,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("table"),
+            "pose_range": {"x": (0.0, 0.0)},
+            "velocity_range": {"x": (0.0, 0.0)},
+        },
+    )
     reset_centrifuge = EventTerm(
-        func=mdp.reset_root_state_uniform, 
+        func=mdp.reset_root_state_uniform,
         mode="reset",
         params={
-            "asset_cfg": SceneEntityCfg("centrifuge"), 
-            # "pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)}, 
-            "pose_range": {"x": (0.0, 0.0), "y": (0.0, 0.0), "z": (0.0, 0.0)}, 
-            "velocity_range": {"x": (0.0, 0.0)}
-            }
-        )
+            "asset_cfg": SceneEntityCfg("centrifuge"),
+            # "pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)},
+            "pose_range": {"x": (0.0, 0.0), "y": (0.0, 0.0), "z": (0.0, 0.0)},
+            "velocity_range": {"x": (0.0, 0.0)},
+        },
+    )
     reset_lid = EventTerm(
-        func=mdp.reset_root_state_uniform, 
+        func=mdp.reset_root_state_uniform,
         mode="reset",
         params={
-            "asset_cfg": SceneEntityCfg("lid"), 
-            # "pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)}, 
-            "pose_range": {"x": (0.0, 0.0), "y": (0.0, 0.0), "z": (0.0, 0.0)}, 
-            "velocity_range": {"x": (0.0, 0.0)}
-            }
-        )
+            "asset_cfg": SceneEntityCfg("lid"),
+            # "pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)},
+            "pose_range": {"x": (0.0, 0.0), "y": (0.0, 0.0), "z": (0.0, 0.0)},
+            "velocity_range": {"x": (0.0, 0.0)},
+        },
+    )
 
 
 @configclass
@@ -444,12 +473,12 @@ class RewardsCfg:
             "lid_cfg": SceneEntityCfg("lid"),
             "robot_cfg": SceneEntityCfg("Mindbot"),
             "gripper_body_name": "right_hand_body",
-            "gripper_forward_axis": (0.0, 0.0, 1.0), 
-            "gripper_width_axis": (0.0, 1.0, 0.0), 
+            "gripper_forward_axis": (0.0, 0.0, 1.0),
+            "gripper_width_axis": (0.0, 1.0, 0.0),
             "lid_handle_axis": (0.0, 1.0, 0.0),
             "max_angle_deg": 85.0,  # 允许60度内的偏差
         },
-        weight=5.0 #original 5.0
+        weight=5.0,  # original 5.0
     )
 
     # stage 2
@@ -462,9 +491,9 @@ class RewardsCfg:
             "left_gripper_name": "right_hand_l",
             "right_gripper_name": "right_hand__r",
             "height_offset": 0.115,  # Z方向：lid 上方 0.1m
-            "std": 0.3,  # 位置对齐的容忍度    
+            "std": 0.3,  # 位置对齐的容忍度
         },
-        weight=3.0 #original 3.0
+        weight=3.0,  # original 3.0
     )
     approach_lid_penalty = RewTerm(
         func=mdp.gripper_lid_distance_penalty,
@@ -477,7 +506,7 @@ class RewardsCfg:
         },
         # weight 为负数表示惩罚。
         # 假设距离 0.5m，惩罚就是 -0.5 * 2.0 = -1.0 分。
-        weight=-1.0 
+        weight=-1.0,
     )
     # # 【新增】精细对齐 (引导进入 2cm 圈)
     # gripper_lid_fine_alignment = RewTerm(
@@ -493,7 +522,7 @@ class RewardsCfg:
     #     weight=10.0 # 高权重
     # )
 
-    # gripper_close_interaction = RewTerm( 
+    # gripper_close_interaction = RewTerm(
     #     func=mdp.gripper_close_when_near,
     #     params={
     #         "lid_cfg": SceneEntityCfg("lid"),
@@ -527,12 +556,10 @@ class RewardsCfg:
     #         "std": 0.1
     #     },
     #     # 权重设大一点，告诉它这是最终目标
-    #     weight=10.0 
+    #     weight=10.0
     # )
 
 
-
-
 @configclass
 class TerminationsCfg:
     """Termination terms for the MDP."""
@@ -543,7 +570,7 @@ class TerminationsCfg:
         func=mdp.base_height_failure,
         params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
     )
-    
+
     lid_fly_away = DoneTerm(
         func=mdp.base_height_failure,
         params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
@@ -552,25 +579,25 @@ class TerminationsCfg:
     # 新增：盖子掉落判定
     # 如果盖子掉回桌面（高度接近初始高度），且已经运行了一段时间（避开刚开始重置的瞬间）
     lid_dropped = DoneTerm(
-        func=mdp.base_height_failure, # 复用高度判定函数
+        func=mdp.base_height_failure,  # 复用高度判定函数
         params={
-            "asset_cfg": SceneEntityCfg("lid"), 
-            "minimum_height": 0.79, # 假设初始高度是 0.9，低于 0.88 说明掉下去了或者被砸进去了
+            "asset_cfg": SceneEntityCfg("lid"),
+            "minimum_height": 0.79,  # 假设初始高度是 0.9，低于 0.88 说明掉下去了或者被砸进去了
         },
     )
-    
 
 
 ##
 # Environment configuration
 ##
 
+
 @configclass
 class CurriculumCfg:
     pass
     # action_rate = CurrTerm(func=mdp.modify_reward_weight,
     #                        params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
-                           
+
     # joint_vel = CurrTerm(func=mdp.modify_reward_weight,
     #                      params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
 
@@ -579,10 +606,10 @@ class CurriculumCfg:
     #     params={
     #         # 路径：rewards管理器 -> 你的奖励项名 -> params字典 -> std键
     #         "address": "rewards.gripper_lid_position_alignment.params.std",
-            
+
     #         # 修改逻辑：使用我们刚才写的函数
     #         "modify_fn": mdp.annealing_std,
-            
+
     #         # 传给函数的参数
     #         "modify_params": {
     #             "start_step": 00, # 约 600 轮
@@ -593,6 +620,7 @@ class CurriculumCfg:
     #     }
     # )
 
+
 @configclass
 class MindbotEnvCfg(ManagerBasedRLEnvCfg):
     # Scene settings
@@ -610,20 +638,20 @@ class MindbotEnvCfg(ManagerBasedRLEnvCfg):
     def __post_init__(self) -> None:
         """Post initialization."""
         # general settings
-        self.decimation = 4 #original 2  # 从 2 增加到 4，控制频率从 25Hz 降到 12.5Hz
+        self.decimation = 4  # original 2  # 从 2 增加到 4，控制频率从 25Hz 降到 12.5Hz
         self.episode_length_s = 10
         # viewer settings
-        self.viewer.eye = (3.5, 0.0, 13.2)#(3.5, 0.0, 3.2)
+        self.viewer.eye = (3.5, 0.0, 13.2)  # (3.5, 0.0, 3.2)
         # simulation settings
-        self.sim.dt = 1 / 120   #original 1 / 60
+        self.sim.dt = 1 / 120  # original 1 / 60
         self.sim.render_interval = self.decimation
         # # 1. 基础堆内存
         self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024  # 256MB
-        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024 
+        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
         self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
-        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024 # 200万个接触点
-        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024       # 32万个 patch
-        
+        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # 200万个接触点
+        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # 32万个 patch
+
         # # 5. 聚合对容量 (针对复杂的 Articulation)
         self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
         self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/__init__.py b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/mdp/__init__.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/__init__.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/mdp/__init__.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/curriculums.py b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/mdp/curriculums.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/curriculums.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/mdp/curriculums.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/gripper.py b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/mdp/gripper.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/gripper.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/mdp/gripper.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/rewards.py b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/mdp/rewards.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/rewards.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/mdp/rewards.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/terminations.py b/source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/mdp/terminations.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/centrifuge/mdp/terminations.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/centrifuge/mdp/terminations.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/__init__.py b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/__init__.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/__init__.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/__init__.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/__init__.py b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/__init__.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/agents/__init__.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/__init__.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/rl_games_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/rl_games_ppo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/agents/rl_games_ppo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/rl_games_ppo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/rsl_rl_ppo_cfg.py b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/rsl_rl_ppo_cfg.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/agents/rsl_rl_ppo_cfg.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/rsl_rl_ppo_cfg.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/sb3_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/sb3_ppo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/agents/sb3_ppo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/sb3_ppo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_amp_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/skrl_amp_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_amp_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/skrl_amp_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_ippo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/skrl_ippo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_ippo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/skrl_ippo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_mappo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/skrl_mappo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_mappo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/skrl_mappo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/skrl_ppo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_ppo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/agents/skrl_ppo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/__init__.py b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/mdp/__init__.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/__init__.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/mdp/__init__.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/curriculums.py b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/mdp/curriculums.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/curriculums.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/mdp/curriculums.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/gripper.py b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/mdp/gripper.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/gripper.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/mdp/gripper.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/rewards.py b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/mdp/rewards.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/rewards.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/mdp/rewards.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/terminations.py b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/mdp/terminations.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/terminations.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/mdp/terminations.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/mindbot_env_cfg.py b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/mindbot_env_cfg.py
similarity index 72%
rename from source/mindbot/mindbot/tasks/manager_based/mindbot/mindbot_env_cfg.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/mindbot/mindbot_env_cfg.py
index 1bbacf2..d91617a 100644
--- a/source/mindbot/mindbot/tasks/manager_based/mindbot/mindbot_env_cfg.py
+++ b/source/mindbot/mindbot/tasks/manager_based/rl/mindbot/mindbot_env_cfg.py
@@ -21,7 +21,10 @@ from isaaclab.managers import CurriculumTermCfg as CurrTerm
 from isaaclab.scene import InteractiveSceneCfg
 from isaaclab.utils import configclass
 from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
-from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
+from isaaclab.envs.mdp.actions.actions_cfg import (
+    DifferentialInverseKinematicsActionCfg,
+    BinaryJointPositionActionCfg,
+)
 
 from . import mdp
 
@@ -33,10 +36,13 @@ from mindbot.robot.mindbot import MINDBOT_CFG
 
 # ====== 其他物体配置 ======
 from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
-from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
+from isaaclab.sim.schemas.schemas_cfg import (
+    RigidBodyPropertiesCfg,
+    CollisionPropertiesCfg,
+)
 
 
-TABLE_CFG=RigidObjectCfg(
+TABLE_CFG = RigidObjectCfg(
     prim_path="{ENV_REGEX_NS}/Table",
     init_state=RigidObjectCfg.InitialStateCfg(
         pos=[0.95, -0.3, 0.005],
@@ -45,17 +51,17 @@ TABLE_CFG=RigidObjectCfg(
         ang_vel=[0.0, 0.0, 0.0],
     ),
     spawn=UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/equipment/table/Table_C.usd",
         rigid_props=RigidBodyPropertiesCfg(
-            rigid_body_enabled= True,
+            rigid_body_enabled=True,
             solver_position_iteration_count=32,
             solver_velocity_iteration_count=16,
             disable_gravity=False,
         ),
         collision_props=CollisionPropertiesCfg(
             collision_enabled=True,
-            contact_offset=0.0005,#original 0.02
-            rest_offset=0
+            contact_offset=0.0005,  # original 0.02
+            rest_offset=0,
         ),
     ),
 )
@@ -69,23 +75,23 @@ LID_CFG = RigidObjectCfg(
         ang_vel=[0.0, 0.0, 0.0],
     ),
     spawn=UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
         # scale=(0.2, 0.2, 0.2),
         rigid_props=RigidBodyPropertiesCfg(
-            rigid_body_enabled= True,
+            rigid_body_enabled=True,
             solver_position_iteration_count=32,
             solver_velocity_iteration_count=16,
             max_angular_velocity=1000.0,
             max_linear_velocity=1000.0,
-            max_depenetration_velocity=0.5,#original 5.0
-            linear_damping=5.0, #original 0.5
-            angular_damping=5.0, #original 0.5
+            max_depenetration_velocity=0.5,  # original 5.0
+            linear_damping=5.0,  # original 0.5
+            angular_damping=5.0,  # original 0.5
             disable_gravity=False,
         ),
         collision_props=CollisionPropertiesCfg(
             collision_enabled=True,
-            contact_offset=0.0005,#original 0.02
-            rest_offset=0
+            contact_offset=0.0005,  # original 0.02
+            rest_offset=0,
         ),
     ),
 )
@@ -93,13 +99,12 @@ LID_CFG = RigidObjectCfg(
 
 ULTRASOUND_CFG = ArticulationCfg(
     spawn=sim_utils.UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
         rigid_props=sim_utils.RigidBodyPropertiesCfg(
-            disable_gravity=False, 
-            max_depenetration_velocity=1.0
+            disable_gravity=False, max_depenetration_velocity=1.0
         ),
         articulation_props=sim_utils.ArticulationRootPropertiesCfg(
-            enabled_self_collisions=False,#
+            enabled_self_collisions=False,  #
             solver_position_iteration_count=32,
             solver_velocity_iteration_count=16,
             stabilization_threshold=1e-6,
@@ -112,13 +117,12 @@ ULTRASOUND_CFG = ArticulationCfg(
         "passive_damper": ImplicitActuatorCfg(
             # ".*" 表示匹配该USD文件内的所有关节（无论是轮子、屏幕转轴还是其他）
             joint_names_expr=[".*"],
-            
-            stiffness=0.0, 
-            damping=1000.0, 
-            effort_limit_sim=10000.0, 
+            stiffness=0.0,
+            damping=1000.0,
+            effort_limit_sim=10000.0,
             velocity_limit_sim=100.0,
         ),
-    }
+    },
 )
 
 ##
@@ -139,7 +143,9 @@ class MindbotSceneCfg(InteractiveSceneCfg):
     # robot
     Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
     table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
-    ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
+    ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(
+        prim_path="{ENV_REGEX_NS}/ultrasound"
+    )
     lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
     # lights
     dome_light = AssetBaseCfg(
@@ -152,7 +158,10 @@ class MindbotSceneCfg(InteractiveSceneCfg):
 # MDP settings
 ##
 
-def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
+
+def left_gripper_pos_w(
+    env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")
+) -> torch.Tensor:
     asset = env.scene[asset_cfg.name]
     body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
     pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
@@ -167,11 +176,11 @@ class ActionsCfg:
     left_arm_ee = DifferentialInverseKinematicsActionCfg(
         asset_name="Mindbot",
         joint_names=["l_joint[1-6]"],  # 左臂的6个关节
-        body_name="left_hand_body",     # 末端执行器body名称
+        body_name="left_hand_body",  # 末端执行器body名称
         controller=DifferentialIKControllerCfg(
-            command_type="pose",        # 控制位置+姿态
-            use_relative_mode=True,     # 相对模式：动作是增量
-            ik_method="dls",            # Damped Least Squares方法
+            command_type="pose",  # 控制位置+姿态
+            use_relative_mode=True,  # 相对模式：动作是增量
+            ik_method="dls",  # Damped Least Squares方法
         ),
         scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
     )
@@ -197,22 +206,22 @@ class ActionsCfg:
     grippers_position = mdp.BinaryJointPositionActionCfg(
         asset_name="Mindbot",
         joint_names=["left_hand_joint_left", "left_hand_joint_right"],
-        
         # 修正：使用字典格式
         # open_command_expr={"关节名正则": 值}
         open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
-        
         # close_command_expr={"关节名正则": 值}
-        close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
+        close_command_expr={
+            "left_hand_joint_left": -0.03,
+            "left_hand_joint_right": 0.03,
+        },
     )
 
-
     trunk_position = mdp.JointPositionActionCfg(
         asset_name="Mindbot",
         joint_names=["PrismaticJoint"],
         scale=0.00,
         offset=0.3,
-        clip=None
+        clip=None,
     )
 
 
@@ -224,15 +233,29 @@ class ObservationsCfg:
     class PolicyCfg(ObsGroup):
         """Observations for policy group."""
 
-        mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
-        mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
-        mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
-        
-        left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
-                                   params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
-        lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
-        ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
-        
+        mindbot_joint_pos_rel = ObsTerm(
+            func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")}
+        )
+        mindbot_root_pos = ObsTerm(
+            func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}
+        )
+        mindbot_root_quat = ObsTerm(
+            func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}
+        )
+
+        left_gripper_pos = ObsTerm(
+            func=left_gripper_pos_w,
+            params={
+                "asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])
+            },
+        )
+        lid_pos_abs = ObsTerm(
+            func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")}
+        )
+        ultrasound_pos_abs = ObsTerm(
+            func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")}
+        )
+
         def __post_init__(self) -> None:
             self.enable_corruption = False
             self.concatenate_terms = True
@@ -244,6 +267,7 @@ class ObservationsCfg:
 @configclass
 class EventCfg:
     """Configuration for events."""
+
     # reset_mindbot_root=EventTerm(
     #     func=mdp.reset_root_state_uniform,
     #     mode="reset",
@@ -273,18 +297,39 @@ class EventCfg:
             "velocity_range": (0.0, 0.0),
         },
     )
-    reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
-        params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
-    reset_ultrasound = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
-        params={"asset_cfg": SceneEntityCfg("ultrasound"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
-    reset_lid = EventTerm(func=mdp.reset_root_state_uniform, mode="reset",
-        params={"asset_cfg": SceneEntityCfg("lid"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
+    reset_table = EventTerm(
+        func=mdp.reset_root_state_uniform,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("table"),
+            "pose_range": {"x": (0.0, 0.0)},
+            "velocity_range": {"x": (0.0, 0.0)},
+        },
+    )
+    reset_ultrasound = EventTerm(
+        func=mdp.reset_root_state_uniform,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("ultrasound"),
+            "pose_range": {"x": (0.0, 0.0)},
+            "velocity_range": {"x": (0.0, 0.0)},
+        },
+    )
+    reset_lid = EventTerm(
+        func=mdp.reset_root_state_uniform,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("lid"),
+            "pose_range": {"x": (0.0, 0.0)},
+            "velocity_range": {"x": (0.0, 0.0)},
+        },
+    )
 
 
 @configclass
 class RewardsCfg:
     """Reward terms for the MDP."""
-    
+
     # 姿态对齐奖励： stage 1
     # gripper_lid_orientation_alignment = RewTerm(
     #     func=mdp.gripper_lid_orientation_alignment,
@@ -292,8 +337,8 @@ class RewardsCfg:
     #         "lid_cfg": SceneEntityCfg("lid"),
     #         "robot_cfg": SceneEntityCfg("Mindbot"),
     #         "gripper_body_name": "left_hand_body",
-    #         "gripper_forward_axis": (0.0, 0.0, 1.0), 
-    #         "gripper_width_axis": (0.0, 1.0, 0.0), 
+    #         "gripper_forward_axis": (0.0, 0.0, 1.0),
+    #         "gripper_width_axis": (0.0, 1.0, 0.0),
     #         "lid_handle_axis": (0.0, 1.0, 0.0)
     #     },
     #     weight=1.0
@@ -305,12 +350,12 @@ class RewardsCfg:
             "lid_cfg": SceneEntityCfg("lid"),
             "robot_cfg": SceneEntityCfg("Mindbot"),
             "gripper_body_name": "left_hand_body",
-            "gripper_forward_axis": (0.0, 0.0, 1.0), 
-            "gripper_width_axis": (0.0, 1.0, 0.0), 
+            "gripper_forward_axis": (0.0, 0.0, 1.0),
+            "gripper_width_axis": (0.0, 1.0, 0.0),
             "lid_handle_axis": (0.0, 1.0, 0.0),
             "max_angle_deg": 85.0,  # 允许60度内的偏差
         },
-        weight=5
+        weight=5,
     )
 
     # stage 2
@@ -325,7 +370,7 @@ class RewardsCfg:
             "height_offset": 0.07,  # Z方向：lid 上方 0.1m
             "std": 0.3,  # 位置对齐的容忍度
         },
-        weight=3.0
+        weight=3.0,
     )
 
     # 【新增】精细对齐 (引导进入 2cm 圈)
@@ -336,13 +381,13 @@ class RewardsCfg:
             "robot_cfg": SceneEntityCfg("Mindbot"),
             "left_gripper_name": "left_hand__l",
             "right_gripper_name": "left_hand_r",
-            "height_offset": 0.07, # 【注意】这个高度必须非常准，和 interaction 判定高度一致
-            "std": 0.05, # 非常陡峭的梯度，只在 5-10cm 内有效
+            "height_offset": 0.07,  # 【注意】这个高度必须非常准，和 interaction 判定高度一致
+            "std": 0.05,  # 非常陡峭的梯度，只在 5-10cm 内有效
         },
-        weight=10.0 # 高权重
+        weight=10.0,  # 高权重
     )
 
-    gripper_close_interaction = RewTerm( 
+    gripper_close_interaction = RewTerm(
         func=mdp.gripper_close_when_near,
         params={
             "lid_cfg": SceneEntityCfg("lid"),
@@ -354,34 +399,32 @@ class RewardsCfg:
             "height_offset": 0.04,
             "grasp_radius": 0.03,
         },
-        weight=10.0
+        weight=10.0,
     )
 
     lid_lifted_reward = RewTerm(
-        func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
+        func=mdp.lid_is_lifted,  # 使用 rewards.py 中已有的函数
         params={
             "lid_cfg": SceneEntityCfg("lid"),
-            "minimal_height": 1.0, # 根据你的场景调整，比初始高度高 5cm 左右
+            "minimal_height": 1.0,  # 根据你的场景调整，比初始高度高 5cm 左右
         },
-        weight=10.0 # 给一个足够大的权重
+        weight=10.0,  # 给一个足够大的权重
     )
 
     lid_lifting_reward = RewTerm(
         func=mdp.lid_lift_progress_reward,
         params={
             "lid_cfg": SceneEntityCfg("lid"),
-            "initial_height": 0.8, # 请务必核实这个值！在 print 里看一下 lid 的 z 坐标
+            "initial_height": 0.8,  # 请务必核实这个值！在 print 里看一下 lid 的 z 坐标
             "target_height_lift": 0.2,
-            "height_offset": 0.07, # 与其他奖励保持一致
-            "std": 0.1
+            "height_offset": 0.07,  # 与其他奖励保持一致
+            "std": 0.1,
         },
         # 权重设大一点，告诉它这是最终目标
-        weight=10.0 
+        weight=10.0,
     )
 
 
-
-
 @configclass
 class TerminationsCfg:
     """Termination terms for the MDP."""
@@ -392,7 +435,7 @@ class TerminationsCfg:
         func=mdp.base_height_failure,
         params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
     )
-    
+
     lid_fly_away = DoneTerm(
         func=mdp.base_height_failure,
         params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
@@ -401,25 +444,25 @@ class TerminationsCfg:
     # 新增：盖子掉落判定
     # 如果盖子掉回桌面（高度接近初始高度），且已经运行了一段时间（避开刚开始重置的瞬间）
     lid_dropped = DoneTerm(
-        func=mdp.base_height_failure, # 复用高度判定函数
+        func=mdp.base_height_failure,  # 复用高度判定函数
         params={
-            "asset_cfg": SceneEntityCfg("lid"), 
-            "minimum_height": 0.79, # 假设初始高度是 0.9，低于 0.88 说明掉下去了或者被砸进去了
+            "asset_cfg": SceneEntityCfg("lid"),
+            "minimum_height": 0.79,  # 假设初始高度是 0.9，低于 0.88 说明掉下去了或者被砸进去了
         },
     )
-    
 
 
 ##
 # Environment configuration
 ##
 
+
 @configclass
 class CurriculumCfg:
     pass
     # action_rate = CurrTerm(func=mdp.modify_reward_weight,
     #                        params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
-                           
+
     # joint_vel = CurrTerm(func=mdp.modify_reward_weight,
     #                      params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
 
@@ -428,10 +471,10 @@ class CurriculumCfg:
     #     params={
     #         # 路径：rewards管理器 -> 你的奖励项名 -> params字典 -> std键
     #         "address": "rewards.gripper_lid_position_alignment.params.std",
-            
+
     #         # 修改逻辑：使用我们刚才写的函数
     #         "modify_fn": mdp.annealing_std,
-            
+
     #         # 传给函数的参数
     #         "modify_params": {
     #             "start_step": 00, # 约 600 轮
@@ -442,6 +485,7 @@ class CurriculumCfg:
     #     }
     # )
 
+
 @configclass
 class MindbotEnvCfg(ManagerBasedRLEnvCfg):
     # Scene settings
@@ -459,20 +503,20 @@ class MindbotEnvCfg(ManagerBasedRLEnvCfg):
     def __post_init__(self) -> None:
         """Post initialization."""
         # general settings
-        self.decimation = 4 #original 2  # 从 2 增加到 4，控制频率从 25Hz 降到 12.5Hz
+        self.decimation = 4  # original 2  # 从 2 增加到 4，控制频率从 25Hz 降到 12.5Hz
         self.episode_length_s = 10
         # viewer settings
         self.viewer.eye = (3.5, 0.0, 3.2)
         # simulation settings
-        self.sim.dt = 1 / 120   #original 1 / 60
+        self.sim.dt = 1 / 120  # original 1 / 60
         self.sim.render_interval = self.decimation
         # # 1. 基础堆内存
         self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024  # 256MB
-        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024 
+        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
         self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
-        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024 # 200万个接触点
-        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024       # 32万个 patch
-        
+        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # 200万个接触点
+        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # 32万个 patch
+
         # # 5. 聚合对容量 (针对复杂的 Articulation)
         self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
         self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/__init__.py b/source/mindbot/mindbot/tasks/manager_based/rl/pull/__init__.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/__init__.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/__init__.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/agents/__init__.py b/source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/__init__.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/agents/__init__.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/__init__.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/agents/rl_games_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/rl_games_ppo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/agents/rl_games_ppo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/rl_games_ppo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/agents/rsl_rl_ppo_cfg.py b/source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/rsl_rl_ppo_cfg.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/agents/rsl_rl_ppo_cfg.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/rsl_rl_ppo_cfg.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/agents/sb3_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/sb3_ppo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/agents/sb3_ppo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/sb3_ppo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/agents/skrl_amp_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/skrl_amp_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/agents/skrl_amp_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/skrl_amp_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/agents/skrl_ippo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/skrl_ippo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/agents/skrl_ippo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/skrl_ippo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/agents/skrl_mappo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/skrl_mappo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/agents/skrl_mappo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/skrl_mappo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/agents/skrl_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/skrl_ppo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/agents/skrl_ppo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/agents/skrl_ppo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/mdp/__init__.py b/source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/__init__.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/mdp/__init__.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/__init__.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/mdp/curriculums.py b/source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/curriculums.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/mdp/curriculums.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/curriculums.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/mdp/gripper.py b/source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/gripper.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/mdp/gripper.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/gripper.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py b/source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/rewards.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/rewards.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py.bak b/source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/rewards.py.bak
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py.bak
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/rewards.py.bak
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py.bak1 b/source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/rewards.py.bak1
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/mdp/rewards.py.bak1
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/rewards.py.bak1
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/mdp/terminations.py b/source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/terminations.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/mdp/terminations.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/mdp/terminations.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/pull_env_cfg.py b/source/mindbot/mindbot/tasks/manager_based/rl/pull/pull_env_cfg.py
similarity index 82%
rename from source/mindbot/mindbot/tasks/manager_based/pull/pull_env_cfg.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/pull_env_cfg.py
index 41c6b8c..5367158 100644
--- a/source/mindbot/mindbot/tasks/manager_based/pull/pull_env_cfg.py
+++ b/source/mindbot/mindbot/tasks/manager_based/rl/pull/pull_env_cfg.py
@@ -21,22 +21,28 @@ from isaaclab.managers import CurriculumTermCfg as CurrTerm
 from isaaclab.scene import InteractiveSceneCfg
 from isaaclab.utils import configclass
 from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
-from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
+from isaaclab.envs.mdp.actions.actions_cfg import (
+    DifferentialInverseKinematicsActionCfg,
+    BinaryJointPositionActionCfg,
+)
 
 from . import mdp
 
 ##
 # Pre-defined configs
 ##
- 
+
 from mindbot.robot.mindbot import MINDBOT_CFG
 
 # ====== 其他物体配置 ======
 from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
-from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
+from isaaclab.sim.schemas.schemas_cfg import (
+    RigidBodyPropertiesCfg,
+    CollisionPropertiesCfg,
+)
 
 
-TABLE_CFG=RigidObjectCfg(
+TABLE_CFG = RigidObjectCfg(
     prim_path="{ENV_REGEX_NS}/Table",
     init_state=RigidObjectCfg.InitialStateCfg(
         pos=[0.95, -0.3, 0.005],
@@ -45,30 +51,29 @@ TABLE_CFG=RigidObjectCfg(
         ang_vel=[0.0, 0.0, 0.0],
     ),
     spawn=UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/equipment/table/Table_C.usd",
         rigid_props=RigidBodyPropertiesCfg(
-            rigid_body_enabled= True,
+            rigid_body_enabled=True,
             solver_position_iteration_count=32,
             solver_velocity_iteration_count=16,
             disable_gravity=False,
         ),
         collision_props=CollisionPropertiesCfg(
             collision_enabled=True,
-            contact_offset=0.0005,#original 0.02
-            rest_offset=0
+            contact_offset=0.0005,  # original 0.02
+            rest_offset=0,
         ),
     ),
 )
 
 DRYINGBOX_CFG = ArticulationCfg(
     spawn=sim_utils.UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/assets/Collected_Equipment_BB_0B/Equipment/Equipment_BB_13.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/assets/Collected_Equipment_BB_0B/Equipment/Equipment_BB_13.usd",
         rigid_props=sim_utils.RigidBodyPropertiesCfg(
-            disable_gravity=False, 
-            max_depenetration_velocity=1.0
+            disable_gravity=False, max_depenetration_velocity=1.0
         ),
         articulation_props=sim_utils.ArticulationRootPropertiesCfg(
-            enabled_self_collisions=False,#
+            enabled_self_collisions=False,  #
             solver_position_iteration_count=32,
             solver_velocity_iteration_count=16,
             stabilization_threshold=1e-6,
@@ -76,21 +81,19 @@ DRYINGBOX_CFG = ArticulationCfg(
         ),
     ),
     init_state=ArticulationCfg.InitialStateCfg(
-        pos=[0.95, -0.45, 0.9],
-        rot=[-0.7071, 0.0, 0.0, 0.7071]
-        ),
+        pos=[0.95, -0.45, 0.9], rot=[-0.7071, 0.0, 0.0, 0.7071]
+    ),
     # actuators={}
     actuators={
         "passive_damper": ImplicitActuatorCfg(
             # ".*" 表示匹配该USD文件内的所有关节（无论是轮子、屏幕转轴还是其他）
             joint_names_expr=["RevoluteJoint"],
-            
-            stiffness=10000.0, 
-            damping=1000.0, 
-            effort_limit_sim=10000.0, 
+            stiffness=10000.0,
+            damping=1000.0,
+            effort_limit_sim=10000.0,
             velocity_limit_sim=100.0,
         ),
-    }
+    },
 )
 
 LID_CFG = RigidObjectCfg(
@@ -102,23 +105,23 @@ LID_CFG = RigidObjectCfg(
         ang_vel=[0.0, 0.0, 0.0],
     ),
     spawn=UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
         # scale=(0.2, 0.2, 0.2),
         rigid_props=RigidBodyPropertiesCfg(
-            rigid_body_enabled= True,
+            rigid_body_enabled=True,
             solver_position_iteration_count=32,
             solver_velocity_iteration_count=16,
             max_angular_velocity=1000.0,
             max_linear_velocity=1000.0,
-            max_depenetration_velocity=0.5,#original 5.0
-            linear_damping=5.0, #original 0.5
-            angular_damping=5.0, #original 0.5
+            max_depenetration_velocity=0.5,  # original 5.0
+            linear_damping=5.0,  # original 0.5
+            angular_damping=5.0,  # original 0.5
             disable_gravity=False,
         ),
         collision_props=CollisionPropertiesCfg(
             collision_enabled=True,
-            contact_offset=0.0005,#original 0.02
-            rest_offset=0
+            contact_offset=0.0005,  # original 0.02
+            rest_offset=0,
         ),
     ),
 )
@@ -126,13 +129,12 @@ LID_CFG = RigidObjectCfg(
 
 ULTRASOUND_CFG = ArticulationCfg(
     spawn=sim_utils.UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
         rigid_props=sim_utils.RigidBodyPropertiesCfg(
-            disable_gravity=False, 
-            max_depenetration_velocity=1.0
+            disable_gravity=False, max_depenetration_velocity=1.0
         ),
         articulation_props=sim_utils.ArticulationRootPropertiesCfg(
-            enabled_self_collisions=False,#
+            enabled_self_collisions=False,  #
             solver_position_iteration_count=32,
             solver_velocity_iteration_count=16,
             stabilization_threshold=1e-6,
@@ -145,13 +147,12 @@ ULTRASOUND_CFG = ArticulationCfg(
         "passive_damper": ImplicitActuatorCfg(
             # ".*" 表示匹配该USD文件内的所有关节（无论是轮子、屏幕转轴还是其他）
             joint_names_expr=[".*"],
-            
-            stiffness=0.0, 
-            damping=1000.0, 
-            effort_limit_sim=10000.0, 
+            stiffness=0.0,
+            damping=1000.0,
+            effort_limit_sim=10000.0,
             velocity_limit_sim=100.0,
         ),
-    }
+    },
 )
 
 ##
@@ -160,6 +161,7 @@ ULTRASOUND_CFG = ArticulationCfg(
 from isaaclab.sensors import CameraCfg
 import isaaclab.sim as sim_utils
 
+
 @configclass
 class PullSceneCfg(InteractiveSceneCfg):
     """Configuration for a cart-pole scene."""
@@ -173,7 +175,9 @@ class PullSceneCfg(InteractiveSceneCfg):
     # robot
     Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
     table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
-    dryingbox: ArticulationCfg = DRYINGBOX_CFG.replace(prim_path="{ENV_REGEX_NS}/Dryingbox")
+    dryingbox: ArticulationCfg = DRYINGBOX_CFG.replace(
+        prim_path="{ENV_REGEX_NS}/Dryingbox"
+    )
     # ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
     # lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
 
@@ -291,17 +295,22 @@ class PullSceneCfg(InteractiveSceneCfg):
 # MDP settings
 ##
 
-def right_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
+
+def right_gripper_pos_w(
+    env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")
+) -> torch.Tensor:
     asset = env.scene[asset_cfg.name]
     body_ids, _ = asset.find_bodies(["right_hand_body"], preserve_order=True)
     pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
     return pos_w
 
+
 def get_body_pos_rel(env, asset_cfg: SceneEntityCfg) -> torch.Tensor:
     asset = env.scene[asset_cfg.name]
     body_ids, _ = asset.find_bodies(asset_cfg.body_names, preserve_order=True)
     return asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
 
+
 # 新增：通用获取 body 世界姿态的函数
 def get_body_quat_w(env, asset_cfg: SceneEntityCfg) -> torch.Tensor:
     asset = env.scene[asset_cfg.name]
@@ -317,16 +326,15 @@ class ActionsCfg:
     right_arm_ee = DifferentialInverseKinematicsActionCfg(
         asset_name="Mindbot",
         joint_names=["r_joint[1-6]"],  # 左臂的6个关节
-        body_name="right_hand_body",     # 末端执行器body名称
+        body_name="right_hand_body",  # 末端执行器body名称
         controller=DifferentialIKControllerCfg(
-            command_type="pose",        # 控制位置+姿态
-            use_relative_mode=True,     # 相对模式：动作是增量
-            ik_method="dls",            # Damped Least Squares方法
+            command_type="pose",  # 控制位置+姿态
+            use_relative_mode=True,  # 相对模式：动作是增量
+            ik_method="dls",  # Damped Least Squares方法
         ),
         scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
     )
 
-
     grippers_position = mdp.BinaryJointPositionActionCfg(
         asset_name="Mindbot",
         joint_names=["right_hand_joint_left", "right_hand_joint_right"],
@@ -334,7 +342,10 @@ class ActionsCfg:
         # open_command_expr={"关节名正则": 值}
         open_command_expr={"right_hand_joint_left": 0.0, "right_hand_joint_right": 0.0},
         # close_command_expr={"关节名正则": 值}
-        close_command_expr={"right_hand_joint_left": -0.03, "right_hand_joint_right": 0.03},
+        close_command_expr={
+            "right_hand_joint_left": -0.03,
+            "right_hand_joint_right": 0.03,
+        },
     )
 
     trunk_position = mdp.JointPositionActionCfg(
@@ -342,7 +353,7 @@ class ActionsCfg:
         joint_names=["PrismaticJoint"],
         scale=0.00,
         offset=0.25,
-        clip=None
+        clip=None,
     )
 
 
@@ -354,23 +365,41 @@ class ObservationsCfg:
     class PolicyCfg(ObsGroup):
         """Observations for policy group."""
 
-        mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
-        mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
-        mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
-        
-        right_gripper_pos = ObsTerm(func=right_gripper_pos_w,
-                                   params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["right_hand_body"])})
+        mindbot_joint_pos_rel = ObsTerm(
+            func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")}
+        )
+        mindbot_root_pos = ObsTerm(
+            func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}
+        )
+        mindbot_root_quat = ObsTerm(
+            func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}
+        )
+
+        right_gripper_pos = ObsTerm(
+            func=right_gripper_pos_w,
+            params={
+                "asset_cfg": SceneEntityCfg("Mindbot", body_names=["right_hand_body"])
+            },
+        )
         dryingbox_handle_pos = ObsTerm(
-            func=get_body_pos_rel, 
-            params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])}
+            func=get_body_pos_rel,
+            params={
+                "asset_cfg": SceneEntityCfg(
+                    "dryingbox", body_names=["Equipment_BB_13_C"]
+                )
+            },
         )
         dryingbox_handle_quat = ObsTerm(
-            func=get_body_quat_w, 
-            params={"asset_cfg": SceneEntityCfg("dryingbox", body_names=["Equipment_BB_13_C"])}
+            func=get_body_quat_w,
+            params={
+                "asset_cfg": SceneEntityCfg(
+                    "dryingbox", body_names=["Equipment_BB_13_C"]
+                )
+            },
         )
         # lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
         # ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
-        
+
         def __post_init__(self) -> None:
             self.enable_corruption = False
             self.concatenate_terms = True
@@ -382,6 +411,7 @@ class ObservationsCfg:
 @configclass
 class EventCfg:
     """Configuration for events."""
+
     # 重置所有关节到 init_state（无偏移）
     reset_mindbot_all_joints = EventTerm(
         func=mdp.reset_joints_by_offset,
@@ -402,15 +432,30 @@ class EventCfg:
             "velocity_range": (0.0, 0.0),
         },
     )
-    reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
-        params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
-    reset_dryingbox=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
-        params={"asset_cfg": SceneEntityCfg("dryingbox"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
+    reset_table = EventTerm(
+        func=mdp.reset_root_state_uniform,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("table"),
+            "pose_range": {"x": (0.0, 0.0)},
+            "velocity_range": {"x": (0.0, 0.0)},
+        },
+    )
+    reset_dryingbox = EventTerm(
+        func=mdp.reset_root_state_uniform,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("dryingbox"),
+            "pose_range": {"x": (0.0, 0.0)},
+            "velocity_range": {"x": (0.0, 0.0)},
+        },
+    )
 
 
 @configclass
 class RewardsCfg:
     """Reward terms for the MDP."""
+
     # 1. 姿态对齐：保持不变，这是基础
     gripper_handle_orientation_alignment = RewTerm(
         func=mdp.gripper_handle_orientation_alignment,
@@ -419,12 +464,12 @@ class RewardsCfg:
             "robot_cfg": SceneEntityCfg("Mindbot"),
             "handle_name": "Equipment_BB_13_C",
             "gripper_body_name": "right_hand_body",
-            "gripper_forward_axis": (0.0, 0.0, 1.0), 
-            "gripper_width_axis": (0.0, 1.0, 0.0), 
+            "gripper_forward_axis": (0.0, 0.0, 1.0),
+            "gripper_width_axis": (0.0, 1.0, 0.0),
             "lid_handle_axis": (0.0, 0.0, 1.0),
-            "max_angle_deg": 60.0, # 稍微放宽一点，或者保持 30.0，看前期训练难度
+            "max_angle_deg": 60.0,  # 稍微放宽一点，或者保持 30.0，看前期训练难度
         },
-        weight=4.0 
+        weight=4.0,
     )
     # 2. 位置对齐 (合并了之前的粗略和精细对齐)
     # std=0.05 提供了从 15cm 到 0cm 全程较好的梯度，解决了之前的梯度消失问题
@@ -433,13 +478,13 @@ class RewardsCfg:
         params={
             "dryingbox_cfg": SceneEntityCfg("dryingbox"),
             "robot_cfg": SceneEntityCfg("Mindbot"),
-            "handle_name":"Equipment_BB_13_C",
+            "handle_name": "Equipment_BB_13_C",
             "left_gripper_name": "right_hand_l",
             "right_gripper_name": "right_hand__r",
             "height_offset": 0.00,
             "std": 0.05,  # 关键修改：0.3太松，0.03太紧导致梯度消失，0.05是平衡点
         },
-        weight=6.0 # 提高权重，作为主导奖励
+        weight=6.0,  # 提高权重，作为主导奖励
     )
     # 3. 远距离闭合惩罚 (逻辑已修正)
     # 现在它只惩罚“在远处就闭眼”的行为，不会阻挡机器人靠近
@@ -452,27 +497,27 @@ class RewardsCfg:
             "left_gripper_name": "right_hand_l",
             "right_gripper_name": "right_hand__r",
             "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
-            "dist_threshold": 0.06, # 2.5cm 保护区，进入后允许闭合
+            "dist_threshold": 0.06,  # 2.5cm 保护区，进入后允许闭合
         },
-        weight=0.5 # 权重降低，作为辅助约束，不要喧宾夺主
+        weight=0.5,  # 权重降低，作为辅助约束，不要喧宾夺主
     )
     # ... 在 RewardsCfg 类中修改 ...
 
     # 4. 近距离抓取交互 (改用 V2 版本或增加稳定性项)
-    gripper_close_interaction = RewTerm( 
-        func=mdp.gripper_close_interaction_v2, # 切换到带稳定性的版本
+    gripper_close_interaction = RewTerm(
+        func=mdp.gripper_close_interaction_v2,  # 切换到带稳定性的版本
         params={
             "dryingbox_cfg": SceneEntityCfg("dryingbox"),
             "robot_cfg": SceneEntityCfg("Mindbot"),
-            "handle_name":"Equipment_BB_13_C",
+            "handle_name": "Equipment_BB_13_C",
             "left_gripper_name": "right_hand_l",
             "right_gripper_name": "right_hand__r",
             "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
-            "target_close_pos": 0.012, 
+            "target_close_pos": 0.012,
             "height_offset": 0.00,
             "grasp_radius": 0.05,
         },
-        weight=8.0 # 适当调低一点点，为稳定性奖励腾出空间
+        weight=8.0,  # 适当调低一点点，为稳定性奖励腾出空间
     )
 
     # 5. 新增：抓取稳定性奖励 (核心：惩罚抖动)
@@ -483,10 +528,10 @@ class RewardsCfg:
             "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
             "vel_threshold": 0.005,
         },
-        weight=4.0 # 只要保持静止就一直给分
+        weight=4.0,  # 只要保持静止就一直给分
     )
     # 4. 近距离抓取交互 (修正了目标值)
-    # gripper_close_interaction = RewTerm( 
+    # gripper_close_interaction = RewTerm(
     #     func=mdp.gripper_close_when_near,
     #     params={
     #         "dryingbox_cfg": SceneEntityCfg("dryingbox"),
@@ -495,19 +540,20 @@ class RewardsCfg:
     #         "left_gripper_name": "right_hand_l",
     #         "right_gripper_name": "right_hand__r",
     #         "joint_names": ["right_hand_joint_left", "right_hand_joint_right"],
-            
+
     #         # 关键修改：物理接触点计算
     #         # 夹爪空载全闭是0.03 (对应0cm)
     #         # 把手宽3.88cm -> 单侧行程 (6-3.88)/2 = 1.06cm = 0.0106m
     #         # 设为 0.012 确保接触时计算出的奖励是满分 (ratio >= 1.0)
-    #         "target_close_pos": 0.012, 
-            
+    #         "target_close_pos": 0.012,
+
     #         "height_offset": 0.00,
     #         "grasp_radius": 0.05,
     #     },
     #     weight=10.0 # 只有成功抓到才给的大奖
     # )
 
+
 # @configclass
 # class RewardsCfg:
 #     """Reward terms for the MDP."""
@@ -519,8 +565,8 @@ class RewardsCfg:
 #             "robot_cfg": SceneEntityCfg("Mindbot"),
 #             "handle_name": "Equipment_BB_13_C",
 #             "gripper_body_name": "right_hand_body",
-#             "gripper_forward_axis": (0.0, 0.0, 1.0), 
-#             "gripper_width_axis": (0.0, 1.0, 0.0), 
+#             "gripper_forward_axis": (0.0, 0.0, 1.0),
+#             "gripper_width_axis": (0.0, 1.0, 0.0),
 #             "lid_handle_axis": (0.0, 0.0, 1.0),
 #             # 删除了 handle_axis，因为新函数中不再使用它
 #             "max_angle_deg": 30.0,  # 建议改为 30.0 或更小，85.0 对指向性约束来说太宽松
@@ -530,7 +576,7 @@ class RewardsCfg:
 
 #     # stage 2
 #     # 位置对齐奖励：鼓励夹爪位于 lid 两侧，且 Z 方向在 lid 上方 0.1m
-    
+
 #     gripper_handle_position_alignment = RewTerm(
 #         func=mdp.gripper_handle_position_alignment,
 #         params={
@@ -596,7 +642,7 @@ class RewardsCfg:
 #     #     },
 #     #     weight=10.0
 #     # )
-#     gripper_close_interaction = RewTerm( 
+#     gripper_close_interaction = RewTerm(
 #         func=mdp.gripper_close_when_near,
 #         params={
 #             "dryingbox_cfg": SceneEntityCfg("dryingbox"),
@@ -631,12 +677,10 @@ class RewardsCfg:
 #     #         "std": 0.1
 #     #     },
 #     #     # 权重设大一点，告诉它这是最终目标
-#     #     weight=10.0 
+#     #     weight=10.0
 #     # )
 
 
-
-
 @configclass
 class TerminationsCfg:
     """Termination terms for the MDP."""
@@ -647,7 +691,7 @@ class TerminationsCfg:
         func=mdp.base_height_failure,
         params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
     )
-    
+
     # lid_fly_away = DoneTerm(
     #     func=mdp.base_height_failure,
     #     params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
@@ -658,23 +702,23 @@ class TerminationsCfg:
     # lid_dropped = DoneTerm(
     #     func=mdp.base_height_failure, # 复用高度判定函数
     #     params={
-    #         "asset_cfg": SceneEntityCfg("lid"), 
+    #         "asset_cfg": SceneEntityCfg("lid"),
     #         "minimum_height": 0.79, # 假设初始高度是 0.9，低于 0.88 说明掉下去了或者被砸进去了
     #     },
     # )
-    
 
 
 ##
 # Environment configuration
 ##
 
+
 @configclass
 class CurriculumCfg:
     pass
     # action_rate = CurrTerm(func=mdp.modify_reward_weight,
     #                        params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
-                           
+
     # joint_vel = CurrTerm(func=mdp.modify_reward_weight,
     #                      params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
 
@@ -683,10 +727,10 @@ class CurriculumCfg:
     #     params={
     #         # 路径：rewards管理器 -> 你的奖励项名 -> params字典 -> std键
     #         "address": "rewards.gripper_lid_position_alignment.params.std",
-            
+
     #         # 修改逻辑：使用我们刚才写的函数
     #         "modify_fn": mdp.annealing_std,
-            
+
     #         # 传给函数的参数
     #         "modify_params": {
     #             "start_step": 00, # 约 600 轮
@@ -697,6 +741,7 @@ class CurriculumCfg:
     #     }
     # )
 
+
 @configclass
 class PullEnvCfg(ManagerBasedRLEnvCfg):
     # Scene settings
@@ -714,20 +759,20 @@ class PullEnvCfg(ManagerBasedRLEnvCfg):
     def __post_init__(self) -> None:
         """Post initialization."""
         # general settings
-        self.decimation = 4 #original 2  # 从 2 增加到 4，控制频率从 25Hz 降到 12.5Hz
+        self.decimation = 4  # original 2  # 从 2 增加到 4，控制频率从 25Hz 降到 12.5Hz
         self.episode_length_s = 10
         # viewer settings
         self.viewer.eye = (3.5, 0.0, 3.2)
         # simulation settings
-        self.sim.dt = 1 / 120   #original 1 / 60
+        self.sim.dt = 1 / 120  # original 1 / 60
         self.sim.render_interval = self.decimation
         # # 1. 基础堆内存
         self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024  # 256MB
-        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024 
+        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
         self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
-        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024 # 200万个接触点
-        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024       # 32万个 patch
-        
+        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # 200万个接触点
+        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # 32万个 patch
+
         # # 5. 聚合对容量 (针对复杂的 Articulation)
         self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
         self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024
diff --git a/source/mindbot/mindbot/tasks/manager_based/pull/pull_env_cfg.py.bak b/source/mindbot/mindbot/tasks/manager_based/rl/pull/pull_env_cfg.py.bak
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pull/pull_env_cfg.py.bak
rename to source/mindbot/mindbot/tasks/manager_based/rl/pull/pull_env_cfg.py.bak
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/__init__.py b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/__init__.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/__init__.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/__init__.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/__init__.py b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/__init__.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/__init__.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/__init__.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/rl_games_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/rl_games_ppo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/rl_games_ppo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/rl_games_ppo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/rsl_rl_ppo_cfg.py b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/rsl_rl_ppo_cfg.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/rsl_rl_ppo_cfg.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/rsl_rl_ppo_cfg.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/sb3_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/sb3_ppo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/sb3_ppo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/sb3_ppo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_amp_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/skrl_amp_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_amp_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/skrl_amp_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_ippo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/skrl_ippo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_ippo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/skrl_ippo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_mappo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/skrl_mappo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_mappo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/skrl_mappo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/skrl_ppo_cfg.yaml
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/agents/skrl_ppo_cfg.yaml
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/agents/skrl_ppo_cfg.yaml
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/__init__.py b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/mdp/__init__.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/__init__.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/mdp/__init__.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/curriculums.py b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/mdp/curriculums.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/curriculums.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/mdp/curriculums.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/gripper.py b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/mdp/gripper.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/gripper.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/mdp/gripper.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/rewards.py b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/mdp/rewards.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/rewards.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/mdp/rewards.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/terminations.py b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/mdp/terminations.py
similarity index 100%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mdp/terminations.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/mdp/terminations.py
diff --git a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mindbot_env_cfg.py b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/mindbot_env_cfg.py
similarity index 74%
rename from source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mindbot_env_cfg.py
rename to source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/mindbot_env_cfg.py
index 72c1bac..b8a4c51 100644
--- a/source/mindbot/mindbot/tasks/manager_based/pullUltrasoundLidUp/mindbot_env_cfg.py
+++ b/source/mindbot/mindbot/tasks/manager_based/rl/pullUltrasoundLidUp/mindbot_env_cfg.py
@@ -21,7 +21,10 @@ from isaaclab.managers import CurriculumTermCfg as CurrTerm
 from isaaclab.scene import InteractiveSceneCfg
 from isaaclab.utils import configclass
 from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg
-from isaaclab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg, BinaryJointPositionActionCfg
+from isaaclab.envs.mdp.actions.actions_cfg import (
+    DifferentialInverseKinematicsActionCfg,
+    BinaryJointPositionActionCfg,
+)
 from isaaclab.sensors import CameraCfg
 
 from . import mdp
@@ -34,10 +37,13 @@ from mindbot.robot.mindbot import MINDBOT_CFG
 
 # ====== 其他物体配置 ======
 from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
-from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg, CollisionPropertiesCfg
+from isaaclab.sim.schemas.schemas_cfg import (
+    RigidBodyPropertiesCfg,
+    CollisionPropertiesCfg,
+)
 
 
-TABLE_CFG=RigidObjectCfg(
+TABLE_CFG = RigidObjectCfg(
     prim_path="{ENV_REGEX_NS}/Table",
     init_state=RigidObjectCfg.InitialStateCfg(
         pos=[0.95, -0.3, 0.005],
@@ -46,17 +52,17 @@ TABLE_CFG=RigidObjectCfg(
         ang_vel=[0.0, 0.0, 0.0],
     ),
     spawn=UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/table/Table_C.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/equipment/table/Table_C.usd",
         rigid_props=RigidBodyPropertiesCfg(
-            rigid_body_enabled= True,
+            rigid_body_enabled=True,
             solver_position_iteration_count=32,
             solver_velocity_iteration_count=16,
             disable_gravity=False,
         ),
         collision_props=CollisionPropertiesCfg(
             collision_enabled=True,
-            contact_offset=0.0005,#original 0.02
-            rest_offset=0
+            contact_offset=0.0005,  # original 0.02
+            rest_offset=0,
         ),
     ),
 )
@@ -70,23 +76,23 @@ LID_CFG = RigidObjectCfg(
         ang_vel=[0.0, 0.0, 0.0],
     ),
     spawn=UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/equipment/ultrasound/ultrasound_lid.usd",
         # scale=(0.2, 0.2, 0.2),
         rigid_props=RigidBodyPropertiesCfg(
-            rigid_body_enabled= True,
+            rigid_body_enabled=True,
             solver_position_iteration_count=32,
             solver_velocity_iteration_count=16,
             max_angular_velocity=1000.0,
             max_linear_velocity=1000.0,
-            max_depenetration_velocity=0.5,#original 5.0
-            linear_damping=5.0, #original 0.5
-            angular_damping=5.0, #original 0.5
+            max_depenetration_velocity=0.5,  # original 5.0
+            linear_damping=5.0,  # original 0.5
+            angular_damping=5.0,  # original 0.5
             disable_gravity=False,
         ),
         collision_props=CollisionPropertiesCfg(
             collision_enabled=True,
-            contact_offset=0.0005,#original 0.02
-            rest_offset=0
+            contact_offset=0.0005,  # original 0.02
+            rest_offset=0,
         ),
     ),
 )
@@ -94,13 +100,13 @@ LID_CFG = RigidObjectCfg(
 ROOM_CFG = AssetBaseCfg(
     prim_path="{ENV_REGEX_NS}/Room",
     spawn=UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
     ),
 )
 ROOM_CFG = AssetBaseCfg(
     prim_path="{ENV_REGEX_NS}/Room",
     spawn=UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/Collected_lab2/lab.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/twinlab/Collected_lab2/lab.usd",
     ),
 )
 
@@ -113,7 +119,7 @@ ROOM_CFG = AssetBaseCfg(
 #         ang_vel=[0.0, 0.0, 0.0],
 #     ),
 #     spawn=UsdFileCfg(
-#         usd_path="C:/Users/PC/workpalce/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
+#         usd_path="/home/maic/LYT/maic_usd_assets/twinlab/MIC_sim-3.0/244_140/room.usd",
 #         # scale=(0.2, 0.2, 0.2),
 #         rigid_props=RigidBodyPropertiesCfg(
 #             rigid_body_enabled= True,
@@ -135,16 +141,14 @@ ROOM_CFG = AssetBaseCfg(
 # )
 
 
-
 ULTRASOUND_CFG = ArticulationCfg(
     spawn=sim_utils.UsdFileCfg(
-        usd_path="C:/Users/PC/workpalce/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
+        usd_path="/home/maic/LYT/maic_usd_assets/equipment/ultrasound/ultrasound.usd",
         rigid_props=sim_utils.RigidBodyPropertiesCfg(
-            disable_gravity=False, 
-            max_depenetration_velocity=1.0
+            disable_gravity=False, max_depenetration_velocity=1.0
         ),
         articulation_props=sim_utils.ArticulationRootPropertiesCfg(
-            enabled_self_collisions=False,#
+            enabled_self_collisions=False,  #
             solver_position_iteration_count=32,
             solver_velocity_iteration_count=16,
             stabilization_threshold=1e-6,
@@ -157,13 +161,12 @@ ULTRASOUND_CFG = ArticulationCfg(
         "passive_damper": ImplicitActuatorCfg(
             # ".*" 表示匹配该USD文件内的所有关节（无论是轮子、屏幕转轴还是其他）
             joint_names_expr=[".*"],
-            
-            stiffness=0.0, 
-            damping=1000.0, 
-            effort_limit_sim=10000.0, 
+            stiffness=0.0,
+            damping=1000.0,
+            effort_limit_sim=10000.0,
             velocity_limit_sim=100.0,
         ),
-    }
+    },
 )
 
 ##
@@ -185,7 +188,9 @@ class MindbotSceneCfg(InteractiveSceneCfg):
     Room: AssetBaseCfg = ROOM_CFG.replace(prim_path="{ENV_REGEX_NS}/Room")
     Mindbot: ArticulationCfg = MINDBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Mindbot")
     table: RigidObjectCfg = TABLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Table")
-    ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(prim_path="{ENV_REGEX_NS}/ultrasound")
+    ultrasound: ArticulationCfg = ULTRASOUND_CFG.replace(
+        prim_path="{ENV_REGEX_NS}/ultrasound"
+    )
     lid: RigidObjectCfg = LID_CFG.replace(prim_path="{ENV_REGEX_NS}/Lid")
     # lights
     dome_light = AssetBaseCfg(
@@ -193,50 +198,55 @@ class MindbotSceneCfg(InteractiveSceneCfg):
         spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
     )
     # head_camera=CameraCfg(
-    #     prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head", 
+    #     prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
     #     update_period=1/120,
     #     height=480,
     #     width=640,
     #     data_type=["rgb"],
     #     spawn=None,
     # )
-    left_hand_camera=CameraCfg(
-        prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand", 
-        update_period=1/120,
+    left_hand_camera = CameraCfg(
+        prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_fb_left/Link_6/robot___left_8_02/cam_left_hand",
+        update_period=1 / 120,
         height=480,
         width=640,
         data_types=["rgb"],
         spawn=None,
     )
-    right_hand_camera=CameraCfg(
-        prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand", 
-        update_period=1/120,
+    right_hand_camera = CameraCfg(
+        prim_path="{ENV_REGEX_NS}/Mindbot/rm_65_b_right/Link6/robot___right_8_2/cam_right_hand",
+        update_period=1 / 120,
         height=480,
         width=640,
         data_types=["rgb"],
         spawn=None,
     )
-    head_camera=CameraCfg(
-        prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head", 
-        update_period=1/120,
+    head_camera = CameraCfg(
+        prim_path="{ENV_REGEX_NS}/Mindbot/robot_head/cam_head",
+        update_period=1 / 120,
         height=480,
         width=640,
         data_types=["rgb"],
         spawn=None,
     )
-    chest_camera=CameraCfg(
-        prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest", 
-        update_period=1/120,
+    chest_camera = CameraCfg(
+        prim_path="{ENV_REGEX_NS}/Mindbot/robot_trunk/cam_chest",
+        update_period=1 / 120,
         height=480,
         width=640,
         data_types=["rgb"],
         spawn=None,
     )
+
+
 # # ##
 # MDP settings
 ##
 
-def left_gripper_pos_w(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")) -> torch.Tensor:
+
+def left_gripper_pos_w(
+    env, asset_cfg: SceneEntityCfg = SceneEntityCfg("Mindbot")
+) -> torch.Tensor:
     asset = env.scene[asset_cfg.name]
     body_ids, _ = asset.find_bodies(["left_hand_body"], preserve_order=True)
     pos_w = asset.data.body_pos_w[:, body_ids[0]] - env.scene.env_origins
@@ -251,22 +261,22 @@ class ActionsCfg:
     left_arm_ee = DifferentialInverseKinematicsActionCfg(
         asset_name="Mindbot",
         joint_names=["l_joint[1-6]"],  # 左臂的6个关节
-        body_name="left_hand_body",     # 末端执行器body名称
+        body_name="left_hand_body",  # 末端执行器body名称
         controller=DifferentialIKControllerCfg(
-            command_type="pose",        # 控制位置+姿态
-            use_relative_mode=True,     # 相对模式：动作是增量
-            ik_method="dls",            # Damped Least Squares方法
+            command_type="pose",  # 控制位置+姿态
+            use_relative_mode=True,  # 相对模式：动作是增量
+            ik_method="dls",  # Damped Least Squares方法
         ),
         scale=(0.01, 0.01, 0.01, 0.025, 0.025, 0.025),
     )
-    
+
     # right_arm_fixed = mdp.JointPositionActionCfg(
     #     asset_name="Mindbot",
     #     joint_names=["r_joint[1-6]"], # 对应 l_joint1 到 l_joint6
-        
+
     #     # 1. 缩放设为 0：这让 RL 策略无法控制它，它不会动
-    #     scale=0.0, 
-        
+    #     scale=0.0,
+
     #     # 2. 偏移设为你的目标角度（弧度）：这告诉电机“永远停在这里”
     #     # 对应 (135, -70, -90, 90, 90, -70)
     #     offset={
@@ -282,22 +292,22 @@ class ActionsCfg:
     grippers_position = mdp.BinaryJointPositionActionCfg(
         asset_name="Mindbot",
         joint_names=["left_hand_joint_left", "left_hand_joint_right"],
-        
         # 修正：使用字典格式
         # open_command_expr={"关节名正则": 值}
         open_command_expr={"left_hand_joint_left": 0.0, "left_hand_joint_right": 0.0},
-        
         # close_command_expr={"关节名正则": 值}
-        close_command_expr={"left_hand_joint_left": -0.03, "left_hand_joint_right": 0.03},
+        close_command_expr={
+            "left_hand_joint_left": -0.03,
+            "left_hand_joint_right": 0.03,
+        },
     )
 
-
     trunk_position = mdp.JointPositionActionCfg(
         asset_name="Mindbot",
         joint_names=["PrismaticJoint"],
         scale=0.00,
         offset=0.3,
-        clip=None
+        clip=None,
     )
 
 
@@ -309,15 +319,29 @@ class ObservationsCfg:
     class PolicyCfg(ObsGroup):
         """Observations for policy group."""
 
-        mindbot_joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")})
-        mindbot_root_pos = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
-        mindbot_root_quat = ObsTerm(func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")})
-        
-        left_gripper_pos = ObsTerm(func=left_gripper_pos_w,
-                                   params={"asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])})
-        lid_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")})
-        ultrasound_pos_abs = ObsTerm(func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")})
-        
+        mindbot_joint_pos_rel = ObsTerm(
+            func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("Mindbot")}
+        )
+        mindbot_root_pos = ObsTerm(
+            func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}
+        )
+        mindbot_root_quat = ObsTerm(
+            func=mdp.root_quat_w, params={"asset_cfg": SceneEntityCfg("Mindbot")}
+        )
+
+        left_gripper_pos = ObsTerm(
+            func=left_gripper_pos_w,
+            params={
+                "asset_cfg": SceneEntityCfg("Mindbot", body_names=["left_hand_body"])
+            },
+        )
+        lid_pos_abs = ObsTerm(
+            func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("lid")}
+        )
+        ultrasound_pos_abs = ObsTerm(
+            func=mdp.root_pos_w, params={"asset_cfg": SceneEntityCfg("ultrasound")}
+        )
+
         def __post_init__(self) -> None:
             self.enable_corruption = False
             self.concatenate_terms = True
@@ -365,27 +389,34 @@ class EventCfg:
             "velocity_range": (0.0, 0.0),
         },
     )
-    
-    reset_table=EventTerm(func=mdp.reset_root_state_uniform,mode="reset",
-        params={"asset_cfg": SceneEntityCfg("table"), "pose_range": {"x": (0.0, 0.0)}, "velocity_range": {"x": (0.0, 0.0)}})
+
+    reset_table = EventTerm(
+        func=mdp.reset_root_state_uniform,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("table"),
+            "pose_range": {"x": (0.0, 0.0)},
+            "velocity_range": {"x": (0.0, 0.0)},
+        },
+    )
     reset_ultrasound = EventTerm(
-        func=mdp.reset_root_state_uniform, 
+        func=mdp.reset_root_state_uniform,
         mode="reset",
         params={
-            "asset_cfg": SceneEntityCfg("ultrasound"), 
-            "pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)}, 
-            "velocity_range": {"x": (0.0, 0.0)}
-            }
-        )
+            "asset_cfg": SceneEntityCfg("ultrasound"),
+            "pose_range": {"x": (-0.07, 0.07), "y": (-0.07, 0.07), "yaw": (-0.1, 0.1)},
+            "velocity_range": {"x": (0.0, 0.0)},
+        },
+    )
     reset_lid = EventTerm(
-        func=mdp.reset_root_state_uniform, 
+        func=mdp.reset_root_state_uniform,
         mode="reset",
         params={
-            "asset_cfg": SceneEntityCfg("lid"), 
-            "pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)}, 
-            "velocity_range": {"x": (0.0, 0.0)}
-            }
-        )
+            "asset_cfg": SceneEntityCfg("lid"),
+            "pose_range": {"x": (-0.03, 0.03), "y": (-0.03, 0.03), "yaw": (-1.5, 1.5)},
+            "velocity_range": {"x": (0.0, 0.0)},
+        },
+    )
 
 
 @configclass
@@ -398,12 +429,12 @@ class RewardsCfg:
             "lid_cfg": SceneEntityCfg("lid"),
             "robot_cfg": SceneEntityCfg("Mindbot"),
             "gripper_body_name": "left_hand_body",
-            "gripper_forward_axis": (0.0, 0.0, 1.0), 
-            "gripper_width_axis": (0.0, 1.0, 0.0), 
+            "gripper_forward_axis": (0.0, 0.0, 1.0),
+            "gripper_width_axis": (0.0, 1.0, 0.0),
             "lid_handle_axis": (0.0, 1.0, 0.0),
             "max_angle_deg": 85.0,  # 允许60度内的偏差
         },
-        weight=5.0 #original 5.0
+        weight=5.0,  # original 5.0
     )
 
     # stage 2
@@ -416,9 +447,9 @@ class RewardsCfg:
             "left_gripper_name": "left_hand__l",
             "right_gripper_name": "left_hand_r",
             "height_offset": 0.07,  # Z方向：lid 上方 0.1m
-            "std": 0.3,  # 位置对齐的容忍度    
+            "std": 0.3,  # 位置对齐的容忍度
         },
-        weight=3.0 #original 3.0
+        weight=3.0,  # original 3.0
     )
     approach_lid_penalty = RewTerm(
         func=mdp.gripper_lid_distance_penalty,
@@ -431,7 +462,7 @@ class RewardsCfg:
         },
         # weight 为负数表示惩罚。
         # 假设距离 0.5m，惩罚就是 -0.5 * 2.0 = -1.0 分。
-        weight=-1.0 
+        weight=-1.0,
     )
     # 【新增】精细对齐 (引导进入 2cm 圈)
     gripper_lid_fine_alignment = RewTerm(
@@ -441,13 +472,13 @@ class RewardsCfg:
             "robot_cfg": SceneEntityCfg("Mindbot"),
             "left_gripper_name": "left_hand__l",
             "right_gripper_name": "left_hand_r",
-            "height_offset": 0.07, # 【注意】这个高度必须非常准，和 interaction 判定高度一致
-            "std": 0.05, # 非常陡峭的梯度，只在 5-10cm 内有效
+            "height_offset": 0.07,  # 【注意】这个高度必须非常准，和 interaction 判定高度一致
+            "std": 0.05,  # 非常陡峭的梯度，只在 5-10cm 内有效
         },
-        weight=10.0 # 高权重
+        weight=10.0,  # 高权重
     )
 
-    gripper_close_interaction = RewTerm( 
+    gripper_close_interaction = RewTerm(
         func=mdp.gripper_close_when_near,
         params={
             "lid_cfg": SceneEntityCfg("lid"),
@@ -459,34 +490,32 @@ class RewardsCfg:
             "height_offset": 0.04,
             "grasp_radius": 0.03,
         },
-        weight=10.0
+        weight=10.0,
     )
 
     lid_lifted_reward = RewTerm(
-        func=mdp.lid_is_lifted, # 使用 rewards.py 中已有的函数
+        func=mdp.lid_is_lifted,  # 使用 rewards.py 中已有的函数
         params={
             "lid_cfg": SceneEntityCfg("lid"),
-            "minimal_height": 1.0, # 根据你的场景调整，比初始高度高 5cm 左右
+            "minimal_height": 1.0,  # 根据你的场景调整，比初始高度高 5cm 左右
         },
-        weight=10.0 # 给一个足够大的权重
+        weight=10.0,  # 给一个足够大的权重
     )
 
     lid_lifting_reward = RewTerm(
         func=mdp.lid_lift_progress_reward,
         params={
             "lid_cfg": SceneEntityCfg("lid"),
-            "initial_height": 0.8, # 请务必核实这个值！在 print 里看一下 lid 的 z 坐标
+            "initial_height": 0.8,  # 请务必核实这个值！在 print 里看一下 lid 的 z 坐标
             "target_height_lift": 0.2,
-            "height_offset": 0.07, # 与其他奖励保持一致
-            "std": 0.1
+            "height_offset": 0.07,  # 与其他奖励保持一致
+            "std": 0.1,
         },
         # 权重设大一点，告诉它这是最终目标
-        weight=10.0 
+        weight=10.0,
     )
 
 
-
-
 @configclass
 class TerminationsCfg:
     """Termination terms for the MDP."""
@@ -497,7 +526,7 @@ class TerminationsCfg:
         func=mdp.base_height_failure,
         params={"asset_cfg": SceneEntityCfg("Mindbot"), "maximum_height": 2.0},
     )
-    
+
     lid_fly_away = DoneTerm(
         func=mdp.base_height_failure,
         params={"asset_cfg": SceneEntityCfg("lid"), "maximum_height": 2.0},
@@ -506,25 +535,25 @@ class TerminationsCfg:
     # 新增：盖子掉落判定
     # 如果盖子掉回桌面（高度接近初始高度），且已经运行了一段时间（避开刚开始重置的瞬间）
     lid_dropped = DoneTerm(
-        func=mdp.base_height_failure, # 复用高度判定函数
+        func=mdp.base_height_failure,  # 复用高度判定函数
         params={
-            "asset_cfg": SceneEntityCfg("lid"), 
-            "minimum_height": 0.79, # 假设初始高度是 0.9，低于 0.88 说明掉下去了或者被砸进去了
+            "asset_cfg": SceneEntityCfg("lid"),
+            "minimum_height": 0.79,  # 假设初始高度是 0.9，低于 0.88 说明掉下去了或者被砸进去了
         },
     )
-    
 
 
 ##
 # Environment configuration
 ##
 
+
 @configclass
 class CurriculumCfg:
     pass
     # action_rate = CurrTerm(func=mdp.modify_reward_weight,
     #                        params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000})
-                           
+
     # joint_vel = CurrTerm(func=mdp.modify_reward_weight,
     #                      params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000})
 
@@ -533,10 +562,10 @@ class CurriculumCfg:
     #     params={
     #         # 路径：rewards管理器 -> 你的奖励项名 -> params字典 -> std键
     #         "address": "rewards.gripper_lid_position_alignment.params.std",
-            
+
     #         # 修改逻辑：使用我们刚才写的函数
     #         "modify_fn": mdp.annealing_std,
-            
+
     #         # 传给函数的参数
     #         "modify_params": {
     #             "start_step": 00, # 约 600 轮
@@ -547,6 +576,7 @@ class CurriculumCfg:
     #     }
     # )
 
+
 @configclass
 class MindbotEnvCfg(ManagerBasedRLEnvCfg):
     # Scene settings
@@ -564,20 +594,20 @@ class MindbotEnvCfg(ManagerBasedRLEnvCfg):
     def __post_init__(self) -> None:
         """Post initialization."""
         # general settings
-        self.decimation = 4 #original 2  # 从 2 增加到 4，控制频率从 25Hz 降到 12.5Hz
+        self.decimation = 4  # original 2  # 从 2 增加到 4，控制频率从 25Hz 降到 12.5Hz
         self.episode_length_s = 10
         # viewer settings
-        self.viewer.eye = (0, 2.5, 2.6)#(3.5, 0.0, 3.2)
+        self.viewer.eye = (0, 2.5, 2.6)  # (3.5, 0.0, 3.2)
         # simulation settings
-        self.sim.dt = 1 / 120   #original 1 / 60
+        self.sim.dt = 1 / 120  # original 1 / 60
         self.sim.render_interval = self.decimation
         # # 1. 基础堆内存
         self.sim.physx.gpu_heap_capacity = 256 * 1024 * 1024  # 256MB
-        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024 
+        self.sim.physx.gpu_collision_stack_size = 1024 * 1024 * 1024
         self.sim.physx.gpu_temp_buffer_capacity = 128 * 1024 * 1024
-        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024 # 200万个接触点
-        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024       # 32万个 patch
-        
+        self.sim.physx.gpu_max_rigid_contact_count = 16 * 1024 * 1024  # 200万个接触点
+        self.sim.physx.gpu_max_rigid_patch_count = 1024 * 1024  # 32万个 patch
+
         # # 5. 聚合对容量 (针对复杂的 Articulation)
         self.sim.physx.gpu_found_lost_pairs_capacity = 16 * 1024 * 1024
         self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 16 * 1024 * 1024