commit 1f7053a306c3a467ce2ab652c2d0a366aa870e1c Author: yutangli Date: Thu Nov 13 17:37:07 2025 +0800 Initial commit diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..1b080bd --- /dev/null +++ b/.dockerignore @@ -0,0 +1,27 @@ +# ignore .git related folders +.git/ +.github/ +.gitignore +# ignore docs +docs/ +# copy in licenses folder to the container +!docs/licenses/ +# ignore logs +**/logs/ +**/runs/ +**/output/* +**/outputs/* +**/videos/* +*.tmp +# ignore docker +docker/cluster/exports/ +docker/.container.cfg +# ignore recordings +recordings/ +# ignore __pycache__ +**/__pycache__/ +**/*.egg-info/ +# ignore isaac sim symlink +_isaac_sim? +# Docker history +docker/.isaac-lab-docker-history diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..9b4a023 --- /dev/null +++ b/.flake8 @@ -0,0 +1,23 @@ +[flake8] +show-source=True +statistics=True +per-file-ignores=*/__init__.py:F401 +# E402: Module level import not at top of file +# E501: Line too long +# W503: Line break before binary operator +# E203: Whitespace before ':' -> conflicts with black +# D401: First line should be in imperative mood +# R504: Unnecessary variable assignment before return statement. +# R505: Unnecessary elif after return statement +# SIM102: Use a single if-statement instead of nested if-statements +# SIM117: Merge with statements for context managers that have same scope. +# SIM118: Checks for key-existence checks against dict.keys() calls. +ignore=E402,E501,W503,E203,D401,R504,R505,SIM102,SIM117,SIM118 +max-line-length = 120 +max-complexity = 30 +exclude=_*,.vscode,.git,docs/** +# docstrings +docstring-convention=google +# annotations +suppress-none-returning=True +allow-star-arg-any=True diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..e3c0ead --- /dev/null +++ b/.gitattributes @@ -0,0 +1,14 @@ +*.usd filter=lfs diff=lfs merge=lfs -text +*.usda filter=lfs diff=lfs merge=lfs -text +*.psd filter=lfs diff=lfs merge=lfs -text +*.hdr filter=lfs diff=lfs merge=lfs -text +*.dae filter=lfs diff=lfs merge=lfs -text +*.mtl filter=lfs diff=lfs merge=lfs -text +*.obj filter=lfs diff=lfs merge=lfs -text +*.gif filter=lfs diff=lfs merge=lfs -text +*.mp4 filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.jit filter=lfs diff=lfs merge=lfs -text +*.hdf5 filter=lfs diff=lfs merge=lfs -text + +*.bat text eol=crlf diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..08d2e8d --- /dev/null +++ b/.gitignore @@ -0,0 +1,71 @@ +# C++ +**/cmake-build*/ +**/build*/ +**/*.so +**/*.log* + +# Omniverse +**/*.dmp +**/.thumbs + +# No USD files allowed in the repo +**/*.usd +**/*.usda +**/*.usdc +**/*.usdz + +# Python +.DS_Store +**/*.egg-info/ +**/__pycache__/ +**/.pytest_cache/ +**/*.pyc +**/*.pb + +# Docker/Singularity +**/*.sif +docker/cluster/exports/ +docker/.container.cfg + +# IDE +**/.idea/ +**/.vscode/ +# Don't ignore the top-level .vscode directory as it is +# used to configure VS Code settings +!.vscode + +# Outputs +**/output/* +**/outputs/* +**/videos/* +**/wandb/* +**/.neptune/* +docker/artifacts/ +*.tmp + +# Doc Outputs +**/docs/_build/* +**/generated/* + +# Isaac-Sim packman +_isaac_sim* +_repo +_build +.lastformat + +# RL-Games +**/runs/* +**/logs/* +**/recordings/* + +# Pre-Trained Checkpoints +/.pretrained_checkpoints/ + +# Teleop Recorded Dataset +/datasets/ + +# Tests +tests/ + +# Docker history +.isaac-lab-docker-history diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 
index 0000000..86e513b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,86 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +repos: + - repo: https://github.com/python/black + rev: 24.3.0 + hooks: + - id: black + args: ["--line-length", "120", "--unstable"] + - repo: https://github.com/pycqa/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + additional_dependencies: [flake8-simplify, flake8-return] + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: check-symlinks + - id: destroyed-symlinks + - id: check-added-large-files + args: ["--maxkb=2000"] # restrict files more than 2 MB. Should use git-lfs instead. + - id: check-yaml + - id: check-merge-conflict + - id: check-case-conflict + - id: check-executables-have-shebangs + - id: check-toml + - id: end-of-file-fixer + - id: check-shebang-scripts-are-executable + - id: detect-private-key + - id: debug-statements + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + name: isort (python) + args: ["--profile", "black", "--filter-files"] + - repo: https://github.com/asottile/pyupgrade + rev: v3.15.1 + hooks: + - id: pyupgrade + args: ["--py310-plus"] + # FIXME: This is a hack because Pytorch does not like: torch.Tensor | dict aliasing + exclude: "source/isaaclab/isaaclab/envs/common.py|source/isaaclab/isaaclab/ui/widgets/image_plot.py|source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/motions/motion_loader.py" + - repo: https://github.com/codespell-project/codespell + rev: v2.2.6 + hooks: + - id: codespell + additional_dependencies: + - tomli + exclude: "CONTRIBUTORS.md|docs/source/setup/walkthrough/concepts_env_design.rst" + # FIXME: Figure out why this is getting stuck under VPN. + # - repo: https://github.com/RobertCraigie/pyright-python + # rev: v1.1.315 + # hooks: + # - id: pyright + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.1 + hooks: + - id: insert-license + files: \.(py|ya?ml)$ + args: + # - --remove-header # Remove existing license headers. Useful when updating license. + - --license-filepath + - .github/LICENSE_HEADER.txt + - --use-current-year + exclude: "source/isaaclab_mimic/|scripts/imitation_learning/isaaclab_mimic/" + # Apache 2.0 license for mimic files + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.1 + hooks: + - id: insert-license + files: ^(source/isaaclab_mimic|scripts/imitation_learning/isaaclab_mimic)/.*\.py$ + args: + # - --remove-header # Remove existing license headers. Useful when updating license. + - --license-filepath + - .github/LICENSE_HEADER_MIMIC.txt + - --use-current-year + - repo: https://github.com/pre-commit/pygrep-hooks + rev: v1.10.0 + hooks: + - id: rst-backticks + - id: rst-directive-colons + - id: rst-inline-touching-normal diff --git a/.vscode/.gitignore b/.vscode/.gitignore new file mode 100644 index 0000000..10b0af3 --- /dev/null +++ b/.vscode/.gitignore @@ -0,0 +1,10 @@ +# Note: These files are kept for development purposes only. 
+!tools/launch.template.json +!tools/settings.template.json +!tools/setup_vscode.py +!extensions.json +!tasks.json + +# Ignore all other files +.python.env +*.json diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..6306e43 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,12 @@ +{ + // See http://go.microsoft.com/fwlink/?LinkId=827846 + // for the documentation about the extensions.json format + "recommendations": [ + "ms-python.python", + "ms-python.vscode-pylance", + "ban.spellright", + "ms-iot.vscode-ros", + "ms-python.black-formatter", + "ms-python.flake8", + ] +} diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..288b398 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,23 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "setup_python_env", + "type": "shell", + "linux": { + "command": "${input:isaac_path}/python.sh ${workspaceFolder}/.vscode/tools/setup_vscode.py --isaac_path ${input:isaac_path}" + }, + "windows": { + "command": "${input:isaac_path}/python.bat ${workspaceFolder}/.vscode/tools/setup_vscode.py --isaac_path ${input:isaac_path}" + } + } + ], + "inputs": [ + { + "id": "isaac_path", + "description": "Absolute path to the current Isaac Sim installation. If you installed IsaacSim from pip, the import of it failed. Please make sure you run the task with the correct python environment. As fallback, you can directly execute the python script by running: ``python.sh /.vscode/tools/setup_vscode.py``", + "default": "${HOME}/isaacsim", + "type": "promptString" + }, + ] +} \ No newline at end of file diff --git a/.vscode/tools/launch.template.json b/.vscode/tools/launch.template.json new file mode 100644 index 0000000..a0b1a0c --- /dev/null +++ b/.vscode/tools/launch.template.json @@ -0,0 +1,825 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + // For standalone script execution + { + "name": "Python: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Direct-v0 with rl_games (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/rl_games/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Direct-v0 with rl_games (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/rl_games/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Direct-v0 with rsl_rl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/rsl_rl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Direct-v0 with rsl_rl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/rsl_rl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Direct-v0 with skrl (AMP)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Direct-v0 with skrl (AMP)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Direct-v0 with skrl (IPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Direct-v0 with skrl (IPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Direct-v0 with skrl (MAPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Direct-v0 with skrl (MAPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Direct-v0 with skrl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : 
["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Direct-v0 with skrl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Direct-v0 with sb3 (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/sb3/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Direct-v0 with sb3 (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/sb3/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/rl_games/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/rl_games/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/rsl_rl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/rsl_rl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (AMP)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (AMP)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"], + 
"program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/sb3/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/sb3/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-v0 with rl_games (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/rl_games/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-v0 with rl_games (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/rl_games/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-v0 with rsl_rl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/rsl_rl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-v0 with rsl_rl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/rsl_rl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-v0 with skrl (AMP)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-v0 with skrl (AMP)", + "type": 
"debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "AMP"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-v0 with skrl (IPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-v0 with skrl (IPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "IPPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-v0 with skrl (MAPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-v0 with skrl (MAPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "MAPPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-v0 with skrl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-v0 with skrl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "PPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Train Template-Mindbot-v0 with sb3 (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/sb3/train.py", + "console": "integratedTerminal", + }, + { + "name": "Python: Play Template-Mindbot-v0 with sb3 (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/sb3/play.py", + "console": "integratedTerminal", + }, + // For script execution inside a Docker + { + "name": "Docker: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + } + }, + { + "name": "Docker: Train Template-Mindbot-Direct-v0 with rl_games (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/rl_games/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-Direct-v0 with rl_games (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/rl_games/play.py", + "console": "integratedTerminal", + "env": { + 
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-Direct-v0 with rsl_rl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/rsl_rl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-Direct-v0 with rsl_rl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/rsl_rl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (AMP)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (AMP)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (IPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (IPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (MAPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (MAPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": 
"Docker: Play Template-Mindbot-Direct-v0 with skrl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-Direct-v0 with sb3 (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/sb3/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-Direct-v0 with sb3 (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/sb3/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/rl_games/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/rl_games/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/rsl_rl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/rsl_rl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (AMP)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (AMP)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)", + "type": "debugpy", + "request": "launch", + "args" : 
["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/sb3/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/sb3/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-v0 with rl_games (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/rl_games/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-v0 with rl_games (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", 
"32"], + "program": "${workspaceFolder}/scripts/rl_games/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-v0 with rsl_rl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/rsl_rl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-v0 with rsl_rl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/rsl_rl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-v0 with skrl (AMP)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-v0 with skrl (AMP)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "AMP"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-v0 with skrl (IPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-v0 with skrl (IPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "IPPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-v0 with skrl (MAPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-v0 with skrl (MAPPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "MAPPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-v0 with skrl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"], + "program": "${workspaceFolder}/scripts/skrl/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: 
Play Template-Mindbot-v0 with skrl (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "PPO"], + "program": "${workspaceFolder}/scripts/skrl/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Train Template-Mindbot-v0 with sb3 (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"], + "program": "${workspaceFolder}/scripts/sb3/train.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + { + "name": "Docker: Play Template-Mindbot-v0 with sb3 (PPO)", + "type": "debugpy", + "request": "launch", + "args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"], + "program": "${workspaceFolder}/scripts/sb3/play.py", + "console": "integratedTerminal", + "env": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}" + }, + }, + ] +} \ No newline at end of file diff --git a/.vscode/tools/settings.template.json b/.vscode/tools/settings.template.json new file mode 100644 index 0000000..5b97ac2 --- /dev/null +++ b/.vscode/tools/settings.template.json @@ -0,0 +1,86 @@ +{ + "files.associations": { + "*.tpp": "cpp", + "*.kit": "toml", + "*.rst": "restructuredtext" + }, + "editor.rulers": [120], + + // files to be ignored by the linter + "files.watcherExclude": { + "**/.git/objects/**": true, + "**/.git/subtree-cache/**": true, + "**/node_modules/**": true, + "**/_isaac_sim/**": true, + "**/_compiler/**": true + }, + // Configuration for spelling checker + "spellright.language": [ + "en-US-10-1." + ], + "spellright.documentTypes": [ + "markdown", + "latex", + "plaintext", + "cpp", + "asciidoc", + "python", + "restructuredtext" + ], + "cSpell.words": [ + "literalinclude", + "linenos", + "instanceable", + "isaacSim", + "jacobians", + "pointcloud", + "ridgeback", + "rllib", + "robomimic", + "teleoperation", + "xform", + "numpy", + "tensordict", + "flatcache", + "physx", + "dpad", + "gamepad", + "linspace", + "upsampled", + "downsampled", + "arange", + "discretization", + "trimesh", + "uninstanceable" + ], + // This enables python language server. Seems to work slightly better than jedi: + "python.languageServer": "Pylance", + // We use "black" as a formatter: + "python.formatting.provider": "black", + "python.formatting.blackArgs": ["--line-length", "120"], + // Use flake8 for linting + "python.linting.pylintEnabled": false, + "python.linting.flake8Enabled": true, + "python.linting.flake8Args": [ + "--max-line-length=120" + ], + // Use docstring generator + "autoDocstring.docstringFormat": "google", + "autoDocstring.guessTypes": true, + // Python environment path + // note: the default interpreter is overridden when user selects a workspace interpreter + // in the status bar. 
For example, the virtual environment python interpreter + "python.defaultInterpreterPath": "", + // ROS distribution + "ros.distro": "noetic", + // Language specific settings + "[python]": { + "editor.tabSize": 4 + }, + "[restructuredtext]": { + "editor.tabSize": 2 + }, + // Python extra paths + // Note: this is filled up when vscode is set up for the first time + "python.analysis.extraPaths": [] +} diff --git a/.vscode/tools/setup_vscode.py b/.vscode/tools/setup_vscode.py new file mode 100644 index 0000000..3a96361 --- /dev/null +++ b/.vscode/tools/setup_vscode.py @@ -0,0 +1,220 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""This script sets up the vs-code settings for the Isaac Lab project. + +This script merges the python.analysis.extraPaths from the "{ISAACSIM_DIR}/.vscode/settings.json" file into +the ".vscode/settings.json" file. + +This is necessary because Isaac Sim 2022.2.1 onwards does not add the necessary python packages to the python path +when the "setup_python_env.sh" is run as part of the vs-code launch configuration. +""" + +import argparse +import os +import pathlib +import platform +import re +import sys + +PROJECT_DIR = pathlib.Path(__file__).parents[2] +"""Path to the project directory.""" + +try: + import isaacsim  # noqa: F401 + + isaacsim_dir = os.environ.get("ISAAC_PATH", "") +except (ModuleNotFoundError, ImportError): + # Create a parser to get the isaac-sim path + parser = argparse.ArgumentParser(description="Setup the VSCode settings for the project.") + parser.add_argument("--isaac_path", type=str, help="The absolute path to the Isaac Sim installation.") + args = parser.parse_args() + + # parse the isaac-sim directory + isaacsim_dir = args.isaac_path + # check if the isaac-sim directory is provided + if not os.path.exists(isaacsim_dir): + raise FileNotFoundError( + f"Could not find the isaac-sim directory: {isaacsim_dir}. Please provide the correct path to the Isaac Sim" + " installation." + ) +except EOFError: + print("Unable to trigger EULA acceptance. This is likely due to the script being run in a non-interactive shell.") + print("Please run the script in an interactive shell to accept the EULA.") + print("Skipping the setup of the VSCode settings...") + sys.exit(0) + +# check if the isaac-sim directory exists +if not os.path.exists(isaacsim_dir): + raise FileNotFoundError( + f"Could not find the isaac-sim directory: {isaacsim_dir}. There are two possible reasons for this:\n\t1. The" + " Isaac Sim directory does not exist at the provided CLI path.\n\t2. The script couldn't import the 'isaacsim'" + " package. This could be due to the 'isaacsim' package not being installed in the Python" + " environment.\n\nPlease make sure that the Isaac Sim directory exists or that the 'isaacsim' package is" + " installed." + ) + +ISAACSIM_DIR = isaacsim_dir +"""Path to the isaac-sim directory.""" + + +def overwrite_python_analysis_extra_paths(isaaclab_settings: str) -> str: + """Overwrite the python.analysis.extraPaths in the Isaac Lab settings file. + + The extraPaths are replaced with the path names from the isaac-sim settings file that exists in the + "{ISAACSIM_DIR}/.vscode/settings.json" file. + + If the isaac-sim settings file does not exist, the extraPaths are not overwritten. + + Args: + isaaclab_settings: The settings string to use as template.
+ + Returns: + The settings string with overwritten python analysis extra paths. + """ + # isaac-sim settings + isaacsim_vscode_filename = os.path.join(ISAACSIM_DIR, ".vscode", "settings.json") + + # we use the isaac-sim settings file to get the python.analysis.extraPaths for kit extensions + # if this file does not exist, we will not add any extra paths + if os.path.exists(isaacsim_vscode_filename): + # read the path names from the isaac-sim settings file + with open(isaacsim_vscode_filename) as f: + vscode_settings = f.read() + # extract the path names + # search for the python.analysis.extraPaths section and extract the contents + settings = re.search( + r"\"python.analysis.extraPaths\": \[.*?\]", vscode_settings, flags=re.MULTILINE | re.DOTALL + ) + settings = settings.group(0) + settings = settings.split('"python.analysis.extraPaths": [')[-1] + settings = settings.split("]")[0] + + # read the path names from the isaac-sim settings file + path_names = settings.split(",") + path_names = [path_name.strip().strip('"') for path_name in path_names] + path_names = [path_name for path_name in path_names if len(path_name) > 0] + + # change the path names to be relative to the Isaac Lab directory + rel_path = os.path.relpath(ISAACSIM_DIR, PROJECT_DIR) + path_names = ['"${workspaceFolder}/' + rel_path + "/" + path_name + '"' for path_name in path_names] + else: + path_names = [] + print( + f"[WARN] Could not find Isaac Sim VSCode settings: {isaacsim_vscode_filename}." + "\n\tThis will result in missing 'python.analysis.extraPaths' in the VSCode" + "\n\tsettings, which limits the functionality of the Python language server." + "\n\tHowever, it does not affect the functionality of the Isaac Lab project." + "\n\tWe are working on a fix for this issue with the Isaac Sim team." + ) + + # add the path names that are in the Isaac Lab extensions directory + isaaclab_extensions = os.listdir(os.path.join(PROJECT_DIR, "source")) + path_names.extend(['"${workspaceFolder}/source/' + ext + '"' for ext in isaaclab_extensions]) + + # combine them into a single string + path_names = ",\n\t\t".expandtabs(4).join(path_names) + # deal with the path separator being different on Windows and Unix + path_names = path_names.replace("\\", "/") + + # replace the path names in the Isaac Lab settings file with the path names parsed + isaaclab_settings = re.sub( + r"\"python.analysis.extraPaths\": \[.*?\]", + '"python.analysis.extraPaths": [\n\t\t'.expandtabs(4) + path_names + "\n\t]".expandtabs(4), + isaaclab_settings, + flags=re.DOTALL, + ) + # return the Isaac Lab settings string + return isaaclab_settings + + +def overwrite_default_python_interpreter(isaaclab_settings: str) -> str: + """Overwrite the default python interpreter in the Isaac Lab settings file. + + The default python interpreter is replaced with the path to the python interpreter used by the + isaac-sim project. This is necessary because the default python interpreter is the one shipped with + isaac-sim. + + Args: + isaaclab_settings: The settings string to use as template. + + Returns: + The settings string with overwritten default python interpreter. 
+ """ + # read executable name + python_exe = os.path.normpath(sys.executable) + + # replace with Isaac Sim's python.sh or python.bat scripts to make sure python with correct + # source paths is set as default + if f"kit{os.sep}python{os.sep}bin{os.sep}python" in python_exe: + # Check if the OS is Windows or Linux to use appropriate shell file + if platform.system() == "Windows": + python_exe = python_exe.replace(f"kit{os.sep}python{os.sep}bin{os.sep}python3", "python.bat") + else: + python_exe = python_exe.replace(f"kit{os.sep}python{os.sep}bin{os.sep}python3", "python.sh") + + # replace the default python interpreter in the Isaac Lab settings file with the path to the + # python interpreter in the Isaac Lab directory + isaaclab_settings = re.sub( + r"\"python.defaultInterpreterPath\": \".*?\"", + f'"python.defaultInterpreterPath": "{python_exe}"', + isaaclab_settings, + flags=re.DOTALL, + ) + # return the Isaac Lab settings file + return isaaclab_settings + + +def main(): + # Isaac Lab template settings + isaaclab_vscode_template_filename = os.path.join(PROJECT_DIR, ".vscode", "tools", "settings.template.json") + # make sure the Isaac Lab template settings file exists + if not os.path.exists(isaaclab_vscode_template_filename): + raise FileNotFoundError( + f"Could not find the Isaac Lab template settings file: {isaaclab_vscode_template_filename}" + ) + # read the Isaac Lab template settings file + with open(isaaclab_vscode_template_filename) as f: + isaaclab_template_settings = f.read() + + # overwrite the python.analysis.extraPaths in the Isaac Lab settings file with the path names + isaaclab_settings = overwrite_python_analysis_extra_paths(isaaclab_template_settings) + # overwrite the default python interpreter in the Isaac Lab settings file with the path to the + # python interpreter used to call this script + isaaclab_settings = overwrite_default_python_interpreter(isaaclab_settings) + + # add template notice to the top of the file + header_message = ( + "// This file is a template and is automatically generated by the setup_vscode.py script.\n" + "// Do not edit this file directly.\n" + "// \n" + f"// Generated from: {isaaclab_vscode_template_filename}\n" + ) + isaaclab_settings = header_message + isaaclab_settings + + # write the Isaac Lab settings file + isaaclab_vscode_filename = os.path.join(PROJECT_DIR, ".vscode", "settings.json") + with open(isaaclab_vscode_filename, "w") as f: + f.write(isaaclab_settings) + + # copy the launch.json file if it doesn't exist + isaaclab_vscode_launch_filename = os.path.join(PROJECT_DIR, ".vscode", "launch.json") + isaaclab_vscode_template_launch_filename = os.path.join(PROJECT_DIR, ".vscode", "tools", "launch.template.json") + if not os.path.exists(isaaclab_vscode_launch_filename): + # read template launch settings + with open(isaaclab_vscode_template_launch_filename) as f: + isaaclab_template_launch_settings = f.read() + # add header + header_message = header_message.replace( + isaaclab_vscode_template_filename, isaaclab_vscode_template_launch_filename + ) + isaaclab_launch_settings = header_message + isaaclab_template_launch_settings + # write the Isaac Lab launch settings file + with open(isaaclab_vscode_launch_filename, "w") as f: + f.write(isaaclab_launch_settings) + + +if __name__ == "__main__": + main() diff --git a/README.md b/README.md new file mode 100644 index 0000000..ead3f36 --- /dev/null +++ b/README.md @@ -0,0 +1,135 @@ +# Template for Isaac Lab Projects + +## Overview + +This project/repository serves as a template for building 
projects or extensions based on Isaac Lab. +It allows you to develop in an isolated environment, outside of the core Isaac Lab repository. + +**Key Features:** + +- `Isolation` Work outside the core Isaac Lab repository, ensuring that your development efforts remain self-contained. +- `Flexibility` This template is set up to allow your code to be run as an extension in Omniverse. + +**Keywords:** extension, template, isaaclab + +## Installation + +- Install Isaac Lab by following the [installation guide](https://isaac-sim.github.io/IsaacLab/main/source/setup/installation/index.html). + We recommend using the conda or uv installation as it simplifies calling Python scripts from the terminal. + +- Clone or copy this project/repository separately from the Isaac Lab installation (i.e. outside the `IsaacLab` directory): + +- Using a python interpreter that has Isaac Lab installed, install the library in editable mode using: + + ```bash + # use 'PATH_TO_isaaclab.sh|bat -p' instead of 'python' if Isaac Lab is not installed in Python venv or conda + python -m pip install -e source/mindbot + ``` + +- Verify that the extension is correctly installed by: + + - Listing the available tasks: + + Note: If the task name changes, it may be necessary to update the search pattern `"Template-"` + (in the `scripts/list_envs.py` file) so that it can be listed. + + ```bash + # use 'FULL_PATH_TO_isaaclab.sh|bat -p' instead of 'python' if Isaac Lab is not installed in Python venv or conda + python scripts/list_envs.py + ``` + + - Running a task: + + ```bash + # use 'FULL_PATH_TO_isaaclab.sh|bat -p' instead of 'python' if Isaac Lab is not installed in Python venv or conda + python scripts/<RL_LIBRARY>/train.py --task=<TASK_NAME> + ``` + + - Running a task with dummy agents: + + These include dummy agents that output zero or random actions. They are useful to ensure that the environments are configured correctly. + + - Zero-action agent + + ```bash + # use 'FULL_PATH_TO_isaaclab.sh|bat -p' instead of 'python' if Isaac Lab is not installed in Python venv or conda + python scripts/zero_agent.py --task=<TASK_NAME> + ``` + - Random-action agent + + ```bash + # use 'FULL_PATH_TO_isaaclab.sh|bat -p' instead of 'python' if Isaac Lab is not installed in Python venv or conda + python scripts/random_agent.py --task=<TASK_NAME> + ``` + +### Set up IDE (Optional) + +To set up the IDE, please follow these instructions: + +- Run VSCode Tasks by pressing `Ctrl+Shift+P`, selecting `Tasks: Run Task` and running the `setup_python_env` task from the drop-down menu. + When running this task, you will be prompted to add the absolute path to your Isaac Sim installation. + +If everything executes correctly, it should create a file `.python.env` in the `.vscode` directory. +The file contains the python paths to all the extensions provided by Isaac Sim and Omniverse. +This helps in indexing all the python modules for intelligent suggestions while writing code. + +### Setup as Omniverse Extension (Optional) + +We provide an example UI extension that will load upon enabling your extension defined in `source/mindbot/mindbot/ui_extension_example.py`. + +To enable your extension, follow these steps: + +1. **Add the search path of this project/repository** to the extension manager: + - Navigate to the extension manager using `Window` -> `Extensions`. + - Click on the **Hamburger Icon**, then go to `Settings`. + - In the `Extension Search Paths`, enter the absolute path to the `source` directory of this project/repository.
+ - If not already present, in the `Extension Search Paths`, enter the path that leads to Isaac Lab's extension directory (`IsaacLab/source`) + - Click on the **Hamburger Icon**, then click `Refresh`. + +2. **Search and enable your extension**: + - Find your extension under the `Third Party` category. + - Toggle it to enable your extension. + +## Code formatting + +We have a pre-commit template to automatically format your code. +To install pre-commit: + +```bash +pip install pre-commit +``` + +Then you can run pre-commit with: + +```bash +pre-commit run --all-files +``` + +## Troubleshooting + +### Pylance Missing Indexing of Extensions + +In some VS Code versions, the indexing of part of the extensions is missing. +In this case, add the path to your extension in `.vscode/settings.json` under the key `"python.analysis.extraPaths"`. + +```json +{ + "python.analysis.extraPaths": [ + "<path-to-this-repo>/source/mindbot" + ] +} +``` + +### Pylance Crash + +If you encounter a crash in `pylance`, it is probable that too many files are indexed and you run out of memory. +A possible solution is to exclude some of the Omniverse packages that are not used in your project. +To do so, modify `.vscode/settings.json` and comment out packages under the key `"python.analysis.extraPaths"`. +Some examples of packages that can likely be excluded are: + +```json +"<path-to-isaac-sim>/extscache/omni.anim.*" // Animation packages +"<path-to-isaac-sim>/extscache/omni.kit.*" // Kit UI tools +"<path-to-isaac-sim>/extscache/omni.graph.*" // Graph UI tools +"<path-to-isaac-sim>/extscache/omni.services.*" // Services tools +... +``` \ No newline at end of file diff --git a/scripts/list_envs.py b/scripts/list_envs.py new file mode 100644 index 0000000..ddda069 --- /dev/null +++ b/scripts/list_envs.py @@ -0,0 +1,64 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Script to print all the available environments in Isaac Lab. + +The script iterates over all registered environments and stores the details in a table. +It prints the name of the environment, the entry point and the config file. + +All the environments are registered in the `mindbot` extension. They start +with `Template-` in their name. +""" + +"""Launch Isaac Sim Simulator first.""" + +from isaaclab.app import AppLauncher + +# launch omniverse app +app_launcher = AppLauncher(headless=True) +simulation_app = app_launcher.app + + +"""Rest everything follows.""" + +import gymnasium as gym +from prettytable import PrettyTable + +import mindbot.tasks  # noqa: F401 + + +def main(): + """Print all environments registered in `mindbot` extension.""" + # print all the available environments + table = PrettyTable(["S.
No.", "Task Name", "Entry Point", "Config"]) + table.title = "Available Environments in Isaac Lab" + # set alignment of table columns + table.align["Task Name"] = "l" + table.align["Entry Point"] = "l" + table.align["Config"] = "l" + + # count of environments + index = 0 + # acquire all Isaac environments names + for task_spec in gym.registry.values(): + if "Template-" in task_spec.id: + # add details to table + table.add_row([index + 1, task_spec.id, task_spec.entry_point, task_spec.kwargs["env_cfg_entry_point"]]) + # increment count + index += 1 + + print(table) + + +if __name__ == "__main__": + try: + # run the main function + main() + except Exception as e: + raise e + finally: + # close the app + simulation_app.close() diff --git a/scripts/random_agent.py b/scripts/random_agent.py new file mode 100644 index 0000000..abeaf32 --- /dev/null +++ b/scripts/random_agent.py @@ -0,0 +1,72 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Script to an environment with random action agent.""" + +"""Launch Isaac Sim Simulator first.""" + +import argparse + +from isaaclab.app import AppLauncher + +# add argparse arguments +parser = argparse.ArgumentParser(description="Random agent for Isaac Lab environments.") +parser.add_argument( + "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." +) +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +# parse the arguments +args_cli = parser.parse_args() + +# launch omniverse app +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + +import gymnasium as gym +import torch + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils import parse_env_cfg + +import mindbot.tasks # noqa: F401 + + +def main(): + """Random actions agent with Isaac Lab environment.""" + # create environment configuration + env_cfg = parse_env_cfg( + args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric + ) + # create environment + env = gym.make(args_cli.task, cfg=env_cfg) + + # print info (this is vectorized environment) + print(f"[INFO]: Gym observation space: {env.observation_space}") + print(f"[INFO]: Gym action space: {env.action_space}") + # reset environment + env.reset() + # simulate environment + while simulation_app.is_running(): + # run everything in inference mode + with torch.inference_mode(): + # sample actions from -1 to 1 + actions = 2 * torch.rand(env.action_space.shape, device=env.unwrapped.device) - 1 + # apply actions + env.step(actions) + + # close the simulator + env.close() + + +if __name__ == "__main__": + # run the main function + main() + # close sim app + simulation_app.close() diff --git a/scripts/rl_games/play.py b/scripts/rl_games/play.py new file mode 100644 index 0000000..223c358 --- /dev/null +++ b/scripts/rl_games/play.py @@ -0,0 +1,243 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +"""Script to play a checkpoint if an RL agent from RL-Games.""" + +"""Launch Isaac Sim Simulator first.""" + +import argparse +import sys + +from isaaclab.app import AppLauncher + +# add argparse arguments +parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from RL-Games.") +parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") +parser.add_argument( + "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." +) +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", type=str, default="rl_games_cfg_entry_point", help="Name of the RL agent configuration entry point." +) +parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.") +parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +parser.add_argument( + "--use_pretrained_checkpoint", + action="store_true", + help="Use the pre-trained checkpoint from Nucleus.", +) +parser.add_argument( + "--use_last_checkpoint", + action="store_true", + help="When no checkpoint provided, use the last saved model. Otherwise use the best saved model.", +) +parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +# parse the arguments +args_cli, hydra_args = parser.parse_known_args() +# always enable cameras to record video +if args_cli.video: + args_cli.enable_cameras = True + +# clear out sys.argv for Hydra +sys.argv = [sys.argv[0]] + hydra_args +# launch omniverse app +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + + +import gymnasium as gym +import math +import os +import random +import time +import torch + +from rl_games.common import env_configurations, vecenv +from rl_games.common.player import BasePlayer +from rl_games.torch_runner import Runner + +from isaaclab.envs import ( + DirectMARLEnv, + DirectMARLEnvCfg, + DirectRLEnvCfg, + ManagerBasedRLEnvCfg, + multi_agent_to_single_agent, +) +from isaaclab.utils.assets import retrieve_file_path +from isaaclab.utils.dict import print_dict +from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint + +from isaaclab_rl.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils import get_checkpoint_path +from isaaclab_tasks.utils.hydra import hydra_task_config + +import mindbot.tasks # noqa: F401 + + +@hydra_task_config(args_cli.task, args_cli.agent) +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict): + """Play with RL-Games agent.""" + # grab task name for checkpoint path + task_name = args_cli.task.split(":")[-1] + train_task_name = task_name.replace("-Play", "") + + # override configurations with non-hydra CLI arguments + env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + # update agent device to match simulation device + if 
args_cli.device is not None: + agent_cfg["params"]["config"]["device"] = args_cli.device + agent_cfg["params"]["config"]["device_name"] = args_cli.device + + # randomly sample a seed if seed = -1 + if args_cli.seed == -1: + args_cli.seed = random.randint(0, 10000) + + agent_cfg["params"]["seed"] = args_cli.seed if args_cli.seed is not None else agent_cfg["params"]["seed"] + # set the environment seed (after multi-gpu config for updated rank from agent seed) + # note: certain randomizations occur in the environment initialization so we set the seed here + env_cfg.seed = agent_cfg["params"]["seed"] + + # specify directory for logging experiments + log_root_path = os.path.join("logs", "rl_games", agent_cfg["params"]["config"]["name"]) + log_root_path = os.path.abspath(log_root_path) + print(f"[INFO] Loading experiment from directory: {log_root_path}") + # find checkpoint + if args_cli.use_pretrained_checkpoint: + resume_path = get_published_pretrained_checkpoint("rl_games", train_task_name) + if not resume_path: + print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.") + return + elif args_cli.checkpoint is None: + # specify directory for logging runs + run_dir = agent_cfg["params"]["config"].get("full_experiment_name", ".*") + # specify name of checkpoint + if args_cli.use_last_checkpoint: + checkpoint_file = ".*" + else: + # this loads the best checkpoint + checkpoint_file = f"{agent_cfg['params']['config']['name']}.pth" + # get path to previous checkpoint + resume_path = get_checkpoint_path(log_root_path, run_dir, checkpoint_file, other_dirs=["nn"]) + else: + resume_path = retrieve_file_path(args_cli.checkpoint) + log_dir = os.path.dirname(os.path.dirname(resume_path)) + + # set the log directory for the environment (works for all environment types) + env_cfg.log_dir = log_dir + + # wrap around environment for rl-games + rl_device = agent_cfg["params"]["config"]["device"] + clip_obs = agent_cfg["params"]["env"].get("clip_observations", math.inf) + clip_actions = agent_cfg["params"]["env"].get("clip_actions", math.inf) + obs_groups = agent_cfg["params"]["env"].get("obs_groups") + concate_obs_groups = agent_cfg["params"]["env"].get("concate_obs_groups", True) + + # create isaac environment + env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + + # wrap for video recording + if args_cli.video: + video_kwargs = { + "video_folder": os.path.join(log_root_path, log_dir, "videos", "play"), + "step_trigger": lambda step: step == 0, + "video_length": args_cli.video_length, + "disable_logger": True, + } + print("[INFO] Recording videos during training.") + print_dict(video_kwargs, nesting=4) + env = gym.wrappers.RecordVideo(env, **video_kwargs) + + # wrap around environment for rl-games + env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions, obs_groups, concate_obs_groups) + + # register the environment to rl-games registry + # note: in agents configuration: environment name must be "rlgpu" + vecenv.register( + "IsaacRlgWrapper", lambda config_name, num_actors, **kwargs: RlGamesGpuEnv(config_name, num_actors, **kwargs) + ) + env_configurations.register("rlgpu", {"vecenv_type": "IsaacRlgWrapper", "env_creator": lambda **kwargs: env}) + + # load previously trained model + agent_cfg["params"]["load_checkpoint"] = True + agent_cfg["params"]["load_path"] 
= resume_path + print(f"[INFO]: Loading model checkpoint from: {agent_cfg['params']['load_path']}") + + # set number of actors into agent config + agent_cfg["params"]["config"]["num_actors"] = env.unwrapped.num_envs + # create runner from rl-games + runner = Runner() + runner.load(agent_cfg) + # obtain the agent from the runner + agent: BasePlayer = runner.create_player() + agent.restore(resume_path) + agent.reset() + + dt = env.unwrapped.step_dt + + # reset environment + obs = env.reset() + if isinstance(obs, dict): + obs = obs["obs"] + timestep = 0 + # required: enables the flag for batched observations + _ = agent.get_batch_size(obs, 1) + # initialize RNN states if used + if agent.is_rnn: + agent.init_rnn() + # simulate environment + # note: We simplified the logic in rl-games player.py (:func:`BasePlayer.run()`) function in an + # attempt to have complete control over environment stepping. However, this removes other + # operations such as masking that is used for multi-agent learning by RL-Games. + while simulation_app.is_running(): + start_time = time.time() + # run everything in inference mode + with torch.inference_mode(): + # convert obs to agent format + obs = agent.obs_to_torch(obs) + # agent stepping + actions = agent.get_action(obs, is_deterministic=agent.is_deterministic) + # env stepping + obs, _, dones, _ = env.step(actions) + + # perform operations for terminated episodes + if len(dones) > 0: + # reset rnn state for terminated episodes + if agent.is_rnn and agent.states is not None: + for s in agent.states: + s[:, dones, :] = 0.0 + if args_cli.video: + timestep += 1 + # exit the play loop after recording one video + if timestep == args_cli.video_length: + break + + # time delay for real-time evaluation + sleep_time = dt - (time.time() - start_time) + if args_cli.real_time and sleep_time > 0: + time.sleep(sleep_time) + + # close the simulator + env.close() + + +if __name__ == "__main__": + # run the main function + main() + # close sim app + simulation_app.close() diff --git a/scripts/rl_games/train.py b/scripts/rl_games/train.py new file mode 100644 index 0000000..8cd48d9 --- /dev/null +++ b/scripts/rl_games/train.py @@ -0,0 +1,255 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Script to train RL agent with RL-Games.""" + +"""Launch Isaac Sim Simulator first.""" + +import argparse +import sys +from distutils.util import strtobool + +from isaaclab.app import AppLauncher + +# add argparse arguments +parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.") +parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") +parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", type=str, default="rl_games_cfg_entry_point", help="Name of the RL agent configuration entry point." 
+) +parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +parser.add_argument( + "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes." +) +parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.") +parser.add_argument("--sigma", type=str, default=None, help="The policy's initial standard deviation.") +parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.") +parser.add_argument("--wandb-project-name", type=str, default=None, help="the wandb's project name") +parser.add_argument("--wandb-entity", type=str, default=None, help="the entity (team) of wandb's project") +parser.add_argument("--wandb-name", type=str, default=None, help="the name of wandb's run") +parser.add_argument( + "--track", + type=lambda x: bool(strtobool(x)), + default=False, + nargs="?", + const=True, + help="if toggled, this experiment will be tracked with Weights and Biases", +) +parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.") +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +# parse the arguments +args_cli, hydra_args = parser.parse_known_args() +# always enable cameras to record video +if args_cli.video: + args_cli.enable_cameras = True + +# clear out sys.argv for Hydra +sys.argv = [sys.argv[0]] + hydra_args + +# launch omniverse app +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + +import gymnasium as gym +import math +import os +import random +from datetime import datetime + +import omni +from rl_games.common import env_configurations, vecenv +from rl_games.common.algo_observer import IsaacAlgoObserver +from rl_games.torch_runner import Runner + +from isaaclab.envs import ( + DirectMARLEnv, + DirectMARLEnvCfg, + DirectRLEnvCfg, + ManagerBasedRLEnvCfg, + multi_agent_to_single_agent, +) +from isaaclab.utils.assets import retrieve_file_path +from isaaclab.utils.dict import print_dict +from isaaclab.utils.io import dump_yaml + +from isaaclab_rl.rl_games import MultiObserver, PbtAlgoObserver, RlGamesGpuEnv, RlGamesVecEnvWrapper + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils.hydra import hydra_task_config + +import mindbot.tasks # noqa: F401 + + +@hydra_task_config(args_cli.task, args_cli.agent) +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict): + """Train with RL-Games agent.""" + # override configurations with non-hydra CLI arguments + env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + # check for invalid combination of CPU device with distributed training + if args_cli.distributed and args_cli.device is not None and "cpu" in args_cli.device: + raise ValueError( + "Distributed training is not supported when using CPU device. " + "Please use GPU device (e.g., --device cuda) for distributed training." 
+ ) + + # update agent device to match simulation device + if args_cli.device is not None: + agent_cfg["params"]["config"]["device"] = args_cli.device + agent_cfg["params"]["config"]["device_name"] = args_cli.device + + # randomly sample a seed if seed = -1 + if args_cli.seed == -1: + args_cli.seed = random.randint(0, 10000) + + agent_cfg["params"]["seed"] = args_cli.seed if args_cli.seed is not None else agent_cfg["params"]["seed"] + agent_cfg["params"]["config"]["max_epochs"] = ( + args_cli.max_iterations if args_cli.max_iterations is not None else agent_cfg["params"]["config"]["max_epochs"] + ) + if args_cli.checkpoint is not None: + resume_path = retrieve_file_path(args_cli.checkpoint) + agent_cfg["params"]["load_checkpoint"] = True + agent_cfg["params"]["load_path"] = resume_path + print(f"[INFO]: Loading model checkpoint from: {agent_cfg['params']['load_path']}") + train_sigma = float(args_cli.sigma) if args_cli.sigma is not None else None + + # multi-gpu training config + if args_cli.distributed: + agent_cfg["params"]["seed"] += app_launcher.global_rank + agent_cfg["params"]["config"]["device"] = f"cuda:{app_launcher.local_rank}" + agent_cfg["params"]["config"]["device_name"] = f"cuda:{app_launcher.local_rank}" + agent_cfg["params"]["config"]["multi_gpu"] = True + # update env config device + env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" + + # set the environment seed (after multi-gpu config for updated rank from agent seed) + # note: certain randomizations occur in the environment initialization so we set the seed here + env_cfg.seed = agent_cfg["params"]["seed"] + + # specify directory for logging experiments + config_name = agent_cfg["params"]["config"]["name"] + log_root_path = os.path.join("logs", "rl_games", config_name) + if "pbt" in agent_cfg: + if agent_cfg["pbt"]["directory"] == ".": + log_root_path = os.path.abspath(log_root_path) + else: + log_root_path = os.path.join(agent_cfg["pbt"]["directory"], log_root_path) + + print(f"[INFO] Logging experiment in directory: {log_root_path}") + # specify directory for logging runs + log_dir = agent_cfg["params"]["config"].get("full_experiment_name", datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) + # set directory into agent config + # logging directory path: / + agent_cfg["params"]["config"]["train_dir"] = log_root_path + agent_cfg["params"]["config"]["full_experiment_name"] = log_dir + wandb_project = config_name if args_cli.wandb_project_name is None else args_cli.wandb_project_name + experiment_name = log_dir if args_cli.wandb_name is None else args_cli.wandb_name + + # dump the configuration into log-directory + dump_yaml(os.path.join(log_root_path, log_dir, "params", "env.yaml"), env_cfg) + dump_yaml(os.path.join(log_root_path, log_dir, "params", "agent.yaml"), agent_cfg) + + # read configurations about the agent-training + rl_device = agent_cfg["params"]["config"]["device"] + clip_obs = agent_cfg["params"]["env"].get("clip_observations", math.inf) + clip_actions = agent_cfg["params"]["env"].get("clip_actions", math.inf) + obs_groups = agent_cfg["params"]["env"].get("obs_groups") + concate_obs_groups = agent_cfg["params"]["env"].get("concate_obs_groups", True) + + # set the IO descriptors export flag if requested + if isinstance(env_cfg, ManagerBasedRLEnvCfg): + env_cfg.export_io_descriptors = args_cli.export_io_descriptors + else: + omni.log.warn( + "IO descriptors are only supported for manager based RL environments. No IO descriptors will be exported." 
+ ) + + # set the log directory for the environment (works for all environment types) + env_cfg.log_dir = os.path.join(log_root_path, log_dir) + + # create isaac environment + env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + + # wrap for video recording + if args_cli.video: + video_kwargs = { + "video_folder": os.path.join(log_root_path, log_dir, "videos", "train"), + "step_trigger": lambda step: step % args_cli.video_interval == 0, + "video_length": args_cli.video_length, + "disable_logger": True, + } + print("[INFO] Recording videos during training.") + print_dict(video_kwargs, nesting=4) + env = gym.wrappers.RecordVideo(env, **video_kwargs) + + # wrap around environment for rl-games + env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions, obs_groups, concate_obs_groups) + + # register the environment to rl-games registry + # note: in agents configuration: environment name must be "rlgpu" + vecenv.register( + "IsaacRlgWrapper", lambda config_name, num_actors, **kwargs: RlGamesGpuEnv(config_name, num_actors, **kwargs) + ) + env_configurations.register("rlgpu", {"vecenv_type": "IsaacRlgWrapper", "env_creator": lambda **kwargs: env}) + + # set number of actors into agent config + agent_cfg["params"]["config"]["num_actors"] = env.unwrapped.num_envs + # create runner from rl-games + + if "pbt" in agent_cfg and agent_cfg["pbt"]["enabled"]: + observers = MultiObserver([IsaacAlgoObserver(), PbtAlgoObserver(agent_cfg, args_cli)]) + runner = Runner(observers) + else: + runner = Runner(IsaacAlgoObserver()) + + runner.load(agent_cfg) + + # reset the agent and env + runner.reset() + # train the agent + + global_rank = int(os.getenv("RANK", "0")) + if args_cli.track and global_rank == 0: + if args_cli.wandb_entity is None: + raise ValueError("Weights and Biases entity must be specified for tracking.") + import wandb + + wandb.init( + project=wandb_project, + entity=args_cli.wandb_entity, + name=experiment_name, + sync_tensorboard=True, + monitor_gym=True, + save_code=True, + ) + if not wandb.run.resumed: + wandb.config.update({"env_cfg": env_cfg.to_dict()}) + wandb.config.update({"agent_cfg": agent_cfg}) + + if args_cli.checkpoint is not None: + runner.run({"train": True, "play": False, "sigma": train_sigma, "checkpoint": resume_path}) + else: + runner.run({"train": True, "play": False, "sigma": train_sigma}) + + # close the simulator + env.close() + + +if __name__ == "__main__": + # run the main function + main() + # close sim app + simulation_app.close() diff --git a/scripts/rsl_rl/cli_args.py b/scripts/rsl_rl/cli_args.py new file mode 100644 index 0000000..c176f77 --- /dev/null +++ b/scripts/rsl_rl/cli_args.py @@ -0,0 +1,91 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import argparse +import random +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg + + +def add_rsl_rl_args(parser: argparse.ArgumentParser): + """Add RSL-RL arguments to the parser. + + Args: + parser: The parser to add the arguments to. 
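+
+    Example:
+        A minimal usage sketch (an editor's illustration rather than part of the original script;
+        the parsed flag values below are placeholders, and the real scripts in this repo also
+        append the AppLauncher arguments before parsing)::
+
+            parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
+            add_rsl_rl_args(parser)
+            args_cli = parser.parse_args(["--logger", "tensorboard", "--run_name", "debug"])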
+ """ + # create a new argument group + arg_group = parser.add_argument_group("rsl_rl", description="Arguments for RSL-RL agent.") + # -- experiment arguments + arg_group.add_argument( + "--experiment_name", type=str, default=None, help="Name of the experiment folder where logs will be stored." + ) + arg_group.add_argument("--run_name", type=str, default=None, help="Run name suffix to the log directory.") + # -- load arguments + arg_group.add_argument("--resume", action="store_true", default=False, help="Whether to resume from a checkpoint.") + arg_group.add_argument("--load_run", type=str, default=None, help="Name of the run folder to resume from.") + arg_group.add_argument("--checkpoint", type=str, default=None, help="Checkpoint file to resume from.") + # -- logger arguments + arg_group.add_argument( + "--logger", type=str, default=None, choices={"wandb", "tensorboard", "neptune"}, help="Logger module to use." + ) + arg_group.add_argument( + "--log_project_name", type=str, default=None, help="Name of the logging project when using wandb or neptune." + ) + + +def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlBaseRunnerCfg: + """Parse configuration for RSL-RL agent based on inputs. + + Args: + task_name: The name of the environment. + args_cli: The command line arguments. + + Returns: + The parsed configuration for RSL-RL agent based on inputs. + """ + from isaaclab_tasks.utils.parse_cfg import load_cfg_from_registry + + # load the default configuration + rslrl_cfg: RslRlBaseRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point") + rslrl_cfg = update_rsl_rl_cfg(rslrl_cfg, args_cli) + return rslrl_cfg + + +def update_rsl_rl_cfg(agent_cfg: RslRlBaseRunnerCfg, args_cli: argparse.Namespace): + """Update configuration for RSL-RL agent based on inputs. + + Args: + agent_cfg: The configuration for RSL-RL agent. + args_cli: The command line arguments. + + Returns: + The updated configuration for RSL-RL agent based on inputs. + """ + # override the default configuration with CLI arguments + if hasattr(args_cli, "seed") and args_cli.seed is not None: + # randomly sample a seed if seed = -1 + if args_cli.seed == -1: + args_cli.seed = random.randint(0, 10000) + agent_cfg.seed = args_cli.seed + if args_cli.resume is not None: + agent_cfg.resume = args_cli.resume + if args_cli.load_run is not None: + agent_cfg.load_run = args_cli.load_run + if args_cli.checkpoint is not None: + agent_cfg.load_checkpoint = args_cli.checkpoint + if args_cli.run_name is not None: + agent_cfg.run_name = args_cli.run_name + if args_cli.logger is not None: + agent_cfg.logger = args_cli.logger + # set the project name for wandb and neptune + if agent_cfg.logger in {"wandb", "neptune"} and args_cli.log_project_name: + agent_cfg.wandb_project = args_cli.log_project_name + agent_cfg.neptune_project = args_cli.log_project_name + + return agent_cfg diff --git a/scripts/rsl_rl/play.py b/scripts/rsl_rl/play.py new file mode 100644 index 0000000..3fdbe24 --- /dev/null +++ b/scripts/rsl_rl/play.py @@ -0,0 +1,210 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +"""Script to play a checkpoint if an RL agent from RSL-RL.""" + +"""Launch Isaac Sim Simulator first.""" + +import argparse +import sys + +from isaaclab.app import AppLauncher + +# local imports +import cli_args # isort: skip + +# add argparse arguments +parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") +parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") +parser.add_argument( + "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." +) +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point." +) +parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +parser.add_argument( + "--use_pretrained_checkpoint", + action="store_true", + help="Use the pre-trained checkpoint from Nucleus.", +) +parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") +# append RSL-RL cli arguments +cli_args.add_rsl_rl_args(parser) +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +# parse the arguments +args_cli, hydra_args = parser.parse_known_args() +# always enable cameras to record video +if args_cli.video: + args_cli.enable_cameras = True + +# clear out sys.argv for Hydra +sys.argv = [sys.argv[0]] + hydra_args + +# launch omniverse app +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + +import gymnasium as gym +import os +import time +import torch + +from rsl_rl.runners import DistillationRunner, OnPolicyRunner + +from isaaclab.envs import ( + DirectMARLEnv, + DirectMARLEnvCfg, + DirectRLEnvCfg, + ManagerBasedRLEnvCfg, + multi_agent_to_single_agent, +) +from isaaclab.utils.assets import retrieve_file_path +from isaaclab.utils.dict import print_dict +from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint + +from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils import get_checkpoint_path +from isaaclab_tasks.utils.hydra import hydra_task_config + +import mindbot.tasks # noqa: F401 + + +@hydra_task_config(args_cli.task, args_cli.agent) +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): + """Play with RSL-RL agent.""" + # grab task name for checkpoint path + task_name = args_cli.task.split(":")[-1] + train_task_name = task_name.replace("-Play", "") + + # override configurations with non-hydra CLI arguments + agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) + env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + + # set the environment seed + # note: certain randomizations occur in the environment initialization so we set the seed here + env_cfg.seed = agent_cfg.seed + env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + + # specify directory for logging experiments + 
log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) + log_root_path = os.path.abspath(log_root_path) + print(f"[INFO] Loading experiment from directory: {log_root_path}") + if args_cli.use_pretrained_checkpoint: + resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name) + if not resume_path: + print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.") + return + elif args_cli.checkpoint: + resume_path = retrieve_file_path(args_cli.checkpoint) + else: + resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) + + log_dir = os.path.dirname(resume_path) + + # set the log directory for the environment (works for all environment types) + env_cfg.log_dir = log_dir + + # create isaac environment + env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + + # wrap for video recording + if args_cli.video: + video_kwargs = { + "video_folder": os.path.join(log_dir, "videos", "play"), + "step_trigger": lambda step: step == 0, + "video_length": args_cli.video_length, + "disable_logger": True, + } + print("[INFO] Recording videos during training.") + print_dict(video_kwargs, nesting=4) + env = gym.wrappers.RecordVideo(env, **video_kwargs) + + # wrap around environment for rsl-rl + env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) + + print(f"[INFO]: Loading model checkpoint from: {resume_path}") + # load previously trained model + if agent_cfg.class_name == "OnPolicyRunner": + runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + elif agent_cfg.class_name == "DistillationRunner": + runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + else: + raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") + runner.load(resume_path) + + # obtain the trained policy for inference + policy = runner.get_inference_policy(device=env.unwrapped.device) + + # extract the neural network module + # we do this in a try-except to maintain backwards compatibility. 
+ try: + # version 2.3 onwards + policy_nn = runner.alg.policy + except AttributeError: + # version 2.2 and below + policy_nn = runner.alg.actor_critic + + # extract the normalizer + if hasattr(policy_nn, "actor_obs_normalizer"): + normalizer = policy_nn.actor_obs_normalizer + elif hasattr(policy_nn, "student_obs_normalizer"): + normalizer = policy_nn.student_obs_normalizer + else: + normalizer = None + + # export policy to onnx/jit + export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") + export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt") + export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx") + + dt = env.unwrapped.step_dt + + # reset environment + obs = env.get_observations() + timestep = 0 + # simulate environment + while simulation_app.is_running(): + start_time = time.time() + # run everything in inference mode + with torch.inference_mode(): + # agent stepping + actions = policy(obs) + # env stepping + obs, _, dones, _ = env.step(actions) + # reset recurrent states for episodes that have terminated + policy_nn.reset(dones) + if args_cli.video: + timestep += 1 + # Exit the play loop after recording one video + if timestep == args_cli.video_length: + break + + # time delay for real-time evaluation + sleep_time = dt - (time.time() - start_time) + if args_cli.real_time and sleep_time > 0: + time.sleep(sleep_time) + + # close the simulator + env.close() + + +if __name__ == "__main__": + # run the main function + main() + # close sim app + simulation_app.close() diff --git a/scripts/rsl_rl/train.py b/scripts/rsl_rl/train.py new file mode 100644 index 0000000..a1e5765 --- /dev/null +++ b/scripts/rsl_rl/train.py @@ -0,0 +1,217 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Script to train RL agent with RSL-RL.""" + +"""Launch Isaac Sim Simulator first.""" + +import argparse +import sys + +from isaaclab.app import AppLauncher + +# local imports +import cli_args # isort: skip + +# add argparse arguments +parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") +parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") +parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point." +) +parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.") +parser.add_argument( + "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes." 
+) +parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.") +# append RSL-RL cli arguments +cli_args.add_rsl_rl_args(parser) +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +args_cli, hydra_args = parser.parse_known_args() + +# always enable cameras to record video +if args_cli.video: + args_cli.enable_cameras = True + +# clear out sys.argv for Hydra +sys.argv = [sys.argv[0]] + hydra_args + +# launch omniverse app +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +"""Check for minimum supported RSL-RL version.""" + +import importlib.metadata as metadata +import platform + +from packaging import version + +# check minimum supported rsl-rl version +RSL_RL_VERSION = "3.0.1" +installed_version = metadata.version("rsl-rl-lib") +if version.parse(installed_version) < version.parse(RSL_RL_VERSION): + if platform.system() == "Windows": + cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] + else: + cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] + print( + f"Please install the correct version of RSL-RL.\nExisting version is: '{installed_version}'" + f" and required version is: '{RSL_RL_VERSION}'.\nTo install the correct version, run:" + f"\n\n\t{' '.join(cmd)}\n" + ) + exit(1) + +"""Rest everything follows.""" + +import gymnasium as gym +import os +import torch +from datetime import datetime + +import omni +from rsl_rl.runners import DistillationRunner, OnPolicyRunner + +from isaaclab.envs import ( + DirectMARLEnv, + DirectMARLEnvCfg, + DirectRLEnvCfg, + ManagerBasedRLEnvCfg, + multi_agent_to_single_agent, +) +from isaaclab.utils.dict import print_dict +from isaaclab.utils.io import dump_yaml + +from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils import get_checkpoint_path +from isaaclab_tasks.utils.hydra import hydra_task_config + +import mindbot.tasks # noqa: F401 + +torch.backends.cuda.matmul.allow_tf32 = True +torch.backends.cudnn.allow_tf32 = True +torch.backends.cudnn.deterministic = False +torch.backends.cudnn.benchmark = False + + +@hydra_task_config(args_cli.task, args_cli.agent) +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): + """Train with RSL-RL agent.""" + # override configurations with non-hydra CLI arguments + agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) + env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + agent_cfg.max_iterations = ( + args_cli.max_iterations if args_cli.max_iterations is not None else agent_cfg.max_iterations + ) + + # set the environment seed + # note: certain randomizations occur in the environment initialization so we set the seed here + env_cfg.seed = agent_cfg.seed + env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + # check for invalid combination of CPU device with distributed training + if args_cli.distributed and args_cli.device is not None and "cpu" in args_cli.device: + raise ValueError( + "Distributed training is not supported when using CPU device. " + "Please use GPU device (e.g., --device cuda) for distributed training." 
+ ) + + # multi-gpu training configuration + if args_cli.distributed: + env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" + agent_cfg.device = f"cuda:{app_launcher.local_rank}" + + # set seed to have diversity in different threads + seed = agent_cfg.seed + app_launcher.local_rank + env_cfg.seed = seed + agent_cfg.seed = seed + + # specify directory for logging experiments + log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) + log_root_path = os.path.abspath(log_root_path) + print(f"[INFO] Logging experiment in directory: {log_root_path}") + # specify directory for logging runs: {time-stamp}_{run_name} + log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + # The Ray Tune workflow extracts experiment name using the logging line below, hence, do not change it (see PR #2346, comment-2819298849) + print(f"Exact experiment name requested from command line: {log_dir}") + if agent_cfg.run_name: + log_dir += f"_{agent_cfg.run_name}" + log_dir = os.path.join(log_root_path, log_dir) + + # set the IO descriptors export flag if requested + if isinstance(env_cfg, ManagerBasedRLEnvCfg): + env_cfg.export_io_descriptors = args_cli.export_io_descriptors + else: + omni.log.warn( + "IO descriptors are only supported for manager based RL environments. No IO descriptors will be exported." + ) + + # set the log directory for the environment (works for all environment types) + env_cfg.log_dir = log_dir + + # create isaac environment + env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + + # save resume path before creating a new log_dir + if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation": + resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) + + # wrap for video recording + if args_cli.video: + video_kwargs = { + "video_folder": os.path.join(log_dir, "videos", "train"), + "step_trigger": lambda step: step % args_cli.video_interval == 0, + "video_length": args_cli.video_length, + "disable_logger": True, + } + print("[INFO] Recording videos during training.") + print_dict(video_kwargs, nesting=4) + env = gym.wrappers.RecordVideo(env, **video_kwargs) + + # wrap around environment for rsl-rl + env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) + + # create runner from rsl-rl + if agent_cfg.class_name == "OnPolicyRunner": + runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + elif agent_cfg.class_name == "DistillationRunner": + runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + else: + raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") + # write git state to logs + runner.add_git_repo_to_log(__file__) + # load the checkpoint + if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation": + print(f"[INFO]: Loading model checkpoint from: {resume_path}") + # load previously trained model + runner.load(resume_path) + + # dump the configuration into log-directory + dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg) + dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg) + + # run training + runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True) + + # close the simulator + env.close() + + +if __name__ == "__main__": + # run the main 
function + main() + # close sim app + simulation_app.close() diff --git a/scripts/sb3/play.py b/scripts/sb3/play.py new file mode 100644 index 0000000..478b129 --- /dev/null +++ b/scripts/sb3/play.py @@ -0,0 +1,213 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Script to play a checkpoint if an RL agent from Stable-Baselines3.""" + +"""Launch Isaac Sim Simulator first.""" + +import argparse +import sys +from pathlib import Path + +from isaaclab.app import AppLauncher + +# add argparse arguments +parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from Stable-Baselines3.") +parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") +parser.add_argument( + "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." +) +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", type=str, default="sb3_cfg_entry_point", help="Name of the RL agent configuration entry point." +) +parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.") +parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +parser.add_argument( + "--use_pretrained_checkpoint", + action="store_true", + help="Use the pre-trained checkpoint from Nucleus.", +) +parser.add_argument( + "--use_last_checkpoint", + action="store_true", + help="When no checkpoint provided, use the last saved model. 
Otherwise use the best saved model.", +) +parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") +parser.add_argument( + "--keep_all_info", + action="store_true", + default=False, + help="Use a slower SB3 wrapper but keep all the extra training info.", +) +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +# parse the arguments +args_cli, hydra_args = parser.parse_known_args() + +# always enable cameras to record video +if args_cli.video: + args_cli.enable_cameras = True + +# clear out sys.argv for Hydra +sys.argv = [sys.argv[0]] + hydra_args +# launch omniverse app +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + +import gymnasium as gym +import os +import random +import time +import torch + +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import VecNormalize + +from isaaclab.envs import ( + DirectMARLEnv, + DirectMARLEnvCfg, + DirectRLEnvCfg, + ManagerBasedRLEnvCfg, + multi_agent_to_single_agent, +) +from isaaclab.utils.dict import print_dict +from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint + +from isaaclab_rl.sb3 import Sb3VecEnvWrapper, process_sb3_cfg + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils.hydra import hydra_task_config +from isaaclab_tasks.utils.parse_cfg import get_checkpoint_path + +import mindbot.tasks # noqa: F401 + + +@hydra_task_config(args_cli.task, args_cli.agent) +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict): + """Play with stable-baselines agent.""" + # grab task name for checkpoint path + task_name = args_cli.task.split(":")[-1] + train_task_name = task_name.replace("-Play", "") + # randomly sample a seed if seed = -1 + if args_cli.seed == -1: + args_cli.seed = random.randint(0, 10000) + + # override configurations with non-hydra CLI arguments + env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + agent_cfg["seed"] = args_cli.seed if args_cli.seed is not None else agent_cfg["seed"] + # set the environment seed + # note: certain randomizations occur in the environment initialization so we set the seed here + env_cfg.seed = agent_cfg["seed"] + env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + + # directory for logging into + log_root_path = os.path.join("logs", "sb3", train_task_name) + log_root_path = os.path.abspath(log_root_path) + # checkpoint and log_dir stuff + if args_cli.use_pretrained_checkpoint: + checkpoint_path = get_published_pretrained_checkpoint("sb3", train_task_name) + if not checkpoint_path: + print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.") + return + elif args_cli.checkpoint is None: + # FIXME: last checkpoint doesn't seem to really use the last one' + if args_cli.use_last_checkpoint: + checkpoint = "model_.*.zip" + else: + checkpoint = "model.zip" + checkpoint_path = get_checkpoint_path(log_root_path, ".*", checkpoint, sort_alpha=False) + else: + checkpoint_path = args_cli.checkpoint + log_dir = os.path.dirname(checkpoint_path) + + # set the log directory for the environment (works for all environment types) + env_cfg.log_dir = log_dir + + # create isaac environment + env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # post-process agent configuration + agent_cfg = process_sb3_cfg(agent_cfg, 
env.unwrapped.num_envs) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + + # wrap for video recording + if args_cli.video: + video_kwargs = { + "video_folder": os.path.join(log_dir, "videos", "play"), + "step_trigger": lambda step: step == 0, + "video_length": args_cli.video_length, + "disable_logger": True, + } + print("[INFO] Recording videos during training.") + print_dict(video_kwargs, nesting=4) + env = gym.wrappers.RecordVideo(env, **video_kwargs) + # wrap around environment for stable baselines + env = Sb3VecEnvWrapper(env, fast_variant=not args_cli.keep_all_info) + + vec_norm_path = checkpoint_path.replace("/model", "/model_vecnormalize").replace(".zip", ".pkl") + vec_norm_path = Path(vec_norm_path) + + # normalize environment (if needed) + if vec_norm_path.exists(): + print(f"Loading saved normalization: {vec_norm_path}") + env = VecNormalize.load(vec_norm_path, env) + # do not update them at test time + env.training = False + # reward normalization is not needed at test time + env.norm_reward = False + elif "normalize_input" in agent_cfg: + env = VecNormalize( + env, + training=True, + norm_obs="normalize_input" in agent_cfg and agent_cfg.pop("normalize_input"), + clip_obs="clip_obs" in agent_cfg and agent_cfg.pop("clip_obs"), + ) + + # create agent from stable baselines + print(f"Loading checkpoint from: {checkpoint_path}") + agent = PPO.load(checkpoint_path, env, print_system_info=True) + + dt = env.unwrapped.step_dt + + # reset environment + obs = env.reset() + timestep = 0 + # simulate environment + while simulation_app.is_running(): + start_time = time.time() + # run everything in inference mode + with torch.inference_mode(): + # agent stepping + actions, _ = agent.predict(obs, deterministic=True) + # env stepping + obs, _, _, _ = env.step(actions) + if args_cli.video: + timestep += 1 + # Exit the play loop after recording one video + if timestep == args_cli.video_length: + break + + # time delay for real-time evaluation + sleep_time = dt - (time.time() - start_time) + if args_cli.real_time and sleep_time > 0: + time.sleep(sleep_time) + + # close the simulator + env.close() + + +if __name__ == "__main__": + # run the main function + main() + # close sim app + simulation_app.close() diff --git a/scripts/sb3/train.py b/scripts/sb3/train.py new file mode 100644 index 0000000..181109b --- /dev/null +++ b/scripts/sb3/train.py @@ -0,0 +1,229 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + + +"""Script to train RL agent with Stable Baselines3.""" + +"""Launch Isaac Sim Simulator first.""" + +import argparse +import contextlib +import signal +import sys +from pathlib import Path + +from isaaclab.app import AppLauncher + +# add argparse arguments +parser = argparse.ArgumentParser(description="Train an RL agent with Stable-Baselines3.") +parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") +parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", type=str, default="sb3_cfg_entry_point", help="Name of the RL agent configuration entry point." +) +parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +parser.add_argument("--log_interval", type=int, default=100_000, help="Log data every n timesteps.") +parser.add_argument("--checkpoint", type=str, default=None, help="Continue the training from checkpoint.") +parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.") +parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.") +parser.add_argument( + "--keep_all_info", + action="store_true", + default=False, + help="Use a slower SB3 wrapper but keep all the extra training info.", +) +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +# parse the arguments +args_cli, hydra_args = parser.parse_known_args() +# always enable cameras to record video +if args_cli.video: + args_cli.enable_cameras = True + +# clear out sys.argv for Hydra +sys.argv = [sys.argv[0]] + hydra_args + +# launch omniverse app +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + + +def cleanup_pbar(*args): + """ + A small helper to stop training and + cleanup progress bar properly on ctrl+c + """ + import gc + + tqdm_objects = [obj for obj in gc.get_objects() if "tqdm" in type(obj).__name__] + for tqdm_object in tqdm_objects: + if "tqdm_rich" in type(tqdm_object).__name__: + tqdm_object.close() + raise KeyboardInterrupt + + +# disable KeyboardInterrupt override +signal.signal(signal.SIGINT, cleanup_pbar) + +"""Rest everything follows.""" + +import gymnasium as gym +import numpy as np +import os +import random +from datetime import datetime + +import omni +from stable_baselines3 import PPO +from stable_baselines3.common.callbacks import CheckpointCallback, LogEveryNTimesteps +from stable_baselines3.common.vec_env import VecNormalize + +from isaaclab.envs import ( + DirectMARLEnv, + DirectMARLEnvCfg, + DirectRLEnvCfg, + ManagerBasedRLEnvCfg, + multi_agent_to_single_agent, +) +from isaaclab.utils.dict import print_dict +from isaaclab.utils.io import dump_yaml + +from isaaclab_rl.sb3 import Sb3VecEnvWrapper, process_sb3_cfg + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils.hydra import hydra_task_config + +import mindbot.tasks # noqa: F401 + + +@hydra_task_config(args_cli.task, args_cli.agent) +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict): + """Train with stable-baselines agent.""" + # randomly 
sample a seed if seed = -1 + if args_cli.seed == -1: + args_cli.seed = random.randint(0, 10000) + + # override configurations with non-hydra CLI arguments + env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + agent_cfg["seed"] = args_cli.seed if args_cli.seed is not None else agent_cfg["seed"] + # max iterations for training + if args_cli.max_iterations is not None: + agent_cfg["n_timesteps"] = args_cli.max_iterations * agent_cfg["n_steps"] * env_cfg.scene.num_envs + + # set the environment seed + # note: certain randomizations occur in the environment initialization so we set the seed here + env_cfg.seed = agent_cfg["seed"] + env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + + # directory for logging into + run_info = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + log_root_path = os.path.abspath(os.path.join("logs", "sb3", args_cli.task)) + print(f"[INFO] Logging experiment in directory: {log_root_path}") + # The Ray Tune workflow extracts experiment name using the logging line below, hence, do not change it (see PR #2346, comment-2819298849) + print(f"Exact experiment name requested from command line: {run_info}") + log_dir = os.path.join(log_root_path, run_info) + # dump the configuration into log-directory + dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg) + dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg) + + # save command used to run the script + command = " ".join(sys.orig_argv) + (Path(log_dir) / "command.txt").write_text(command) + + # post-process agent configuration + agent_cfg = process_sb3_cfg(agent_cfg, env_cfg.scene.num_envs) + # read configurations about the agent-training + policy_arch = agent_cfg.pop("policy") + n_timesteps = agent_cfg.pop("n_timesteps") + + # set the IO descriptors export flag if requested + if isinstance(env_cfg, ManagerBasedRLEnvCfg): + env_cfg.export_io_descriptors = args_cli.export_io_descriptors + else: + omni.log.warn( + "IO descriptors are only supported for manager based RL environments. No IO descriptors will be exported." 
+ ) + + # set the log directory for the environment (works for all environment types) + env_cfg.log_dir = log_dir + + # create isaac environment + env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + + # wrap for video recording + if args_cli.video: + video_kwargs = { + "video_folder": os.path.join(log_dir, "videos", "train"), + "step_trigger": lambda step: step % args_cli.video_interval == 0, + "video_length": args_cli.video_length, + "disable_logger": True, + } + print("[INFO] Recording videos during training.") + print_dict(video_kwargs, nesting=4) + env = gym.wrappers.RecordVideo(env, **video_kwargs) + + # wrap around environment for stable baselines + env = Sb3VecEnvWrapper(env, fast_variant=not args_cli.keep_all_info) + + norm_keys = {"normalize_input", "normalize_value", "clip_obs"} + norm_args = {} + for key in norm_keys: + if key in agent_cfg: + norm_args[key] = agent_cfg.pop(key) + + if norm_args and norm_args.get("normalize_input"): + print(f"Normalizing input, {norm_args=}") + env = VecNormalize( + env, + training=True, + norm_obs=norm_args["normalize_input"], + norm_reward=norm_args.get("normalize_value", False), + clip_obs=norm_args.get("clip_obs", 100.0), + gamma=agent_cfg["gamma"], + clip_reward=np.inf, + ) + + # create agent from stable baselines + agent = PPO(policy_arch, env, verbose=1, tensorboard_log=log_dir, **agent_cfg) + if args_cli.checkpoint is not None: + agent = agent.load(args_cli.checkpoint, env, print_system_info=True) + + # callbacks for agent + checkpoint_callback = CheckpointCallback(save_freq=1000, save_path=log_dir, name_prefix="model", verbose=2) + callbacks = [checkpoint_callback, LogEveryNTimesteps(n_steps=args_cli.log_interval)] + + # train the agent + with contextlib.suppress(KeyboardInterrupt): + agent.learn( + total_timesteps=n_timesteps, + callback=callbacks, + progress_bar=True, + log_interval=None, + ) + # save the final model + agent.save(os.path.join(log_dir, "model")) + print("Saving to:") + print(os.path.join(log_dir, "model.zip")) + + if isinstance(env, VecNormalize): + print("Saving normalization") + env.save(os.path.join(log_dir, "model_vecnormalize.pkl")) + + # close the simulator + env.close() + + +if __name__ == "__main__": + # run the main function + main() + # close sim app + simulation_app.close() diff --git a/scripts/skrl/play.py b/scripts/skrl/play.py new file mode 100644 index 0000000..cb68831 --- /dev/null +++ b/scripts/skrl/play.py @@ -0,0 +1,250 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Script to play a checkpoint of an RL agent from skrl. + +Visit the skrl documentation (https://skrl.readthedocs.io) to see the examples structured in +a more user-friendly way. 
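+
+A hypothetical invocation (an editor's sketch rather than part of the original script; the task name
+is a placeholder for an environment registered by the `mindbot` extension, and the launcher script is
+assumed to be the standard Isaac Lab entry point) could look like:
+
+.. code-block:: bash
+
+    ./isaaclab.sh -p scripts/skrl/play.py --task <Your-Task-Name> --num_envs 32 --checkpoint /path/to/checkpoint.pt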
+""" + +"""Launch Isaac Sim Simulator first.""" + +import argparse +import sys + +from isaaclab.app import AppLauncher + +# add argparse arguments +parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from skrl.") +parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") +parser.add_argument( + "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." +) +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", + type=str, + default=None, + help=( + "Name of the RL agent configuration entry point. Defaults to None, in which case the argument " + "--algorithm is used to determine the default agent configuration entry point." + ), +) +parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.") +parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +parser.add_argument( + "--use_pretrained_checkpoint", + action="store_true", + help="Use the pre-trained checkpoint from Nucleus.", +) +parser.add_argument( + "--ml_framework", + type=str, + default="torch", + choices=["torch", "jax", "jax-numpy"], + help="The ML framework used for training the skrl agent.", +) +parser.add_argument( + "--algorithm", + type=str, + default="PPO", + choices=["AMP", "PPO", "IPPO", "MAPPO"], + help="The RL algorithm used for training the skrl agent.", +) +parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") + +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +# parse the arguments +args_cli, hydra_args = parser.parse_known_args() +# always enable cameras to record video +if args_cli.video: + args_cli.enable_cameras = True + +# clear out sys.argv for Hydra +sys.argv = [sys.argv[0]] + hydra_args +# launch omniverse app +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + +import gymnasium as gym +import os +import random +import time +import torch + +import skrl +from packaging import version + +# check for minimum supported skrl version +SKRL_VERSION = "1.4.3" +if version.parse(skrl.__version__) < version.parse(SKRL_VERSION): + skrl.logger.error( + f"Unsupported skrl version: {skrl.__version__}. 
" + f"Install supported version using 'pip install skrl>={SKRL_VERSION}'" + ) + exit() + +if args_cli.ml_framework.startswith("torch"): + from skrl.utils.runner.torch import Runner +elif args_cli.ml_framework.startswith("jax"): + from skrl.utils.runner.jax import Runner + +from isaaclab.envs import ( + DirectMARLEnv, + DirectMARLEnvCfg, + DirectRLEnvCfg, + ManagerBasedRLEnvCfg, + multi_agent_to_single_agent, +) +from isaaclab.utils.dict import print_dict +from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint + +from isaaclab_rl.skrl import SkrlVecEnvWrapper + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils import get_checkpoint_path +from isaaclab_tasks.utils.hydra import hydra_task_config + +import mindbot.tasks # noqa: F401 + +# config shortcuts +if args_cli.agent is None: + algorithm = args_cli.algorithm.lower() + agent_cfg_entry_point = "skrl_cfg_entry_point" if algorithm in ["ppo"] else f"skrl_{algorithm}_cfg_entry_point" +else: + agent_cfg_entry_point = args_cli.agent + algorithm = agent_cfg_entry_point.split("_cfg")[0].split("skrl_")[-1].lower() + + +@hydra_task_config(args_cli.task, agent_cfg_entry_point) +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, experiment_cfg: dict): + """Play with skrl agent.""" + # grab task name for checkpoint path + task_name = args_cli.task.split(":")[-1] + train_task_name = task_name.replace("-Play", "") + + # override configurations with non-hydra CLI arguments + env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + + # configure the ML framework into the global skrl variable + if args_cli.ml_framework.startswith("jax"): + skrl.config.jax.backend = "jax" if args_cli.ml_framework == "jax" else "numpy" + + # randomly sample a seed if seed = -1 + if args_cli.seed == -1: + args_cli.seed = random.randint(0, 10000) + + # set the agent and environment seed from command line + # note: certain randomization occur in the environment initialization so we set the seed here + experiment_cfg["seed"] = args_cli.seed if args_cli.seed is not None else experiment_cfg["seed"] + env_cfg.seed = experiment_cfg["seed"] + + # specify directory for logging experiments (load checkpoint) + log_root_path = os.path.join("logs", "skrl", experiment_cfg["agent"]["experiment"]["directory"]) + log_root_path = os.path.abspath(log_root_path) + print(f"[INFO] Loading experiment from directory: {log_root_path}") + # get checkpoint path + if args_cli.use_pretrained_checkpoint: + resume_path = get_published_pretrained_checkpoint("skrl", train_task_name) + if not resume_path: + print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.") + return + elif args_cli.checkpoint: + resume_path = os.path.abspath(args_cli.checkpoint) + else: + resume_path = get_checkpoint_path( + log_root_path, run_dir=f".*_{algorithm}_{args_cli.ml_framework}", other_dirs=["checkpoints"] + ) + log_dir = os.path.dirname(os.path.dirname(resume_path)) + + # set the log directory for the environment (works for all environment types) + env_cfg.log_dir = log_dir + + # create isaac environment + env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]: + env = multi_agent_to_single_agent(env) + 
+ # get environment (step) dt for real-time evaluation + try: + dt = env.step_dt + except AttributeError: + dt = env.unwrapped.step_dt + + # wrap for video recording + if args_cli.video: + video_kwargs = { + "video_folder": os.path.join(log_dir, "videos", "play"), + "step_trigger": lambda step: step == 0, + "video_length": args_cli.video_length, + "disable_logger": True, + } + print("[INFO] Recording videos during training.") + print_dict(video_kwargs, nesting=4) + env = gym.wrappers.RecordVideo(env, **video_kwargs) + + # wrap around environment for skrl + env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework) # same as: `wrap_env(env, wrapper="auto")` + + # configure and instantiate the skrl runner + # https://skrl.readthedocs.io/en/latest/api/utils/runner.html + experiment_cfg["trainer"]["close_environment_at_exit"] = False + experiment_cfg["agent"]["experiment"]["write_interval"] = 0 # don't log to TensorBoard + experiment_cfg["agent"]["experiment"]["checkpoint_interval"] = 0 # don't generate checkpoints + runner = Runner(env, experiment_cfg) + + print(f"[INFO] Loading model checkpoint from: {resume_path}") + runner.agent.load(resume_path) + # set agent to evaluation mode + runner.agent.set_running_mode("eval") + + # reset environment + obs, _ = env.reset() + timestep = 0 + # simulate environment + while simulation_app.is_running(): + start_time = time.time() + + # run everything in inference mode + with torch.inference_mode(): + # agent stepping + outputs = runner.agent.act(obs, timestep=0, timesteps=0) + # - multi-agent (deterministic) actions + if hasattr(env, "possible_agents"): + actions = {a: outputs[-1][a].get("mean_actions", outputs[0][a]) for a in env.possible_agents} + # - single-agent (deterministic) actions + else: + actions = outputs[-1].get("mean_actions", outputs[0]) + # env stepping + obs, _, _, _, _ = env.step(actions) + if args_cli.video: + timestep += 1 + # exit the play loop after recording one video + if timestep == args_cli.video_length: + break + + # time delay for real-time evaluation + sleep_time = dt - (time.time() - start_time) + if args_cli.real_time and sleep_time > 0: + time.sleep(sleep_time) + + # close the simulator + env.close() + + +if __name__ == "__main__": + # run the main function + main() + # close sim app + simulation_app.close() diff --git a/scripts/skrl/train.py b/scripts/skrl/train.py new file mode 100644 index 0000000..ecfc42b --- /dev/null +++ b/scripts/skrl/train.py @@ -0,0 +1,235 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Script to train RL agent with skrl. + +Visit the skrl documentation (https://skrl.readthedocs.io) to see the examples structured in +a more user-friendly way. 
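+
+Example usage (a sketch: the numeric values are placeholders, and depending on your
+installation the script may need to be launched through the Isaac Lab launcher rather
+than plain Python):
+
+    python scripts/skrl/train.py --task Template-Mindbot-Direct-v0 --num_envs 4096 --max_iterations 150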
+""" + +"""Launch Isaac Sim Simulator first.""" + +import argparse +import sys + +from isaaclab.app import AppLauncher + +# add argparse arguments +parser = argparse.ArgumentParser(description="Train an RL agent with skrl.") +parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") +parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", + type=str, + default=None, + help=( + "Name of the RL agent configuration entry point. Defaults to None, in which case the argument " + "--algorithm is used to determine the default agent configuration entry point." + ), +) +parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +parser.add_argument( + "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes." +) +parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint to resume training.") +parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.") +parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.") +parser.add_argument( + "--ml_framework", + type=str, + default="torch", + choices=["torch", "jax", "jax-numpy"], + help="The ML framework used for training the skrl agent.", +) +parser.add_argument( + "--algorithm", + type=str, + default="PPO", + choices=["AMP", "PPO", "IPPO", "MAPPO"], + help="The RL algorithm used for training the skrl agent.", +) + +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +# parse the arguments +args_cli, hydra_args = parser.parse_known_args() +# always enable cameras to record video +if args_cli.video: + args_cli.enable_cameras = True + +# clear out sys.argv for Hydra +sys.argv = [sys.argv[0]] + hydra_args + +# launch omniverse app +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + +import gymnasium as gym +import os +import random +from datetime import datetime + +import omni +import skrl +from packaging import version + +# check for minimum supported skrl version +SKRL_VERSION = "1.4.3" +if version.parse(skrl.__version__) < version.parse(SKRL_VERSION): + skrl.logger.error( + f"Unsupported skrl version: {skrl.__version__}. 
" + f"Install supported version using 'pip install skrl>={SKRL_VERSION}'" + ) + exit() + +if args_cli.ml_framework.startswith("torch"): + from skrl.utils.runner.torch import Runner +elif args_cli.ml_framework.startswith("jax"): + from skrl.utils.runner.jax import Runner + +from isaaclab.envs import ( + DirectMARLEnv, + DirectMARLEnvCfg, + DirectRLEnvCfg, + ManagerBasedRLEnvCfg, + multi_agent_to_single_agent, +) +from isaaclab.utils.assets import retrieve_file_path +from isaaclab.utils.dict import print_dict +from isaaclab.utils.io import dump_yaml + +from isaaclab_rl.skrl import SkrlVecEnvWrapper + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils.hydra import hydra_task_config + +import mindbot.tasks # noqa: F401 + +# config shortcuts +if args_cli.agent is None: + algorithm = args_cli.algorithm.lower() + agent_cfg_entry_point = "skrl_cfg_entry_point" if algorithm in ["ppo"] else f"skrl_{algorithm}_cfg_entry_point" +else: + agent_cfg_entry_point = args_cli.agent + algorithm = agent_cfg_entry_point.split("_cfg")[0].split("skrl_")[-1].lower() + + +@hydra_task_config(args_cli.task, agent_cfg_entry_point) +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict): + """Train with skrl agent.""" + # override configurations with non-hydra CLI arguments + env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + + # check for invalid combination of CPU device with distributed training + if args_cli.distributed and args_cli.device is not None and "cpu" in args_cli.device: + raise ValueError( + "Distributed training is not supported when using CPU device. " + "Please use GPU device (e.g., --device cuda) for distributed training." 
+ ) + + # multi-gpu training config + if args_cli.distributed: + env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" + # max iterations for training + if args_cli.max_iterations: + agent_cfg["trainer"]["timesteps"] = args_cli.max_iterations * agent_cfg["agent"]["rollouts"] + agent_cfg["trainer"]["close_environment_at_exit"] = False + # configure the ML framework into the global skrl variable + if args_cli.ml_framework.startswith("jax"): + skrl.config.jax.backend = "jax" if args_cli.ml_framework == "jax" else "numpy" + + # randomly sample a seed if seed = -1 + if args_cli.seed == -1: + args_cli.seed = random.randint(0, 10000) + + # set the agent and environment seed from command line + # note: certain randomization occur in the environment initialization so we set the seed here + agent_cfg["seed"] = args_cli.seed if args_cli.seed is not None else agent_cfg["seed"] + env_cfg.seed = agent_cfg["seed"] + + # specify directory for logging experiments + log_root_path = os.path.join("logs", "skrl", agent_cfg["agent"]["experiment"]["directory"]) + log_root_path = os.path.abspath(log_root_path) + print(f"[INFO] Logging experiment in directory: {log_root_path}") + # specify directory for logging runs: {time-stamp}_{run_name} + log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + f"_{algorithm}_{args_cli.ml_framework}" + # The Ray Tune workflow extracts experiment name using the logging line below, hence, do not change it (see PR #2346, comment-2819298849) + print(f"Exact experiment name requested from command line: {log_dir}") + if agent_cfg["agent"]["experiment"]["experiment_name"]: + log_dir += f'_{agent_cfg["agent"]["experiment"]["experiment_name"]}' + # set directory into agent config + agent_cfg["agent"]["experiment"]["directory"] = log_root_path + agent_cfg["agent"]["experiment"]["experiment_name"] = log_dir + # update log_dir + log_dir = os.path.join(log_root_path, log_dir) + + # dump the configuration into log-directory + dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg) + dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg) + + # get checkpoint path (to resume training) + resume_path = retrieve_file_path(args_cli.checkpoint) if args_cli.checkpoint else None + + # set the IO descriptors export flag if requested + if isinstance(env_cfg, ManagerBasedRLEnvCfg): + env_cfg.export_io_descriptors = args_cli.export_io_descriptors + else: + omni.log.warn( + "IO descriptors are only supported for manager based RL environments. No IO descriptors will be exported." 
+ ) + + # set the log directory for the environment (works for all environment types) + env_cfg.log_dir = log_dir + + # create isaac environment + env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]: + env = multi_agent_to_single_agent(env) + + # wrap for video recording + if args_cli.video: + video_kwargs = { + "video_folder": os.path.join(log_dir, "videos", "train"), + "step_trigger": lambda step: step % args_cli.video_interval == 0, + "video_length": args_cli.video_length, + "disable_logger": True, + } + print("[INFO] Recording videos during training.") + print_dict(video_kwargs, nesting=4) + env = gym.wrappers.RecordVideo(env, **video_kwargs) + + # wrap around environment for skrl + env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework) # same as: `wrap_env(env, wrapper="auto")` + + # configure and instantiate the skrl runner + # https://skrl.readthedocs.io/en/latest/api/utils/runner.html + runner = Runner(env, agent_cfg) + + # load checkpoint (if specified) + if resume_path: + print(f"[INFO] Loading model checkpoint from: {resume_path}") + runner.agent.load(resume_path) + + # run training + runner.run() + + # close the simulator + env.close() + + +if __name__ == "__main__": + # run the main function + main() + # close sim app + simulation_app.close() diff --git a/scripts/zero_agent.py b/scripts/zero_agent.py new file mode 100644 index 0000000..c296650 --- /dev/null +++ b/scripts/zero_agent.py @@ -0,0 +1,72 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Script to run an environment with zero action agent.""" + +"""Launch Isaac Sim Simulator first.""" + +import argparse + +from isaaclab.app import AppLauncher + +# add argparse arguments +parser = argparse.ArgumentParser(description="Zero agent for Isaac Lab environments.") +parser.add_argument( + "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." 
+) +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +# parse the arguments +args_cli = parser.parse_args() + +# launch omniverse app +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + +import gymnasium as gym +import torch + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils import parse_env_cfg + +import mindbot.tasks # noqa: F401 + + +def main(): + """Zero actions agent with Isaac Lab environment.""" + # parse configuration + env_cfg = parse_env_cfg( + args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric + ) + # create environment + env = gym.make(args_cli.task, cfg=env_cfg) + + # print info (this is vectorized environment) + print(f"[INFO]: Gym observation space: {env.observation_space}") + print(f"[INFO]: Gym action space: {env.action_space}") + # reset environment + env.reset() + # simulate environment + while simulation_app.is_running(): + # run everything in inference mode + with torch.inference_mode(): + # compute zero actions + actions = torch.zeros(env.action_space.shape, device=env.unwrapped.device) + # apply actions + env.step(actions) + + # close the simulator + env.close() + + +if __name__ == "__main__": + # run the main function + main() + # close sim app + simulation_app.close() diff --git a/source/mindbot/config/extension.toml b/source/mindbot/config/extension.toml new file mode 100644 index 0000000..831070b --- /dev/null +++ b/source/mindbot/config/extension.toml @@ -0,0 +1,35 @@ +[package] + +# Semantic Versioning is used: https://semver.org/ +version = "0.1.0" + +# Description +category = "isaaclab" +readme = "README.md" + +title = "Extension Template" +author = "Isaac Lab Project Developers" +maintainer = "Isaac Lab Project Developers" +description="Extension Template for Isaac Lab" +repository = "https://github.com/isaac-sim/IsaacLab.git" +keywords = ["extension", "template", "isaaclab"] + +[dependencies] +"isaaclab" = {} +"isaaclab_assets" = {} +"isaaclab_mimic" = {} +"isaaclab_rl" = {} +"isaaclab_tasks" = {} +# NOTE: Add additional dependencies here + +[[python.module]] +name = "mindbot" + +[isaac_lab_settings] +# TODO: Uncomment and list any apt dependencies here. +# If none, leave it commented out. +# apt_deps = ["example_package"] +# TODO: Uncomment and provide path to a ros_ws +# with rosdeps to be installed. If none, +# leave it commented out. +# ros_ws = "path/from/extension_root/to/ros_ws" \ No newline at end of file diff --git a/source/mindbot/docs/CHANGELOG.rst b/source/mindbot/docs/CHANGELOG.rst new file mode 100644 index 0000000..03a8463 --- /dev/null +++ b/source/mindbot/docs/CHANGELOG.rst @@ -0,0 +1,10 @@ +Changelog +--------- + +0.1.0 (2025-11-13) +~~~~~~~~~~~~~~~~~~ + +Added +^^^^^ + +* Created an initial template for building an extension or project based on Isaac Lab \ No newline at end of file diff --git a/source/mindbot/mindbot/__init__.py b/source/mindbot/mindbot/__init__.py new file mode 100644 index 0000000..6705ede --- /dev/null +++ b/source/mindbot/mindbot/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Python module serving as a project/extension template. +""" + +# Register Gym environments. +from .tasks import * + +# Register UI extensions. +from .ui_extension_example import * diff --git a/source/mindbot/mindbot/tasks/__init__.py b/source/mindbot/mindbot/tasks/__init__.py new file mode 100644 index 0000000..13df3c3 --- /dev/null +++ b/source/mindbot/mindbot/tasks/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Package containing task implementations for the extension.""" + +## +# Register Gym environments. +## + +from isaaclab_tasks.utils import import_packages + +# The blacklist is used to prevent importing configs from sub-packages +_BLACKLIST_PKGS = ["utils", ".mdp"] +# Import all configs in this package +import_packages(__name__, _BLACKLIST_PKGS) diff --git a/source/mindbot/mindbot/tasks/direct/__init__.py b/source/mindbot/mindbot/tasks/direct/__init__.py new file mode 100644 index 0000000..65d6e5a --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym # noqa: F401 diff --git a/source/mindbot/mindbot/tasks/direct/mindbot/__init__.py b/source/mindbot/mindbot/tasks/direct/mindbot/__init__.py new file mode 100644 index 0000000..39c6399 --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. +## + + +gym.register( + id="Template-Mindbot-Direct-v0", + entry_point=f"{__name__}.mindbot_env:MindbotEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg", + "skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml", + "skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml", + "skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", + }, +) \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot/agents/__init__.py b/source/mindbot/mindbot/tasks/direct/mindbot/agents/__init__.py new file mode 100644 index 0000000..a597dfa --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot/agents/rl_games_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/direct/mindbot/agents/rl_games_ppo_cfg.yaml new file mode 100644 index 0000000..71216e6 --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot/agents/rl_games_ppo_cfg.yaml @@ -0,0 +1,78 @@ +params: + seed: 42 + + # environment wrapper clipping + env: + # added to the wrapper + clip_observations: 5.0 + # can make custom wrapper? + clip_actions: 1.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + # doesn't have this fine grained control but made it close + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [32, 32] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: cartpole_direct + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + num_actors: -1 # configured from the script (based on num_envs) + reward_shaper: + scale_value: 0.1 + normalize_advantage: True + gamma: 0.99 + tau : 0.95 + learning_rate: 5e-4 + lr_schedule: adaptive + kl_threshold: 0.008 + score_to_win: 20000 + max_epochs: 150 + save_best_after: 50 + save_frequency: 25 + grad_norm: 1.0 + entropy_coef: 0.0 + truncate_grads: True + e_clip: 0.2 + horizon_length: 32 + minibatch_size: 16384 + mini_epochs: 8 + critic_coef: 4 + clip_value: True + seq_length: 4 + bounds_loss_coef: 0.0001 \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot/agents/rsl_rl_ppo_cfg.py b/source/mindbot/mindbot/tasks/direct/mindbot/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 0000000..4556af6 --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class PPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 16 + max_iterations = 150 + save_interval = 50 + experiment_name = "cartpole_direct" + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, + actor_hidden_dims=[32, 32], + critic_hidden_dims=[32, 32], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot/agents/sb3_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/direct/mindbot/agents/sb3_ppo_cfg.yaml new file mode 100644 index 0000000..23ed0c0 --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot/agents/sb3_ppo_cfg.yaml @@ -0,0 +1,20 @@ +# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32 +seed: 42 + +n_timesteps: !!float 1e6 +policy: 'MlpPolicy' +n_steps: 16 +batch_size: 4096 +gae_lambda: 0.95 +gamma: 0.99 +n_epochs: 20 +ent_coef: 0.01 +learning_rate: !!float 3e-4 +clip_range: !!float 0.2 +policy_kwargs: + activation_fn: nn.ELU + net_arch: [32, 32] + squash_output: False +vf_coef: 1.0 +max_grad_norm: 1.0 +device: "cuda:0" \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot/agents/skrl_amp_cfg.yaml b/source/mindbot/mindbot/tasks/direct/mindbot/agents/skrl_amp_cfg.yaml new file mode 100644 index 0000000..3a1fd21 --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot/agents/skrl_amp_cfg.yaml @@ -0,0 +1,111 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: -2.9 + fixed_log_std: True + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ONE + discriminator: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + +# AMP memory (reference motion dataset) +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +motion_dataset: + class: RandomMemory + memory_size: 200000 + +# AMP memory (preventing discriminator overfitting) +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +reply_buffer: + class: RandomMemory + memory_size: 1000000 + + +# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/amp.html +agent: + class: AMP + rollouts: 
16 + learning_epochs: 6 + mini_batches: 2 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-05 + learning_rate_scheduler: null + learning_rate_scheduler_kwargs: null + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + amp_state_preprocessor: RunningStandardScaler + amp_state_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 0.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.5 + discriminator_loss_scale: 5.0 + amp_batch_size: 512 + task_reward_weight: 0.0 + style_reward_weight: 1.0 + discriminator_batch_size: 4096 + discriminator_reward_scale: 2.0 + discriminator_logit_regularization_scale: 0.05 + discriminator_gradient_penalty_scale: 5.0 + discriminator_weight_decay_scale: 1.0e-04 + # rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "humanoid_amp_run" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 80000 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot/agents/skrl_ippo_cfg.yaml b/source/mindbot/mindbot/tasks/direct/mindbot/agents/skrl_ippo_cfg.yaml new file mode 100644 index 0000000..2f46b1c --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot/agents/skrl_ippo_cfg.yaml @@ -0,0 +1,80 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html +agent: + class: IPPO + rollouts: 16 + learning_epochs: 8 + mini_batches: 1 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 3.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cart_double_pendulum_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + 
timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot/agents/skrl_mappo_cfg.yaml b/source/mindbot/mindbot/tasks/direct/mindbot/agents/skrl_mappo_cfg.yaml new file mode 100644 index 0000000..720c927 --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot/agents/skrl_mappo_cfg.yaml @@ -0,0 +1,82 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html +agent: + class: MAPPO + rollouts: 16 + learning_epochs: 8 + mini_batches: 1 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 3.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + shared_state_preprocessor: RunningStandardScaler + shared_state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cart_double_pendulum_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot/agents/skrl_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/direct/mindbot/agents/skrl_ppo_cfg.yaml new file mode 100644 index 0000000..ab6674d --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot/agents/skrl_ppo_cfg.yaml @@ -0,0 +1,80 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + 
memory_size: -1 # automatically determined (same as agent:rollouts) + + +# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html +agent: + class: PPO + rollouts: 32 + learning_epochs: 8 + mini_batches: 8 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 0.1 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cartpole_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot/mindbot_env.py b/source/mindbot/mindbot/tasks/direct/mindbot/mindbot_env.py new file mode 100644 index 0000000..ee993c0 --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot/mindbot_env.py @@ -0,0 +1,135 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import math +import torch +from collections.abc import Sequence + +import isaaclab.sim as sim_utils +from isaaclab.assets import Articulation +from isaaclab.envs import DirectRLEnv +from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane +from isaaclab.utils.math import sample_uniform + +from .mindbot_env_cfg import MindbotEnvCfg + + +class MindbotEnv(DirectRLEnv): + cfg: MindbotEnvCfg + + def __init__(self, cfg: MindbotEnvCfg, render_mode: str | None = None, **kwargs): + super().__init__(cfg, render_mode, **kwargs) + + self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name) + self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name) + + self.joint_pos = self.robot.data.joint_pos + self.joint_vel = self.robot.data.joint_vel + + def _setup_scene(self): + self.robot = Articulation(self.cfg.robot_cfg) + # add ground plane + spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg()) + # clone and replicate + self.scene.clone_environments(copy_from_source=False) + # we need to explicitly filter collisions for CPU simulation + if self.device == "cpu": + self.scene.filter_collisions(global_prim_paths=[]) + # add articulation to scene + self.scene.articulations["robot"] = self.robot + # add lights + light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)) + light_cfg.func("/World/Light", light_cfg) + + def _pre_physics_step(self, actions: torch.Tensor) -> None: + self.actions = actions.clone() + + def _apply_action(self) -> None: + self.robot.set_joint_effort_target(self.actions * self.cfg.action_scale, joint_ids=self._cart_dof_idx) + + def _get_observations(self) -> dict: + obs = torch.cat( + ( + self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1), + self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1), + self.joint_pos[:, 
self._cart_dof_idx[0]].unsqueeze(dim=1), + self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1), + ), + dim=-1, + ) + observations = {"policy": obs} + return observations + + def _get_rewards(self) -> torch.Tensor: + total_reward = compute_rewards( + self.cfg.rew_scale_alive, + self.cfg.rew_scale_terminated, + self.cfg.rew_scale_pole_pos, + self.cfg.rew_scale_cart_vel, + self.cfg.rew_scale_pole_vel, + self.joint_pos[:, self._pole_dof_idx[0]], + self.joint_vel[:, self._pole_dof_idx[0]], + self.joint_pos[:, self._cart_dof_idx[0]], + self.joint_vel[:, self._cart_dof_idx[0]], + self.reset_terminated, + ) + return total_reward + + def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]: + self.joint_pos = self.robot.data.joint_pos + self.joint_vel = self.robot.data.joint_vel + + time_out = self.episode_length_buf >= self.max_episode_length - 1 + out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1) + out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1) + return out_of_bounds, time_out + + def _reset_idx(self, env_ids: Sequence[int] | None): + if env_ids is None: + env_ids = self.robot._ALL_INDICES + super()._reset_idx(env_ids) + + joint_pos = self.robot.data.default_joint_pos[env_ids] + joint_pos[:, self._pole_dof_idx] += sample_uniform( + self.cfg.initial_pole_angle_range[0] * math.pi, + self.cfg.initial_pole_angle_range[1] * math.pi, + joint_pos[:, self._pole_dof_idx].shape, + joint_pos.device, + ) + joint_vel = self.robot.data.default_joint_vel[env_ids] + + default_root_state = self.robot.data.default_root_state[env_ids] + default_root_state[:, :3] += self.scene.env_origins[env_ids] + + self.joint_pos[env_ids] = joint_pos + self.joint_vel[env_ids] = joint_vel + + self.robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids) + self.robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids) + self.robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids) + + +@torch.jit.script +def compute_rewards( + rew_scale_alive: float, + rew_scale_terminated: float, + rew_scale_pole_pos: float, + rew_scale_cart_vel: float, + rew_scale_pole_vel: float, + pole_pos: torch.Tensor, + pole_vel: torch.Tensor, + cart_pos: torch.Tensor, + cart_vel: torch.Tensor, + reset_terminated: torch.Tensor, +): + rew_alive = rew_scale_alive * (1.0 - reset_terminated.float()) + rew_termination = rew_scale_terminated * reset_terminated.float() + rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1) + rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1) + rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1) + total_reward = rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel + return total_reward \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot/mindbot_env_cfg.py b/source/mindbot/mindbot/tasks/direct/mindbot/mindbot_env_cfg.py new file mode 100644 index 0000000..bec1dae --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot/mindbot_env_cfg.py @@ -0,0 +1,48 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_assets.robots.cartpole import CARTPOLE_CFG + +from isaaclab.assets import ArticulationCfg +from isaaclab.envs import DirectRLEnvCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sim import SimulationCfg +from isaaclab.utils import configclass + + +@configclass +class MindbotEnvCfg(DirectRLEnvCfg): + # env + decimation = 2 + episode_length_s = 5.0 + # - spaces definition + action_space = 1 + observation_space = 4 + state_space = 0 + + # simulation + sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation) + + # robot(s) + robot_cfg: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="/World/envs/env_.*/Robot") + + # scene + scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True) + + # custom parameters/scales + # - controllable joint + cart_dof_name = "slider_to_cart" + pole_dof_name = "cart_to_pole" + # - action scale + action_scale = 100.0 # [N] + # - reward scales + rew_scale_alive = 1.0 + rew_scale_terminated = -2.0 + rew_scale_pole_pos = -1.0 + rew_scale_cart_vel = -0.01 + rew_scale_pole_vel = -0.005 + # - reset states/conditions + initial_pole_angle_range = [-0.25, 0.25] # pole angle sample range on reset [rad] + max_cart_pos = 3.0 # reset if cart exceeds this position [m] \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot_marl/__init__.py b/source/mindbot/mindbot/tasks/direct/mindbot_marl/__init__.py new file mode 100644 index 0000000..71d286a --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot_marl/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. +## + + +gym.register( + id="Template-Mindbot-Marl-Direct-v0", + entry_point=f"{__name__}.mindbot_marl_env:MindbotMarlEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.mindbot_marl_env_cfg:MindbotMarlEnvCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg", + "skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml", + "skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml", + "skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", + }, +) \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/__init__.py b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/__init__.py new file mode 100644 index 0000000..a597dfa --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/rl_games_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/rl_games_ppo_cfg.yaml new file mode 100644 index 0000000..71216e6 --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/rl_games_ppo_cfg.yaml @@ -0,0 +1,78 @@ +params: + seed: 42 + + # environment wrapper clipping + env: + # added to the wrapper + clip_observations: 5.0 + # can make custom wrapper? + clip_actions: 1.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + # doesn't have this fine grained control but made it close + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [32, 32] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: cartpole_direct + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + num_actors: -1 # configured from the script (based on num_envs) + reward_shaper: + scale_value: 0.1 + normalize_advantage: True + gamma: 0.99 + tau : 0.95 + learning_rate: 5e-4 + lr_schedule: adaptive + kl_threshold: 0.008 + score_to_win: 20000 + max_epochs: 150 + save_best_after: 50 + save_frequency: 25 + grad_norm: 1.0 + entropy_coef: 0.0 + truncate_grads: True + e_clip: 0.2 + horizon_length: 32 + minibatch_size: 16384 + mini_epochs: 8 + critic_coef: 4 + clip_value: True + seq_length: 4 + bounds_loss_coef: 0.0001 \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/rsl_rl_ppo_cfg.py b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 0000000..4556af6 --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class PPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 16 + max_iterations = 150 + save_interval = 50 + experiment_name = "cartpole_direct" + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, + actor_hidden_dims=[32, 32], + critic_hidden_dims=[32, 32], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/sb3_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/sb3_ppo_cfg.yaml new file mode 100644 index 0000000..23ed0c0 --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/sb3_ppo_cfg.yaml @@ -0,0 +1,20 @@ +# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32 +seed: 42 + +n_timesteps: !!float 1e6 +policy: 'MlpPolicy' +n_steps: 16 +batch_size: 4096 +gae_lambda: 0.95 +gamma: 0.99 +n_epochs: 20 +ent_coef: 0.01 +learning_rate: !!float 3e-4 +clip_range: !!float 0.2 +policy_kwargs: + activation_fn: nn.ELU + net_arch: [32, 32] + squash_output: False +vf_coef: 1.0 +max_grad_norm: 1.0 +device: "cuda:0" \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/skrl_amp_cfg.yaml b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/skrl_amp_cfg.yaml new file mode 100644 index 0000000..3a1fd21 --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/skrl_amp_cfg.yaml @@ -0,0 +1,111 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: -2.9 + fixed_log_std: True + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ONE + discriminator: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + +# AMP memory (reference motion dataset) +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +motion_dataset: + class: RandomMemory + memory_size: 200000 + +# AMP memory (preventing discriminator overfitting) +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +reply_buffer: + class: RandomMemory + memory_size: 1000000 + + +# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/amp.html 
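+# Note: this configuration is carried over from the humanoid AMP example (see the
+# "humanoid_amp_run" experiment directory below) and assumes an environment that provides
+# AMP reference motions; with task_reward_weight: 0.0 and style_reward_weight: 1.0 the
+# agent is trained purely on the discriminator's style reward.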
+agent: + class: AMP + rollouts: 16 + learning_epochs: 6 + mini_batches: 2 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-05 + learning_rate_scheduler: null + learning_rate_scheduler_kwargs: null + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + amp_state_preprocessor: RunningStandardScaler + amp_state_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 0.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.5 + discriminator_loss_scale: 5.0 + amp_batch_size: 512 + task_reward_weight: 0.0 + style_reward_weight: 1.0 + discriminator_batch_size: 4096 + discriminator_reward_scale: 2.0 + discriminator_logit_regularization_scale: 0.05 + discriminator_gradient_penalty_scale: 5.0 + discriminator_weight_decay_scale: 1.0e-04 + # rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "humanoid_amp_run" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 80000 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/skrl_ippo_cfg.yaml b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/skrl_ippo_cfg.yaml new file mode 100644 index 0000000..2f46b1c --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/skrl_ippo_cfg.yaml @@ -0,0 +1,80 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html +agent: + class: IPPO + rollouts: 16 + learning_epochs: 8 + mini_batches: 1 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 3.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cart_double_pendulum_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# 
https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/skrl_mappo_cfg.yaml b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/skrl_mappo_cfg.yaml new file mode 100644 index 0000000..720c927 --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/skrl_mappo_cfg.yaml @@ -0,0 +1,82 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html +agent: + class: MAPPO + rollouts: 16 + learning_epochs: 8 + mini_batches: 1 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 3.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + shared_state_preprocessor: RunningStandardScaler + shared_state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cart_double_pendulum_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/skrl_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/skrl_ppo_cfg.yaml new file mode 100644 index 0000000..ab6674d --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot_marl/agents/skrl_ppo_cfg.yaml @@ -0,0 +1,80 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + 
output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html +agent: + class: PPO + rollouts: 32 + learning_epochs: 8 + mini_batches: 8 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 0.1 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cartpole_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot_marl/mindbot_marl_env.py b/source/mindbot/mindbot/tasks/direct/mindbot_marl/mindbot_marl_env.py new file mode 100644 index 0000000..a00dc6d --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot_marl/mindbot_marl_env.py @@ -0,0 +1,184 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import math +import torch +from collections.abc import Sequence + +import isaaclab.sim as sim_utils +from isaaclab.assets import Articulation +from isaaclab.envs import DirectMARLEnv +from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane +from isaaclab.utils.math import sample_uniform + +from .mindbot_marl_env_cfg import MindbotMarlEnvCfg + + +class MindbotMarlEnv(DirectMARLEnv): + cfg: MindbotMarlEnvCfg + + def __init__(self, cfg: MindbotMarlEnvCfg, render_mode: str | None = None, **kwargs): + super().__init__(cfg, render_mode, **kwargs) + + self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name) + self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name) + self._pendulum_dof_idx, _ = self.robot.find_joints(self.cfg.pendulum_dof_name) + + self.joint_pos = self.robot.data.joint_pos + self.joint_vel = self.robot.data.joint_vel + + def _setup_scene(self): + self.robot = Articulation(self.cfg.robot_cfg) + # add ground plane + spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg()) + # clone and replicate + self.scene.clone_environments(copy_from_source=False) + # we need to explicitly filter collisions for CPU simulation + if self.device == "cpu": + self.scene.filter_collisions(global_prim_paths=[]) + # add articulation to scene + self.scene.articulations["robot"] = self.robot + # add lights + light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)) + light_cfg.func("/World/Light", light_cfg) + + def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None: + self.actions = actions + + def _apply_action(self) -> None: + self.robot.set_joint_effort_target( + self.actions["cart"] * self.cfg.cart_action_scale, joint_ids=self._cart_dof_idx + ) + self.robot.set_joint_effort_target( + self.actions["pendulum"] * self.cfg.pendulum_action_scale, joint_ids=self._pendulum_dof_idx + ) + + def _get_observations(self) -> dict[str, torch.Tensor]: + pole_joint_pos = normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1)) + pendulum_joint_pos = normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]].unsqueeze(dim=1)) + observations = { + "cart": torch.cat( + ( + self.joint_pos[:, self._cart_dof_idx[0]].unsqueeze(dim=1), + self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1), + pole_joint_pos, + self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1), + ), + dim=-1, + ), + "pendulum": torch.cat( + ( + pole_joint_pos + pendulum_joint_pos, + pendulum_joint_pos, + self.joint_vel[:, self._pendulum_dof_idx[0]].unsqueeze(dim=1), + ), + dim=-1, + ), + } + return observations + + def _get_rewards(self) -> dict[str, torch.Tensor]: + total_reward = compute_rewards( + self.cfg.rew_scale_alive, + self.cfg.rew_scale_terminated, + self.cfg.rew_scale_cart_pos, + self.cfg.rew_scale_cart_vel, + self.cfg.rew_scale_pole_pos, + self.cfg.rew_scale_pole_vel, + self.cfg.rew_scale_pendulum_pos, + self.cfg.rew_scale_pendulum_vel, + self.joint_pos[:, self._cart_dof_idx[0]], + self.joint_vel[:, self._cart_dof_idx[0]], + normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]]), + self.joint_vel[:, self._pole_dof_idx[0]], + normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]]), + self.joint_vel[:, self._pendulum_dof_idx[0]], + math.prod(self.terminated_dict.values()), + ) + return total_reward + + def _get_dones(self) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]: + self.joint_pos = self.robot.data.joint_pos 
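+        # velocities are cached alongside positions for the observation and reward computations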
+ self.joint_vel = self.robot.data.joint_vel + + time_out = self.episode_length_buf >= self.max_episode_length - 1 + out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1) + out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1) + + terminated = {agent: out_of_bounds for agent in self.cfg.possible_agents} + time_outs = {agent: time_out for agent in self.cfg.possible_agents} + return terminated, time_outs + + def _reset_idx(self, env_ids: Sequence[int] | None): + if env_ids is None: + env_ids = self.robot._ALL_INDICES + super()._reset_idx(env_ids) + + joint_pos = self.robot.data.default_joint_pos[env_ids] + joint_pos[:, self._pole_dof_idx] += sample_uniform( + self.cfg.initial_pole_angle_range[0] * math.pi, + self.cfg.initial_pole_angle_range[1] * math.pi, + joint_pos[:, self._pole_dof_idx].shape, + joint_pos.device, + ) + joint_pos[:, self._pendulum_dof_idx] += sample_uniform( + self.cfg.initial_pendulum_angle_range[0] * math.pi, + self.cfg.initial_pendulum_angle_range[1] * math.pi, + joint_pos[:, self._pendulum_dof_idx].shape, + joint_pos.device, + ) + joint_vel = self.robot.data.default_joint_vel[env_ids] + + default_root_state = self.robot.data.default_root_state[env_ids] + default_root_state[:, :3] += self.scene.env_origins[env_ids] + + self.joint_pos[env_ids] = joint_pos + self.joint_vel[env_ids] = joint_vel + + self.robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids) + self.robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids) + self.robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids) + + +@torch.jit.script +def normalize_angle(angle): + return (angle + math.pi) % (2 * math.pi) - math.pi + + +@torch.jit.script +def compute_rewards( + rew_scale_alive: float, + rew_scale_terminated: float, + rew_scale_cart_pos: float, + rew_scale_cart_vel: float, + rew_scale_pole_pos: float, + rew_scale_pole_vel: float, + rew_scale_pendulum_pos: float, + rew_scale_pendulum_vel: float, + cart_pos: torch.Tensor, + cart_vel: torch.Tensor, + pole_pos: torch.Tensor, + pole_vel: torch.Tensor, + pendulum_pos: torch.Tensor, + pendulum_vel: torch.Tensor, + reset_terminated: torch.Tensor, +): + rew_alive = rew_scale_alive * (1.0 - reset_terminated.float()) + rew_termination = rew_scale_terminated * reset_terminated.float() + rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1) + rew_pendulum_pos = rew_scale_pendulum_pos * torch.sum( + torch.square(pole_pos + pendulum_pos).unsqueeze(dim=1), dim=-1 + ) + rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1) + rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1) + rew_pendulum_vel = rew_scale_pendulum_vel * torch.sum(torch.abs(pendulum_vel).unsqueeze(dim=1), dim=-1) + + total_reward = { + "cart": rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel, + "pendulum": rew_alive + rew_termination + rew_pendulum_pos + rew_pendulum_vel, + } + return total_reward \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/direct/mindbot_marl/mindbot_marl_env_cfg.py b/source/mindbot/mindbot/tasks/direct/mindbot_marl/mindbot_marl_env_cfg.py new file mode 100644 index 0000000..8925489 --- /dev/null +++ b/source/mindbot/mindbot/tasks/direct/mindbot_marl/mindbot_marl_env_cfg.py @@ -0,0 +1,55 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers 
(https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_assets.robots.cart_double_pendulum import CART_DOUBLE_PENDULUM_CFG + +from isaaclab.assets import ArticulationCfg +from isaaclab.envs import DirectMARLEnvCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sim import SimulationCfg +from isaaclab.utils import configclass + + +@configclass +class MindbotMarlEnvCfg(DirectMARLEnvCfg): + # env + decimation = 2 + episode_length_s = 5.0 + # multi-agent specification and spaces definition + possible_agents = ["cart", "pendulum"] + action_spaces = {"cart": 1, "pendulum": 1} + observation_spaces = {"cart": 4, "pendulum": 3} + state_space = -1 + + # simulation + sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation) + + # robot(s) + robot_cfg: ArticulationCfg = CART_DOUBLE_PENDULUM_CFG.replace(prim_path="/World/envs/env_.*/Robot") + + # scene + scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True) + + # custom parameters/scales + # - controllable joint + cart_dof_name = "slider_to_cart" + pole_dof_name = "cart_to_pole" + pendulum_dof_name = "pole_to_pendulum" + # - action scale + cart_action_scale = 100.0 # [N] + pendulum_action_scale = 50.0 # [Nm] + # - reward scales + rew_scale_alive = 1.0 + rew_scale_terminated = -2.0 + rew_scale_cart_pos = 0 + rew_scale_cart_vel = -0.01 + rew_scale_pole_pos = -1.0 + rew_scale_pole_vel = -0.01 + rew_scale_pendulum_pos = -1.0 + rew_scale_pendulum_vel = -0.01 + # - reset states/conditions + initial_pendulum_angle_range = [-0.25, 0.25] # pendulum angle sample range on reset [rad] + initial_pole_angle_range = [-0.25, 0.25] # pole angle sample range on reset [rad] + max_cart_pos = 3.0 # reset if cart exceeds this position [m] \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/__init__.py b/source/mindbot/mindbot/tasks/manager_based/__init__.py new file mode 100644 index 0000000..65d6e5a --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym # noqa: F401 diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/__init__.py b/source/mindbot/mindbot/tasks/manager_based/mindbot/__init__.py new file mode 100644 index 0000000..b99adc5 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/mindbot/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. 
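+# Each kwargs entry below maps an RL library (rl_games, rsl_rl, the skrl variants, sb3) to its agent configuration.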
+## + + +gym.register( + id="Template-Mindbot-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg", + "skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml", + "skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml", + "skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", + }, +) \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/__init__.py b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/__init__.py new file mode 100644 index 0000000..a597dfa --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/rl_games_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/rl_games_ppo_cfg.yaml new file mode 100644 index 0000000..71216e6 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/rl_games_ppo_cfg.yaml @@ -0,0 +1,78 @@ +params: + seed: 42 + + # environment wrapper clipping + env: + # added to the wrapper + clip_observations: 5.0 + # can make custom wrapper? + clip_actions: 1.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + # doesn't have this fine grained control but made it close + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [32, 32] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: cartpole_direct + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + num_actors: -1 # configured from the script (based on num_envs) + reward_shaper: + scale_value: 0.1 + normalize_advantage: True + gamma: 0.99 + tau : 0.95 + learning_rate: 5e-4 + lr_schedule: adaptive + kl_threshold: 0.008 + score_to_win: 20000 + max_epochs: 150 + save_best_after: 50 + save_frequency: 25 + grad_norm: 1.0 + entropy_coef: 0.0 + truncate_grads: True + e_clip: 0.2 + horizon_length: 32 + minibatch_size: 16384 + mini_epochs: 8 + critic_coef: 4 + clip_value: True + seq_length: 4 + bounds_loss_coef: 0.0001 \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/rsl_rl_ppo_cfg.py b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 0000000..4556af6 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers 
(https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class PPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 16 + max_iterations = 150 + save_interval = 50 + experiment_name = "cartpole_direct" + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, + actor_hidden_dims=[32, 32], + critic_hidden_dims=[32, 32], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/sb3_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/sb3_ppo_cfg.yaml new file mode 100644 index 0000000..23ed0c0 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/sb3_ppo_cfg.yaml @@ -0,0 +1,20 @@ +# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32 +seed: 42 + +n_timesteps: !!float 1e6 +policy: 'MlpPolicy' +n_steps: 16 +batch_size: 4096 +gae_lambda: 0.95 +gamma: 0.99 +n_epochs: 20 +ent_coef: 0.01 +learning_rate: !!float 3e-4 +clip_range: !!float 0.2 +policy_kwargs: + activation_fn: nn.ELU + net_arch: [32, 32] + squash_output: False +vf_coef: 1.0 +max_grad_norm: 1.0 +device: "cuda:0" \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_amp_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_amp_cfg.yaml new file mode 100644 index 0000000..3a1fd21 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_amp_cfg.yaml @@ -0,0 +1,111 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: -2.9 + fixed_log_std: True + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ONE + discriminator: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [1024, 512] + activations: relu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + +# AMP memory (reference motion dataset) +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +motion_dataset: + class: RandomMemory + memory_size: 200000 + +# AMP memory (preventing discriminator overfitting) +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +reply_buffer: + class: RandomMemory + memory_size: 1000000 + + +# AMP agent configuration 
(field names are from AMP_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/amp.html +agent: + class: AMP + rollouts: 16 + learning_epochs: 6 + mini_batches: 2 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-05 + learning_rate_scheduler: null + learning_rate_scheduler_kwargs: null + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + amp_state_preprocessor: RunningStandardScaler + amp_state_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 0.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.5 + discriminator_loss_scale: 5.0 + amp_batch_size: 512 + task_reward_weight: 0.0 + style_reward_weight: 1.0 + discriminator_batch_size: 4096 + discriminator_reward_scale: 2.0 + discriminator_logit_regularization_scale: 0.05 + discriminator_gradient_penalty_scale: 5.0 + discriminator_weight_decay_scale: 1.0e-04 + # rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "humanoid_amp_run" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 80000 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_ippo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_ippo_cfg.yaml new file mode 100644 index 0000000..2f46b1c --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_ippo_cfg.yaml @@ -0,0 +1,80 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html +agent: + class: IPPO + rollouts: 16 + learning_epochs: 8 + mini_batches: 1 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 3.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cart_double_pendulum_direct" + experiment_name: "" + write_interval: auto + 
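+    # 'auto' defers the logging/checkpoint cadence to skrl's defaults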
checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_mappo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_mappo_cfg.yaml new file mode 100644 index 0000000..720c927 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_mappo_cfg.yaml @@ -0,0 +1,82 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html +agent: + class: MAPPO + rollouts: 16 + learning_epochs: 8 + mini_batches: 1 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 3.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + shared_state_preprocessor: RunningStandardScaler + shared_state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cart_double_pendulum_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_ppo_cfg.yaml b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_ppo_cfg.yaml new file mode 100644 index 0000000..ab6674d --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/mindbot/agents/skrl_ppo_cfg.yaml @@ -0,0 +1,80 @@ +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + 
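+        # with `separate: False` above, this value head shares its network with the policy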
input: OBSERVATIONS + layers: [32, 32] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html +agent: + class: PPO + rollouts: 32 + learning_epochs: 8 + mini_batches: 8 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.008 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 0.1 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "cartpole_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 4800 + environment_info: log \ No newline at end of file diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/__init__.py b/source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/__init__.py new file mode 100644 index 0000000..6b43c27 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""This sub-module contains the functions that are specific to the environment.""" + +from isaaclab.envs.mdp import * # noqa: F401, F403 + +from .rewards import * # noqa: F401, F403 diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/rewards.py b/source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/rewards.py new file mode 100644 index 0000000..ceb3956 --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/mindbot/mdp/rewards.py @@ -0,0 +1,26 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +from isaaclab.assets import Articulation +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils.math import wrap_to_pi + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor: + """Penalize joint position deviation from a target value.""" + # extract the used quantities (to enable type-hinting) + asset: Articulation = env.scene[asset_cfg.name] + # wrap the joint positions to (-pi, pi) + joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids]) + # compute the reward + return torch.sum(torch.square(joint_pos - target), dim=1) diff --git a/source/mindbot/mindbot/tasks/manager_based/mindbot/mindbot_env_cfg.py b/source/mindbot/mindbot/tasks/manager_based/mindbot/mindbot_env_cfg.py new file mode 100644 index 0000000..baeb35a --- /dev/null +++ b/source/mindbot/mindbot/tasks/manager_based/mindbot/mindbot_env_cfg.py @@ -0,0 +1,180 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import math + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.utils import configclass + +from . 
import mdp + +## +# Pre-defined configs +## + +from isaaclab_assets.robots.cartpole import CARTPOLE_CFG # isort:skip + + +## +# Scene definition +## + + +@configclass +class MindbotSceneCfg(InteractiveSceneCfg): + """Configuration for a cart-pole scene.""" + + # ground plane + ground = AssetBaseCfg( + prim_path="/World/ground", + spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)), + ) + + # robot + robot: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + + # lights + dome_light = AssetBaseCfg( + prim_path="/World/DomeLight", + spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0), + ) + + +## +# MDP settings +## + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + joint_effort = mdp.JointEffortActionCfg(asset_name="robot", joint_names=["slider_to_cart"], scale=100.0) + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # observation terms (order preserved) + joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel) + joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel) + + def __post_init__(self) -> None: + self.enable_corruption = False + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + # reset + reset_cart_position = EventTerm( + func=mdp.reset_joints_by_offset, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]), + "position_range": (-1.0, 1.0), + "velocity_range": (-0.5, 0.5), + }, + ) + + reset_pole_position = EventTerm( + func=mdp.reset_joints_by_offset, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]), + "position_range": (-0.25 * math.pi, 0.25 * math.pi), + "velocity_range": (-0.25 * math.pi, 0.25 * math.pi), + }, + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + # (1) Constant running reward + alive = RewTerm(func=mdp.is_alive, weight=1.0) + # (2) Failure penalty + terminating = RewTerm(func=mdp.is_terminated, weight=-2.0) + # (3) Primary task: keep pole upright + pole_pos = RewTerm( + func=mdp.joint_pos_target_l2, + weight=-1.0, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]), "target": 0.0}, + ) + # (4) Shaping tasks: lower cart velocity + cart_vel = RewTerm( + func=mdp.joint_vel_l1, + weight=-0.01, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"])}, + ) + # (5) Shaping tasks: lower pole angular velocity + pole_vel = RewTerm( + func=mdp.joint_vel_l1, + weight=-0.005, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"])}, + ) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + # (1) Time out + time_out = DoneTerm(func=mdp.time_out, time_out=True) + # (2) Cart out of bounds + cart_out_of_bounds = DoneTerm( + func=mdp.joint_pos_out_of_manual_limit, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]), "bounds": (-3.0, 3.0)}, + ) + + +## +# Environment configuration +## + + +@configclass +class MindbotEnvCfg(ManagerBasedRLEnvCfg): + # Scene settings + scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=4096, env_spacing=4.0) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + events: EventCfg = EventCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: 
TerminationsCfg = TerminationsCfg() + + # Post initialization + def __post_init__(self) -> None: + """Post initialization.""" + # general settings + self.decimation = 2 + self.episode_length_s = 5 + # viewer settings + self.viewer.eye = (8.0, 0.0, 5.0) + # simulation settings + self.sim.dt = 1 / 120 + self.sim.render_interval = self.decimation \ No newline at end of file diff --git a/source/mindbot/mindbot/ui_extension_example.py b/source/mindbot/mindbot/ui_extension_example.py new file mode 100644 index 0000000..6f8e381 --- /dev/null +++ b/source/mindbot/mindbot/ui_extension_example.py @@ -0,0 +1,46 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import omni.ext + + +# Functions and vars are available to other extension as usual in python: `example.python_ext.some_public_function(x)` +def some_public_function(x: int): + print("[mindbot] some_public_function was called with x: ", x) + return x**x + + +# Any class derived from `omni.ext.IExt` in top level module (defined in `python.modules` of `extension.toml`) will be +# instantiated when extension gets enabled and `on_startup(ext_id)` will be called. Later when extension gets disabled +# on_shutdown() is called. +class ExampleExtension(omni.ext.IExt): + # ext_id is current extension id. It can be used with extension manager to query additional information, like where + # this extension is located on filesystem. + def on_startup(self, ext_id): + print("[mindbot] startup") + + self._count = 0 + + self._window = omni.ui.Window("My Window", width=300, height=300) + with self._window.frame: + with omni.ui.VStack(): + label = omni.ui.Label("") + + def on_click(): + self._count += 1 + label.text = f"count: {self._count}" + + def on_reset(): + self._count = 0 + label.text = "empty" + + on_reset() + + with omni.ui.HStack(): + omni.ui.Button("Add", clicked_fn=on_click) + omni.ui.Button("Reset", clicked_fn=on_reset) + + def on_shutdown(self): + print("[mindbot] shutdown") \ No newline at end of file diff --git a/source/mindbot/pyproject.toml b/source/mindbot/pyproject.toml new file mode 100644 index 0000000..d90ac35 --- /dev/null +++ b/source/mindbot/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel", "toml"] +build-backend = "setuptools.build_meta" diff --git a/source/mindbot/setup.py b/source/mindbot/setup.py new file mode 100644 index 0000000..ba94f63 --- /dev/null +++ b/source/mindbot/setup.py @@ -0,0 +1,47 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +"""Installation script for the 'mindbot' python package.""" + +import os +import toml + +from setuptools import setup + +# Obtain the extension data from the extension.toml file +EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__)) +# Read the extension.toml file +EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml")) + +# Minimum dependencies required prior to installation +INSTALL_REQUIRES = [ + # NOTE: Add dependencies + "psutil", +] + +# Installation operation +setup( + name="mindbot", + packages=["mindbot"], + author=EXTENSION_TOML_DATA["package"]["author"], + maintainer=EXTENSION_TOML_DATA["package"]["maintainer"], + url=EXTENSION_TOML_DATA["package"]["repository"], + version=EXTENSION_TOML_DATA["package"]["version"], + description=EXTENSION_TOML_DATA["package"]["description"], + keywords=EXTENSION_TOML_DATA["package"]["keywords"], + install_requires=INSTALL_REQUIRES, + license="Apache-2.0", + include_package_data=True, + python_requires=">=3.10", + classifiers=[ + "Natural Language :: English", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Isaac Sim :: 4.5.0", + "Isaac Sim :: 5.0.0", + "Isaac Sim :: 5.1.0", + ], + zip_safe=False, +) \ No newline at end of file
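For orientation, the sketch below shows how the task registered above ("Template-Mindbot-v0") and one of its agent configuration entry points could be loaded once the mindbot extension is installed. It is a minimal illustration, not part of the template: it assumes the usual Isaac Lab workflow in which AppLauncher starts the simulator before any other isaaclab imports, that importing mindbot.tasks is enough to run the gym.register() calls, and that the parse_env_cfg / load_cfg_from_registry helpers from isaaclab_tasks.utils are available; the environment count is an arbitrary example value.

import argparse

from isaaclab.app import AppLauncher

# the simulator must be running before the remaining isaaclab modules are imported
parser = argparse.ArgumentParser(description="Smoke-test the Template-Mindbot-v0 registration.")
AppLauncher.add_app_launcher_args(parser)
args_cli = parser.parse_args()
simulation_app = AppLauncher(args_cli).app

import gymnasium as gym

import mindbot.tasks  # noqa: F401  (importing the package triggers the gym.register() calls)
from isaaclab_tasks.utils import load_cfg_from_registry, parse_env_cfg

# resolve the environment and skrl agent configurations from the registry kwargs
env_cfg = parse_env_cfg("Template-Mindbot-v0", num_envs=16)  # 16 envs is an arbitrary example
agent_cfg = load_cfg_from_registry("Template-Mindbot-v0", "skrl_cfg_entry_point")

env = gym.make("Template-Mindbot-v0", cfg=env_cfg)
print("observation space:", env.observation_space)
print("action space:", env.action_space)
print("agent class:", agent_cfg["agent"]["class"])

env.close()
simulation_app.close()

The template's own train/play scripts, where present, wrap this same pattern and add the library-specific runner (rl_games, rsl_rl, skrl, or sb3) on top of the resolved configurations.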