Initial commit

commit 1f7053a306
2025-11-13 17:37:07 +08:00
67 changed files with 6071 additions and 0 deletions

.dockerignore (new file, 27 lines)

@@ -0,0 +1,27 @@
# ignore .git related folders
.git/
.github/
.gitignore
# ignore docs
docs/
# copy in licenses folder to the container
!docs/licenses/
# ignore logs
**/logs/
**/runs/
**/output/*
**/outputs/*
**/videos/*
*.tmp
# ignore docker
docker/cluster/exports/
docker/.container.cfg
# ignore recordings
recordings/
# ignore __pycache__
**/__pycache__/
**/*.egg-info/
# ignore isaac sim symlink
_isaac_sim?
# Docker history
docker/.isaac-lab-docker-history

.flake8 (new file, 23 lines)

@@ -0,0 +1,23 @@
[flake8]
show-source=True
statistics=True
per-file-ignores=*/__init__.py:F401
# E402: Module level import not at top of file
# E501: Line too long
# W503: Line break before binary operator
# E203: Whitespace before ':' -> conflicts with black
# D401: First line should be in imperative mood
# R504: Unnecessary variable assignment before return statement.
# R505: Unnecessary elif after return statement
# SIM102: Use a single if-statement instead of nested if-statements
# SIM117: Merge with statements for context managers that have same scope.
# SIM118: Checks for key-existence checks against dict.keys() calls.
ignore=E402,E501,W503,E203,D401,R504,R505,SIM102,SIM117,SIM118
max-line-length = 120
max-complexity = 30
exclude=_*,.vscode,.git,docs/**
# docstrings
docstring-convention=google
# annotations
suppress-none-returning=True
allow-star-arg-any=True

.gitattributes (vendored, new file, 14 lines)

@@ -0,0 +1,14 @@
*.usd filter=lfs diff=lfs merge=lfs -text
*.usda filter=lfs diff=lfs merge=lfs -text
*.psd filter=lfs diff=lfs merge=lfs -text
*.hdr filter=lfs diff=lfs merge=lfs -text
*.dae filter=lfs diff=lfs merge=lfs -text
*.mtl filter=lfs diff=lfs merge=lfs -text
*.obj filter=lfs diff=lfs merge=lfs -text
*.gif filter=lfs diff=lfs merge=lfs -text
*.mp4 filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.jit filter=lfs diff=lfs merge=lfs -text
*.hdf5 filter=lfs diff=lfs merge=lfs -text
*.bat text eol=crlf

.gitignore (vendored, new file, 71 lines)

@@ -0,0 +1,71 @@
# C++
**/cmake-build*/
**/build*/
**/*.so
**/*.log*
# Omniverse
**/*.dmp
**/.thumbs
# No USD files allowed in the repo
**/*.usd
**/*.usda
**/*.usdc
**/*.usdz
# Python
.DS_Store
**/*.egg-info/
**/__pycache__/
**/.pytest_cache/
**/*.pyc
**/*.pb
# Docker/Singularity
**/*.sif
docker/cluster/exports/
docker/.container.cfg
# IDE
**/.idea/
**/.vscode/
# Don't ignore the top-level .vscode directory as it is
# used to configure VS Code settings
!.vscode
# Outputs
**/output/*
**/outputs/*
**/videos/*
**/wandb/*
**/.neptune/*
docker/artifacts/
*.tmp
# Doc Outputs
**/docs/_build/*
**/generated/*
# Isaac-Sim packman
_isaac_sim*
_repo
_build
.lastformat
# RL-Games
**/runs/*
**/logs/*
**/recordings/*
# Pre-Trained Checkpoints
/.pretrained_checkpoints/
# Teleop Recorded Dataset
/datasets/
# Tests
tests/
# Docker history
.isaac-lab-docker-history

.pre-commit-config.yaml (new file, 86 lines)

@@ -0,0 +1,86 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
repos:
- repo: https://github.com/python/black
rev: 24.3.0
hooks:
- id: black
args: ["--line-length", "120", "--unstable"]
- repo: https://github.com/pycqa/flake8
rev: 7.0.0
hooks:
- id: flake8
additional_dependencies: [flake8-simplify, flake8-return]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: check-symlinks
- id: destroyed-symlinks
- id: check-added-large-files
args: ["--maxkb=2000"] # restrict files more than 2 MB. Should use git-lfs instead.
- id: check-yaml
- id: check-merge-conflict
- id: check-case-conflict
- id: check-executables-have-shebangs
- id: check-toml
- id: end-of-file-fixer
- id: check-shebang-scripts-are-executable
- id: detect-private-key
- id: debug-statements
- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
name: isort (python)
args: ["--profile", "black", "--filter-files"]
- repo: https://github.com/asottile/pyupgrade
rev: v3.15.1
hooks:
- id: pyupgrade
args: ["--py310-plus"]
# FIXME: This is a hack because Pytorch does not like: torch.Tensor | dict aliasing
exclude: "source/isaaclab/isaaclab/envs/common.py|source/isaaclab/isaaclab/ui/widgets/image_plot.py|source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/motions/motion_loader.py"
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
hooks:
- id: codespell
additional_dependencies:
- tomli
exclude: "CONTRIBUTORS.md|docs/source/setup/walkthrough/concepts_env_design.rst"
# FIXME: Figure out why this is getting stuck under VPN.
# - repo: https://github.com/RobertCraigie/pyright-python
# rev: v1.1.315
# hooks:
# - id: pyright
- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: v1.5.1
hooks:
- id: insert-license
files: \.(py|ya?ml)$
args:
# - --remove-header # Remove existing license headers. Useful when updating license.
- --license-filepath
- .github/LICENSE_HEADER.txt
- --use-current-year
exclude: "source/isaaclab_mimic/|scripts/imitation_learning/isaaclab_mimic/"
# Apache 2.0 license for mimic files
- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: v1.5.1
hooks:
- id: insert-license
files: ^(source/isaaclab_mimic|scripts/imitation_learning/isaaclab_mimic)/.*\.py$
args:
# - --remove-header # Remove existing license headers. Useful when updating license.
- --license-filepath
- .github/LICENSE_HEADER_MIMIC.txt
- --use-current-year
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.10.0
hooks:
- id: rst-backticks
- id: rst-directive-colons
- id: rst-inline-touching-normal

.vscode/.gitignore (vendored, new file, 10 lines)

@@ -0,0 +1,10 @@
# Note: These files are kept for development purposes only.
!tools/launch.template.json
!tools/settings.template.json
!tools/setup_vscode.py
!extensions.json
!tasks.json
# Ignore all other files
.python.env
*.json

.vscode/extensions.json (vendored, new file, 12 lines)

@@ -0,0 +1,12 @@
{
// See http://go.microsoft.com/fwlink/?LinkId=827846
// for the documentation about the extensions.json format
"recommendations": [
"ms-python.python",
"ms-python.vscode-pylance",
"ban.spellright",
"ms-iot.vscode-ros",
"ms-python.black-formatter",
"ms-python.flake8",
]
}

.vscode/tasks.json (vendored, new file, 23 lines)

@@ -0,0 +1,23 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "setup_python_env",
"type": "shell",
"linux": {
"command": "${input:isaac_path}/python.sh ${workspaceFolder}/.vscode/tools/setup_vscode.py --isaac_path ${input:isaac_path}"
},
"windows": {
"command": "${input:isaac_path}/python.bat ${workspaceFolder}/.vscode/tools/setup_vscode.py --isaac_path ${input:isaac_path}"
}
}
],
"inputs": [
{
"id": "isaac_path",
"description": "Absolute path to the current Isaac Sim installation. If you installed IsaacSim from pip, the import of it failed. Please make sure you run the task with the correct python environment. As fallback, you can directly execute the python script by running: ``python.sh <path-to-your-project>/.vscode/tools/setup_vscode.py``",
"default": "${HOME}/isaacsim",
"type": "promptString"
},
]
}

.vscode/tools/launch.template.json (vendored, new file, 825 lines)

@@ -0,0 +1,825 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
// For standalone script execution
{
"name": "Python: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
},
{
"name": "Python: Train Template-Mindbot-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
},
{
"name": "Python: Play Template-Mindbot-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
},
// For script execution inside a Docker
{
"name": "Docker: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
}
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-Marl-Direct-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-Marl-Direct-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rl_games/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with rl_games (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rl_games/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/rsl_rl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with rsl_rl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/rsl_rl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with skrl (AMP)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "AMP"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with skrl (IPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "IPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with skrl (MAPPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "MAPPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with skrl (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32", "--algorithm", "PPO"],
"program": "${workspaceFolder}/scripts/skrl/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Train Template-Mindbot-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "4096", "--headless"],
"program": "${workspaceFolder}/scripts/sb3/train.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
{
"name": "Docker: Play Template-Mindbot-v0 with sb3 (PPO)",
"type": "debugpy",
"request": "launch",
"args" : ["--task", "Template-Mindbot-v0", "--num_envs", "32"],
"program": "${workspaceFolder}/scripts/sb3/play.py",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}"
},
},
]
}

.vscode/tools/settings.template.json (vendored, new file, 86 lines)

@@ -0,0 +1,86 @@
{
"files.associations": {
"*.tpp": "cpp",
"*.kit": "toml",
"*.rst": "restructuredtext"
},
"editor.rulers": [120],
    // files to be excluded from the file watcher
"files.watcherExclude": {
"**/.git/objects/**": true,
"**/.git/subtree-cache/**": true,
"**/node_modules/**": true,
"**/_isaac_sim/**": true,
"**/_compiler/**": true
},
// Configuration for spelling checker
"spellright.language": [
"en-US-10-1."
],
"spellright.documentTypes": [
"markdown",
"latex",
"plaintext",
"cpp",
"asciidoc",
"python",
"restructuredtext"
],
"cSpell.words": [
"literalinclude",
"linenos",
"instanceable",
"isaacSim",
"jacobians",
"pointcloud",
"ridgeback",
"rllib",
"robomimic",
"teleoperation",
"xform",
"numpy",
"tensordict",
"flatcache",
"physx",
"dpad",
"gamepad",
"linspace",
"upsampled",
"downsampled",
"arange",
"discretization",
"trimesh",
"uninstanceable"
],
// This enables python language server. Seems to work slightly better than jedi:
"python.languageServer": "Pylance",
// We use "black" as a formatter:
"python.formatting.provider": "black",
"python.formatting.blackArgs": ["--line-length", "120"],
// Use flake8 for linting
"python.linting.pylintEnabled": false,
"python.linting.flake8Enabled": true,
"python.linting.flake8Args": [
"--max-line-length=120"
],
// Use docstring generator
"autoDocstring.docstringFormat": "google",
"autoDocstring.guessTypes": true,
// Python environment path
// note: the default interpreter is overridden when user selects a workspace interpreter
// in the status bar. For example, the virtual environment python interpreter
"python.defaultInterpreterPath": "",
// ROS distribution
"ros.distro": "noetic",
// Language specific settings
"[python]": {
"editor.tabSize": 4
},
"[restructuredtext]": {
"editor.tabSize": 2
},
// Python extra paths
// Note: this is filled up when vscode is set up for the first time
"python.analysis.extraPaths": []
}

.vscode/tools/setup_vscode.py (vendored, new file, 220 lines)

@@ -0,0 +1,220 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This script sets up the vs-code settings for the Isaac Lab project.
This script merges the python.analysis.extraPaths from the "{ISAACSIM_DIR}/.vscode/settings.json" file into
the ".vscode/settings.json" file.
This is necessary because Isaac Sim 2022.2.1 onwards does not add the necessary python packages to the python path
when the "setup_python_env.sh" is run as part of the vs-code launch configuration.
"""
import argparse
import os
import pathlib
import platform
import re
import sys
PROJECT_DIR = pathlib.Path(__file__).parents[2]
"""Path to the the project directory."""
try:
import isaacsim # noqa: F401
isaacsim_dir = os.environ.get("ISAAC_PATH", "")
except (ModuleNotFoundError, ImportError):
# Create a parser to get the isaac-sim path
parser = argparse.ArgumentParser(description="Setup the VSCode settings for the project.")
parser.add_argument("--isaac_path", type=str, help="The absolute path to the Isaac Sim installation.")
args = parser.parse_args()
# parse the isaac-sim directory
isaacsim_dir = args.isaac_path
# check if the isaac-sim directory is provided
if not os.path.exists(isaacsim_dir):
raise FileNotFoundError(
f"Could not find the isaac-sim directory: {isaacsim_dir}. Please provide the correct path to the Isaac Sim"
" installation."
)
except EOFError:
print("Unable to trigger EULA acceptance. This is likely due to the script being run in a non-interactive shell.")
print("Please run the script in an interactive shell to accept the EULA.")
print("Skipping the setup of the VSCode settings...")
sys.exit(0)
# check if the isaac-sim directory exists
if not os.path.exists(isaacsim_dir):
raise FileNotFoundError(
f"Could not find the isaac-sim directory: {isaacsim_dir}. There are two possible reasons for this:\n\t1. The"
" Isaac Sim directory does not exist as provided CLI path.\n\t2. The script couldn't import the 'isaacsim'"
" package. This could be due to the 'isaacsim' package not being installed in the Python"
" environment.\n\nPlease make sure that the Isaac Sim directory exists or that the 'isaacsim' package is"
" installed."
)
ISAACSIM_DIR = isaacsim_dir
"""Path to the isaac-sim directory."""
def overwrite_python_analysis_extra_paths(isaaclab_settings: str) -> str:
"""Overwrite the python.analysis.extraPaths in the Isaac Lab settings file.
The extraPaths are replaced with the path names from the isaac-sim settings file that exists in the
"{ISAACSIM_DIR}/.vscode/settings.json" file.
If the isaac-sim settings file does not exist, the extraPaths are not overwritten.
Args:
isaaclab_settings: The settings string to use as template.
Returns:
The settings string with overwritten python analysis extra paths.
"""
# isaac-sim settings
isaacsim_vscode_filename = os.path.join(ISAACSIM_DIR, ".vscode", "settings.json")
# we use the isaac-sim settings file to get the python.analysis.extraPaths for kit extensions
# if this file does not exist, we will not add any extra paths
if os.path.exists(isaacsim_vscode_filename):
# read the path names from the isaac-sim settings file
with open(isaacsim_vscode_filename) as f:
vscode_settings = f.read()
# extract the path names
# search for the python.analysis.extraPaths section and extract the contents
settings = re.search(
r"\"python.analysis.extraPaths\": \[.*?\]", vscode_settings, flags=re.MULTILINE | re.DOTALL
)
settings = settings.group(0)
settings = settings.split('"python.analysis.extraPaths": [')[-1]
settings = settings.split("]")[0]
# read the path names from the isaac-sim settings file
path_names = settings.split(",")
path_names = [path_name.strip().strip('"') for path_name in path_names]
path_names = [path_name for path_name in path_names if len(path_name) > 0]
# change the path names to be relative to the Isaac Lab directory
rel_path = os.path.relpath(ISAACSIM_DIR, PROJECT_DIR)
path_names = ['"${workspaceFolder}/' + rel_path + "/" + path_name + '"' for path_name in path_names]
else:
path_names = []
print(
f"[WARN] Could not find Isaac Sim VSCode settings: {isaacsim_vscode_filename}."
"\n\tThis will result in missing 'python.analysis.extraPaths' in the VSCode"
"\n\tsettings, which limits the functionality of the Python language server."
"\n\tHowever, it does not affect the functionality of the Isaac Lab project."
"\n\tWe are working on a fix for this issue with the Isaac Sim team."
)
# add the path names that are in the Isaac Lab extensions directory
isaaclab_extensions = os.listdir(os.path.join(PROJECT_DIR, "source"))
path_names.extend(['"${workspaceFolder}/source/' + ext + '"' for ext in isaaclab_extensions])
# combine them into a single string
path_names = ",\n\t\t".expandtabs(4).join(path_names)
# deal with the path separator being different on Windows and Unix
path_names = path_names.replace("\\", "/")
# replace the path names in the Isaac Lab settings file with the path names parsed
isaaclab_settings = re.sub(
r"\"python.analysis.extraPaths\": \[.*?\]",
'"python.analysis.extraPaths": [\n\t\t'.expandtabs(4) + path_names + "\n\t]".expandtabs(4),
isaaclab_settings,
flags=re.DOTALL,
)
# return the Isaac Lab settings string
return isaaclab_settings
def overwrite_default_python_interpreter(isaaclab_settings: str) -> str:
"""Overwrite the default python interpreter in the Isaac Lab settings file.
The default python interpreter is replaced with the path to the python interpreter used by the
isaac-sim project. This is necessary because the default python interpreter is the one shipped with
isaac-sim.
Args:
isaaclab_settings: The settings string to use as template.
Returns:
The settings string with overwritten default python interpreter.
"""
# read executable name
python_exe = os.path.normpath(sys.executable)
# replace with Isaac Sim's python.sh or python.bat scripts to make sure python with correct
# source paths is set as default
if f"kit{os.sep}python{os.sep}bin{os.sep}python" in python_exe:
# Check if the OS is Windows or Linux to use appropriate shell file
if platform.system() == "Windows":
python_exe = python_exe.replace(f"kit{os.sep}python{os.sep}bin{os.sep}python3", "python.bat")
else:
python_exe = python_exe.replace(f"kit{os.sep}python{os.sep}bin{os.sep}python3", "python.sh")
# replace the default python interpreter in the Isaac Lab settings file with the path to the
# python interpreter in the Isaac Lab directory
isaaclab_settings = re.sub(
r"\"python.defaultInterpreterPath\": \".*?\"",
f'"python.defaultInterpreterPath": "{python_exe}"',
isaaclab_settings,
flags=re.DOTALL,
)
# return the Isaac Lab settings file
return isaaclab_settings
def main():
# Isaac Lab template settings
isaaclab_vscode_template_filename = os.path.join(PROJECT_DIR, ".vscode", "tools", "settings.template.json")
# make sure the Isaac Lab template settings file exists
if not os.path.exists(isaaclab_vscode_template_filename):
raise FileNotFoundError(
f"Could not find the Isaac Lab template settings file: {isaaclab_vscode_template_filename}"
)
# read the Isaac Lab template settings file
with open(isaaclab_vscode_template_filename) as f:
isaaclab_template_settings = f.read()
# overwrite the python.analysis.extraPaths in the Isaac Lab settings file with the path names
isaaclab_settings = overwrite_python_analysis_extra_paths(isaaclab_template_settings)
# overwrite the default python interpreter in the Isaac Lab settings file with the path to the
# python interpreter used to call this script
isaaclab_settings = overwrite_default_python_interpreter(isaaclab_settings)
# add template notice to the top of the file
header_message = (
"// This file is a template and is automatically generated by the setup_vscode.py script.\n"
"// Do not edit this file directly.\n"
"// \n"
f"// Generated from: {isaaclab_vscode_template_filename}\n"
)
isaaclab_settings = header_message + isaaclab_settings
# write the Isaac Lab settings file
isaaclab_vscode_filename = os.path.join(PROJECT_DIR, ".vscode", "settings.json")
with open(isaaclab_vscode_filename, "w") as f:
f.write(isaaclab_settings)
# copy the launch.json file if it doesn't exist
isaaclab_vscode_launch_filename = os.path.join(PROJECT_DIR, ".vscode", "launch.json")
isaaclab_vscode_template_launch_filename = os.path.join(PROJECT_DIR, ".vscode", "tools", "launch.template.json")
if not os.path.exists(isaaclab_vscode_launch_filename):
# read template launch settings
with open(isaaclab_vscode_template_launch_filename) as f:
isaaclab_template_launch_settings = f.read()
# add header
header_message = header_message.replace(
isaaclab_vscode_template_filename, isaaclab_vscode_template_launch_filename
)
isaaclab_launch_settings = header_message + isaaclab_template_launch_settings
# write the Isaac Lab launch settings file
with open(isaaclab_vscode_launch_filename, "w") as f:
f.write(isaaclab_launch_settings)
if __name__ == "__main__":
main()
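
The heart of the script above is the regular-expression substitution over the `python.analysis.extraPaths` entry. As a standalone illustration (with a made-up template string and path), the mechanism looks like this:

```python
# Standalone illustration of the substitution used in setup_vscode.py.
# The template string and the path below are made up for the example.
import re

template = '{\n    "python.analysis.extraPaths": []\n}'
new_path = '"${workspaceFolder}/source/mindbot"'

result = re.sub(
    r"\"python.analysis.extraPaths\": \[.*?\]",
    '"python.analysis.extraPaths": [\n        ' + new_path + "\n    ]",
    template,
    flags=re.DOTALL,
)
print(result)
```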

README.md (new file, 135 lines)

@@ -0,0 +1,135 @@
# Template for Isaac Lab Projects
## Overview
This project/repository serves as a template for building projects or extensions based on Isaac Lab.
It allows you to develop in an isolated environment, outside of the core Isaac Lab repository.
**Key Features:**
- `Isolation` Work outside the core Isaac Lab repository, ensuring that your development efforts remain self-contained.
- `Flexibility` This template is set up to allow your code to be run as an extension in Omniverse.
**Keywords:** extension, template, isaaclab
## Installation
- Install Isaac Lab by following the [installation guide](https://isaac-sim.github.io/IsaacLab/main/source/setup/installation/index.html).
We recommend using the conda or uv installation as it simplifies calling Python scripts from the terminal.
- Clone or copy this project/repository separately from the Isaac Lab installation (i.e. outside the `IsaacLab` directory):
- Using a python interpreter that has Isaac Lab installed, install the library in editable mode using:
```bash
# use 'PATH_TO_isaaclab.sh|bat -p' instead of 'python' if Isaac Lab is not installed in Python venv or conda
python -m pip install -e source/mindbot
```
- Verify that the extension is correctly installed by:
- Listing the available tasks:
Note: If the task name changes, it may be necessary to update the search pattern `"Template-"`
(in the `scripts/list_envs.py` file) so that it can be listed (see the sketch after this list).
```bash
# use 'FULL_PATH_TO_isaaclab.sh|bat -p' instead of 'python' if Isaac Lab is not installed in Python venv or conda
python scripts/list_envs.py
```
- Running a task:
```bash
# use 'FULL_PATH_TO_isaaclab.sh|bat -p' instead of 'python' if Isaac Lab is not installed in Python venv or conda
python scripts/<RL_LIBRARY>/train.py --task=<TASK_NAME>
```
- Running a task with dummy agents:
These include dummy agents that output zero or random actions. They are useful for checking that the environments are configured correctly.
- Zero-action agent
```bash
# use 'FULL_PATH_TO_isaaclab.sh|bat -p' instead of 'python' if Isaac Lab is not installed in Python venv or conda
python scripts/zero_agent.py --task=<TASK_NAME>
```
- Random-action agent
```bash
# use 'FULL_PATH_TO_isaaclab.sh|bat -p' instead of 'python' if Isaac Lab is not installed in Python venv or conda
python scripts/random_agent.py --task=<TASK_NAME>
```
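The note above about the `"Template-"` search pattern boils down to a simple filter over the Gymnasium registry. Below is a minimal sketch of that filter (not a script shipped with this template); it mirrors `scripts/list_envs.py` and, like that script, launches the simulator headless before importing the tasks:
```python
# Minimal sketch: list registered tasks matching a custom prefix.
# Mirrors scripts/list_envs.py; adjust PREFIX if you rename your tasks.
from isaaclab.app import AppLauncher

# launch the simulator headless so the task modules can be imported
app_launcher = AppLauncher(headless=True)
simulation_app = app_launcher.app

import gymnasium as gym

import mindbot.tasks  # noqa: F401  # registers the template environments

PREFIX = "Template-"  # the pattern used by scripts/list_envs.py

for spec in gym.registry.values():
    if PREFIX in spec.id:
        print(spec.id, "->", spec.entry_point)

simulation_app.close()
```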
### Set up IDE (Optional)
To setup the IDE, please follow these instructions:
- Run the VS Code task by pressing `Ctrl+Shift+P`, selecting `Tasks: Run Task`, and running `setup_python_env` from the drop-down menu.
When running this task, you will be prompted for the absolute path to your Isaac Sim installation.
If everything executes correctly, it should create a `.python.env` file in the `.vscode` directory.
The file contains the python paths to all the extensions provided by Isaac Sim and Omniverse.
This helps in indexing all the python modules for intelligent suggestions while writing code.
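To double-check the result, a small sketch like the one below (run from the project root; it is not a file shipped with this template) can confirm that the expected files were generated:
```python
# Hypothetical sanity check: verify the files generated by the setup_python_env task.
from pathlib import Path

vscode_dir = Path(".vscode")
for name in ("settings.json", "launch.json", ".python.env"):
    path = vscode_dir / name
    print(f"{path}: {'found' if path.exists() else 'missing'}")
```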
### Setup as Omniverse Extension (Optional)
We provide an example UI extension that will load upon enabling your extension defined in `source/mindbot/mindbot/ui_extension_example.py`.
To enable your extension, follow these steps:
1. **Add the search path of this project/repository** to the extension manager:
- Navigate to the extension manager using `Window` -> `Extensions`.
- Click on the **Hamburger Icon**, then go to `Settings`.
- In the `Extension Search Paths`, enter the absolute path to the `source` directory of this project/repository.
    - If not already present, in the `Extension Search Paths`, enter the path that leads to Isaac Lab's extension directory (`IsaacLab/source`).
- Click on the **Hamburger Icon**, then click `Refresh`.
2. **Search and enable your extension**:
- Find your extension under the `Third Party` category.
- Toggle it to enable your extension.
## Code formatting
We have a pre-commit template to automatically format your code.
To install pre-commit:
```bash
pip install pre-commit
```
Then you can run pre-commit with:
```bash
pre-commit run --all-files
```
## Troubleshooting
### Pylance Missing Indexing of Extensions
In some VS Code versions, indexing of some of the extensions is missing.
In this case, add the path to your extension in `.vscode/settings.json` under the key `"python.analysis.extraPaths"`.
```json
{
"python.analysis.extraPaths": [
"<path-to-ext-repo>/source/mindbot"
]
}
```
### Pylance Crash
If you encounter a crash in `pylance`, it is probable that too many files are indexed and you run out of memory.
A possible solution is to exclude some of the Omniverse packages that are not used in your project.
To do so, modify `.vscode/settings.json` and comment out packages under the key `"python.analysis.extraPaths"`.
Some examples of packages that can likely be excluded are:
```json
"<path-to-isaac-sim>/extscache/omni.anim.*" // Animation packages
"<path-to-isaac-sim>/extscache/omni.kit.*" // Kit UI tools
"<path-to-isaac-sim>/extscache/omni.graph.*" // Graph UI tools
"<path-to-isaac-sim>/extscache/omni.services.*" // Services tools
...
```

scripts/list_envs.py (new file, 64 lines)

@@ -0,0 +1,64 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""
Script to print all the available environments in Isaac Lab.
The script iterates over all registered environments and stores the details in a table.
It prints the name of the environment, the entry point and the config file.
All the environments are registered in the `mindbot` extension. They start
with `Template` in their name.
"""
"""Launch Isaac Sim Simulator first."""
from isaaclab.app import AppLauncher
# launch omniverse app
app_launcher = AppLauncher(headless=True)
simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
from prettytable import PrettyTable
import mindbot.tasks # noqa: F401
def main():
"""Print all environments registered in `mindbot` extension."""
# print all the available environments
table = PrettyTable(["S. No.", "Task Name", "Entry Point", "Config"])
table.title = "Available Environments in Isaac Lab"
# set alignment of table columns
table.align["Task Name"] = "l"
table.align["Entry Point"] = "l"
table.align["Config"] = "l"
# count of environments
index = 0
# acquire all Isaac environments names
for task_spec in gym.registry.values():
if "Template-" in task_spec.id:
# add details to table
table.add_row([index + 1, task_spec.id, task_spec.entry_point, task_spec.kwargs["env_cfg_entry_point"]])
# increment count
index += 1
print(table)
if __name__ == "__main__":
try:
# run the main function
main()
except Exception as e:
raise e
finally:
# close the app
simulation_app.close()

scripts/random_agent.py (new file, 72 lines)

@@ -0,0 +1,72 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Script to an environment with random action agent."""
"""Launch Isaac Sim Simulator first."""
import argparse
from isaaclab.app import AppLauncher
# add argparse arguments
parser = argparse.ArgumentParser(description="Random agent for Isaac Lab environments.")
parser.add_argument(
"--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli = parser.parse_args()
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
import torch
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import parse_env_cfg
import mindbot.tasks # noqa: F401
def main():
"""Random actions agent with Isaac Lab environment."""
# create environment configuration
env_cfg = parse_env_cfg(
args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
)
# create environment
env = gym.make(args_cli.task, cfg=env_cfg)
# print info (this is vectorized environment)
print(f"[INFO]: Gym observation space: {env.observation_space}")
print(f"[INFO]: Gym action space: {env.action_space}")
# reset environment
env.reset()
# simulate environment
while simulation_app.is_running():
# run everything in inference mode
with torch.inference_mode():
# sample actions from -1 to 1
actions = 2 * torch.rand(env.action_space.shape, device=env.unwrapped.device) - 1
# apply actions
env.step(actions)
# close the simulator
env.close()
if __name__ == "__main__":
# run the main function
main()
# close sim app
simulation_app.close()
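
For reference, the README also mentions `scripts/zero_agent.py`. That file is part of this commit but not shown in this excerpt; purely as an illustration, a zero-action counterpart to the random agent above could look roughly like the sketch below (the actual script in the repository may differ):

```python
# Illustrative sketch only: a zero-action agent mirroring scripts/random_agent.py.
# The real scripts/zero_agent.py shipped with this template may differ.
import argparse

from isaaclab.app import AppLauncher

parser = argparse.ArgumentParser(description="Zero agent for Isaac Lab environments.")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
AppLauncher.add_app_launcher_args(parser)
args_cli = parser.parse_args()

# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app

import gymnasium as gym
import torch

import isaaclab_tasks  # noqa: F401
from isaaclab_tasks.utils import parse_env_cfg

import mindbot.tasks  # noqa: F401


def main():
    """Zero-action agent with an Isaac Lab environment."""
    env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs)
    env = gym.make(args_cli.task, cfg=env_cfg)
    env.reset()
    while simulation_app.is_running():
        with torch.inference_mode():
            # apply all-zero actions to every environment
            actions = torch.zeros(env.action_space.shape, device=env.unwrapped.device)
            env.step(actions)
    env.close()


if __name__ == "__main__":
    main()
    simulation_app.close()
```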

scripts/rl_games/play.py (new file, 243 lines)

@@ -0,0 +1,243 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Script to play a checkpoint if an RL agent from RL-Games."""
"""Launch Isaac Sim Simulator first."""
import argparse
import sys
from isaaclab.app import AppLauncher
# add argparse arguments
parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from RL-Games.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument(
"--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument(
"--agent", type=str, default="rl_games_cfg_entry_point", help="Name of the RL agent configuration entry point."
)
parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument(
"--use_pretrained_checkpoint",
action="store_true",
help="Use the pre-trained checkpoint from Nucleus.",
)
parser.add_argument(
"--use_last_checkpoint",
action="store_true",
help="When no checkpoint provided, use the last saved model. Otherwise use the best saved model.",
)
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli, hydra_args = parser.parse_known_args()
# always enable cameras to record video
if args_cli.video:
args_cli.enable_cameras = True
# clear out sys.argv for Hydra
sys.argv = [sys.argv[0]] + hydra_args
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
import math
import os
import random
import time
import torch
from rl_games.common import env_configurations, vecenv
from rl_games.common.player import BasePlayer
from rl_games.torch_runner import Runner
from isaaclab.envs import (
DirectMARLEnv,
DirectMARLEnvCfg,
DirectRLEnvCfg,
ManagerBasedRLEnvCfg,
multi_agent_to_single_agent,
)
from isaaclab.utils.assets import retrieve_file_path
from isaaclab.utils.dict import print_dict
from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
from isaaclab_rl.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks # noqa: F401
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict):
"""Play with RL-Games agent."""
# grab task name for checkpoint path
task_name = args_cli.task.split(":")[-1]
train_task_name = task_name.replace("-Play", "")
# override configurations with non-hydra CLI arguments
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# update agent device to match simulation device
if args_cli.device is not None:
agent_cfg["params"]["config"]["device"] = args_cli.device
agent_cfg["params"]["config"]["device_name"] = args_cli.device
# randomly sample a seed if seed = -1
if args_cli.seed == -1:
args_cli.seed = random.randint(0, 10000)
agent_cfg["params"]["seed"] = args_cli.seed if args_cli.seed is not None else agent_cfg["params"]["seed"]
# set the environment seed (after multi-gpu config for updated rank from agent seed)
# note: certain randomizations occur in the environment initialization so we set the seed here
env_cfg.seed = agent_cfg["params"]["seed"]
# specify directory for logging experiments
log_root_path = os.path.join("logs", "rl_games", agent_cfg["params"]["config"]["name"])
log_root_path = os.path.abspath(log_root_path)
print(f"[INFO] Loading experiment from directory: {log_root_path}")
# find checkpoint
if args_cli.use_pretrained_checkpoint:
resume_path = get_published_pretrained_checkpoint("rl_games", train_task_name)
if not resume_path:
print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
return
elif args_cli.checkpoint is None:
# specify directory for logging runs
run_dir = agent_cfg["params"]["config"].get("full_experiment_name", ".*")
# specify name of checkpoint
if args_cli.use_last_checkpoint:
checkpoint_file = ".*"
else:
# this loads the best checkpoint
checkpoint_file = f"{agent_cfg['params']['config']['name']}.pth"
# get path to previous checkpoint
resume_path = get_checkpoint_path(log_root_path, run_dir, checkpoint_file, other_dirs=["nn"])
else:
resume_path = retrieve_file_path(args_cli.checkpoint)
log_dir = os.path.dirname(os.path.dirname(resume_path))
# set the log directory for the environment (works for all environment types)
env_cfg.log_dir = log_dir
# wrap around environment for rl-games
rl_device = agent_cfg["params"]["config"]["device"]
clip_obs = agent_cfg["params"]["env"].get("clip_observations", math.inf)
clip_actions = agent_cfg["params"]["env"].get("clip_actions", math.inf)
obs_groups = agent_cfg["params"]["env"].get("obs_groups")
concate_obs_groups = agent_cfg["params"]["env"].get("concate_obs_groups", True)
# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# convert to single-agent instance if required by the RL algorithm
if isinstance(env.unwrapped, DirectMARLEnv):
env = multi_agent_to_single_agent(env)
# wrap for video recording
if args_cli.video:
video_kwargs = {
"video_folder": os.path.join(log_root_path, log_dir, "videos", "play"),
"step_trigger": lambda step: step == 0,
"video_length": args_cli.video_length,
"disable_logger": True,
}
print("[INFO] Recording videos during training.")
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)
# wrap around environment for rl-games
env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions, obs_groups, concate_obs_groups)
# register the environment to rl-games registry
# note: in agents configuration: environment name must be "rlgpu"
vecenv.register(
"IsaacRlgWrapper", lambda config_name, num_actors, **kwargs: RlGamesGpuEnv(config_name, num_actors, **kwargs)
)
env_configurations.register("rlgpu", {"vecenv_type": "IsaacRlgWrapper", "env_creator": lambda **kwargs: env})
# load previously trained model
agent_cfg["params"]["load_checkpoint"] = True
agent_cfg["params"]["load_path"] = resume_path
print(f"[INFO]: Loading model checkpoint from: {agent_cfg['params']['load_path']}")
# set number of actors into agent config
agent_cfg["params"]["config"]["num_actors"] = env.unwrapped.num_envs
# create runner from rl-games
runner = Runner()
runner.load(agent_cfg)
# obtain the agent from the runner
agent: BasePlayer = runner.create_player()
agent.restore(resume_path)
agent.reset()
dt = env.unwrapped.step_dt
# reset environment
obs = env.reset()
if isinstance(obs, dict):
obs = obs["obs"]
timestep = 0
# required: enables the flag for batched observations
_ = agent.get_batch_size(obs, 1)
# initialize RNN states if used
if agent.is_rnn:
agent.init_rnn()
# simulate environment
# note: We simplified the logic in rl-games player.py (:func:`BasePlayer.run()`) function in an
# attempt to have complete control over environment stepping. However, this removes other
# operations such as masking that is used for multi-agent learning by RL-Games.
while simulation_app.is_running():
start_time = time.time()
# run everything in inference mode
with torch.inference_mode():
# convert obs to agent format
obs = agent.obs_to_torch(obs)
# agent stepping
actions = agent.get_action(obs, is_deterministic=agent.is_deterministic)
# env stepping
obs, _, dones, _ = env.step(actions)
# perform operations for terminated episodes
if len(dones) > 0:
# reset rnn state for terminated episodes
if agent.is_rnn and agent.states is not None:
for s in agent.states:
s[:, dones, :] = 0.0
if args_cli.video:
timestep += 1
# exit the play loop after recording one video
if timestep == args_cli.video_length:
break
# time delay for real-time evaluation
sleep_time = dt - (time.time() - start_time)
if args_cli.real_time and sleep_time > 0:
time.sleep(sleep_time)
# close the simulator
env.close()
if __name__ == "__main__":
# run the main function
main()
# close sim app
simulation_app.close()
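For orientation, here is a hypothetical sketch of the kind of lookup the run_dir/checkpoint_file patterns above drive: pick the latest run directory matching the pattern, then the last matching file under its nn/ subfolder. This is not the library code; the script itself relies on isaaclab_tasks.utils.get_checkpoint_path.

# Hypothetical sketch of run/checkpoint selection; get_checkpoint_path is the authoritative implementation.
import os
import re

def find_checkpoint(log_root: str, run_pattern: str, file_pattern: str, sub_dir: str = "nn") -> str:
    runs = sorted(d for d in os.listdir(log_root) if re.fullmatch(run_pattern, d))
    if not runs:
        raise FileNotFoundError(f"no run matching '{run_pattern}' under {log_root}")
    ckpt_dir = os.path.join(log_root, runs[-1], sub_dir)
    files = sorted(f for f in os.listdir(ckpt_dir) if re.fullmatch(file_pattern, f))
    if not files:
        raise FileNotFoundError(f"no checkpoint matching '{file_pattern}' under {ckpt_dir}")
    return os.path.join(ckpt_dir, files[-1])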

255
scripts/rl_games/train.py Normal file
View File

@@ -0,0 +1,255 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Script to train RL agent with RL-Games."""
"""Launch Isaac Sim Simulator first."""
import argparse
import sys
from distutils.util import strtobool
from isaaclab.app import AppLauncher
# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument(
"--agent", type=str, default="rl_games_cfg_entry_point", help="Name of the RL agent configuration entry point."
)
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument(
"--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes."
)
parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.")
parser.add_argument("--sigma", type=str, default=None, help="The policy's initial standard deviation.")
parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.")
parser.add_argument("--wandb-project-name", type=str, default=None, help="the wandb's project name")
parser.add_argument("--wandb-entity", type=str, default=None, help="the entity (team) of wandb's project")
parser.add_argument("--wandb-name", type=str, default=None, help="the name of wandb's run")
parser.add_argument(
"--track",
type=lambda x: bool(strtobool(x)),
default=False,
nargs="?",
const=True,
help="if toggled, this experiment will be tracked with Weights and Biases",
)
parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.")
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli, hydra_args = parser.parse_known_args()
# always enable cameras to record video
if args_cli.video:
args_cli.enable_cameras = True
# clear out sys.argv for Hydra
sys.argv = [sys.argv[0]] + hydra_args
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
import math
import os
import random
from datetime import datetime
import omni
from rl_games.common import env_configurations, vecenv
from rl_games.common.algo_observer import IsaacAlgoObserver
from rl_games.torch_runner import Runner
from isaaclab.envs import (
DirectMARLEnv,
DirectMARLEnvCfg,
DirectRLEnvCfg,
ManagerBasedRLEnvCfg,
multi_agent_to_single_agent,
)
from isaaclab.utils.assets import retrieve_file_path
from isaaclab.utils.dict import print_dict
from isaaclab.utils.io import dump_yaml
from isaaclab_rl.rl_games import MultiObserver, PbtAlgoObserver, RlGamesGpuEnv, RlGamesVecEnvWrapper
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks # noqa: F401
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict):
"""Train with RL-Games agent."""
# override configurations with non-hydra CLI arguments
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# check for invalid combination of CPU device with distributed training
if args_cli.distributed and args_cli.device is not None and "cpu" in args_cli.device:
raise ValueError(
"Distributed training is not supported when using CPU device. "
"Please use GPU device (e.g., --device cuda) for distributed training."
)
# update agent device to match simulation device
if args_cli.device is not None:
agent_cfg["params"]["config"]["device"] = args_cli.device
agent_cfg["params"]["config"]["device_name"] = args_cli.device
# randomly sample a seed if seed = -1
if args_cli.seed == -1:
args_cli.seed = random.randint(0, 10000)
agent_cfg["params"]["seed"] = args_cli.seed if args_cli.seed is not None else agent_cfg["params"]["seed"]
agent_cfg["params"]["config"]["max_epochs"] = (
args_cli.max_iterations if args_cli.max_iterations is not None else agent_cfg["params"]["config"]["max_epochs"]
)
if args_cli.checkpoint is not None:
resume_path = retrieve_file_path(args_cli.checkpoint)
agent_cfg["params"]["load_checkpoint"] = True
agent_cfg["params"]["load_path"] = resume_path
print(f"[INFO]: Loading model checkpoint from: {agent_cfg['params']['load_path']}")
train_sigma = float(args_cli.sigma) if args_cli.sigma is not None else None
# multi-gpu training config
if args_cli.distributed:
agent_cfg["params"]["seed"] += app_launcher.global_rank
agent_cfg["params"]["config"]["device"] = f"cuda:{app_launcher.local_rank}"
agent_cfg["params"]["config"]["device_name"] = f"cuda:{app_launcher.local_rank}"
agent_cfg["params"]["config"]["multi_gpu"] = True
# update env config device
env_cfg.sim.device = f"cuda:{app_launcher.local_rank}"
# set the environment seed (after multi-gpu config for updated rank from agent seed)
# note: certain randomizations occur in the environment initialization so we set the seed here
env_cfg.seed = agent_cfg["params"]["seed"]
# specify directory for logging experiments
config_name = agent_cfg["params"]["config"]["name"]
log_root_path = os.path.join("logs", "rl_games", config_name)
if "pbt" in agent_cfg:
if agent_cfg["pbt"]["directory"] == ".":
log_root_path = os.path.abspath(log_root_path)
else:
log_root_path = os.path.join(agent_cfg["pbt"]["directory"], log_root_path)
print(f"[INFO] Logging experiment in directory: {log_root_path}")
# specify directory for logging runs
log_dir = agent_cfg["params"]["config"].get("full_experiment_name", datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
# set directory into agent config
# logging directory path: <train_dir>/<full_experiment_name>
agent_cfg["params"]["config"]["train_dir"] = log_root_path
agent_cfg["params"]["config"]["full_experiment_name"] = log_dir
wandb_project = config_name if args_cli.wandb_project_name is None else args_cli.wandb_project_name
experiment_name = log_dir if args_cli.wandb_name is None else args_cli.wandb_name
# dump the configuration into log-directory
dump_yaml(os.path.join(log_root_path, log_dir, "params", "env.yaml"), env_cfg)
dump_yaml(os.path.join(log_root_path, log_dir, "params", "agent.yaml"), agent_cfg)
# read configurations about the agent-training
rl_device = agent_cfg["params"]["config"]["device"]
clip_obs = agent_cfg["params"]["env"].get("clip_observations", math.inf)
clip_actions = agent_cfg["params"]["env"].get("clip_actions", math.inf)
obs_groups = agent_cfg["params"]["env"].get("obs_groups")
concate_obs_groups = agent_cfg["params"]["env"].get("concate_obs_groups", True)
# set the IO descriptors export flag if requested
if isinstance(env_cfg, ManagerBasedRLEnvCfg):
env_cfg.export_io_descriptors = args_cli.export_io_descriptors
else:
omni.log.warn(
"IO descriptors are only supported for manager based RL environments. No IO descriptors will be exported."
)
# set the log directory for the environment (works for all environment types)
env_cfg.log_dir = os.path.join(log_root_path, log_dir)
# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# convert to single-agent instance if required by the RL algorithm
if isinstance(env.unwrapped, DirectMARLEnv):
env = multi_agent_to_single_agent(env)
# wrap for video recording
if args_cli.video:
video_kwargs = {
"video_folder": os.path.join(log_root_path, log_dir, "videos", "train"),
"step_trigger": lambda step: step % args_cli.video_interval == 0,
"video_length": args_cli.video_length,
"disable_logger": True,
}
print("[INFO] Recording videos during training.")
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)
# wrap around environment for rl-games
env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions, obs_groups, concate_obs_groups)
# register the environment to rl-games registry
# note: in agents configuration: environment name must be "rlgpu"
vecenv.register(
"IsaacRlgWrapper", lambda config_name, num_actors, **kwargs: RlGamesGpuEnv(config_name, num_actors, **kwargs)
)
env_configurations.register("rlgpu", {"vecenv_type": "IsaacRlgWrapper", "env_creator": lambda **kwargs: env})
# set number of actors into agent config
agent_cfg["params"]["config"]["num_actors"] = env.unwrapped.num_envs
# create runner from rl-games
if "pbt" in agent_cfg and agent_cfg["pbt"]["enabled"]:
observers = MultiObserver([IsaacAlgoObserver(), PbtAlgoObserver(agent_cfg, args_cli)])
runner = Runner(observers)
else:
runner = Runner(IsaacAlgoObserver())
runner.load(agent_cfg)
# reset the agent and env
runner.reset()
# train the agent
global_rank = int(os.getenv("RANK", "0"))
if args_cli.track and global_rank == 0:
if args_cli.wandb_entity is None:
raise ValueError("Weights and Biases entity must be specified for tracking.")
import wandb
wandb.init(
project=wandb_project,
entity=args_cli.wandb_entity,
name=experiment_name,
sync_tensorboard=True,
monitor_gym=True,
save_code=True,
)
if not wandb.run.resumed:
wandb.config.update({"env_cfg": env_cfg.to_dict()})
wandb.config.update({"agent_cfg": agent_cfg})
if args_cli.checkpoint is not None:
runner.run({"train": True, "play": False, "sigma": train_sigma, "checkpoint": resume_path})
else:
runner.run({"train": True, "play": False, "sigma": train_sigma})
# close the simulator
env.close()
if __name__ == "__main__":
# run the main function
main()
# close sim app
simulation_app.close()
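Note that the --track flag above relies on distutils.util.strtobool, and distutils was removed from the standard library in Python 3.12. A drop-in sketch for newer interpreters (hypothetical helper, mirroring the distutils semantics):

# Hypothetical stand-in for distutils.util.strtobool (removed in Python 3.12+).
def strtobool(value: str) -> int:
    value = value.lower()
    if value in ("y", "yes", "t", "true", "on", "1"):
        return 1
    if value in ("n", "no", "f", "false", "off", "0"):
        return 0
    raise ValueError(f"invalid truth value {value!r}")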

91
scripts/rsl_rl/cli_args.py Normal file
View File

@@ -0,0 +1,91 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import argparse
import random
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg
def add_rsl_rl_args(parser: argparse.ArgumentParser):
"""Add RSL-RL arguments to the parser.
Args:
parser: The parser to add the arguments to.
"""
# create a new argument group
arg_group = parser.add_argument_group("rsl_rl", description="Arguments for RSL-RL agent.")
# -- experiment arguments
arg_group.add_argument(
"--experiment_name", type=str, default=None, help="Name of the experiment folder where logs will be stored."
)
arg_group.add_argument("--run_name", type=str, default=None, help="Run name suffix to the log directory.")
# -- load arguments
arg_group.add_argument("--resume", action="store_true", default=False, help="Whether to resume from a checkpoint.")
arg_group.add_argument("--load_run", type=str, default=None, help="Name of the run folder to resume from.")
arg_group.add_argument("--checkpoint", type=str, default=None, help="Checkpoint file to resume from.")
# -- logger arguments
arg_group.add_argument(
"--logger", type=str, default=None, choices={"wandb", "tensorboard", "neptune"}, help="Logger module to use."
)
arg_group.add_argument(
"--log_project_name", type=str, default=None, help="Name of the logging project when using wandb or neptune."
)
def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlBaseRunnerCfg:
"""Parse configuration for RSL-RL agent based on inputs.
Args:
task_name: The name of the environment.
args_cli: The command line arguments.
Returns:
The parsed configuration for RSL-RL agent based on inputs.
"""
from isaaclab_tasks.utils.parse_cfg import load_cfg_from_registry
# load the default configuration
rslrl_cfg: RslRlBaseRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point")
rslrl_cfg = update_rsl_rl_cfg(rslrl_cfg, args_cli)
return rslrl_cfg
def update_rsl_rl_cfg(agent_cfg: RslRlBaseRunnerCfg, args_cli: argparse.Namespace):
"""Update configuration for RSL-RL agent based on inputs.
Args:
agent_cfg: The configuration for RSL-RL agent.
args_cli: The command line arguments.
Returns:
The updated configuration for RSL-RL agent based on inputs.
"""
# override the default configuration with CLI arguments
if hasattr(args_cli, "seed") and args_cli.seed is not None:
# randomly sample a seed if seed = -1
if args_cli.seed == -1:
args_cli.seed = random.randint(0, 10000)
agent_cfg.seed = args_cli.seed
if args_cli.resume is not None:
agent_cfg.resume = args_cli.resume
if args_cli.load_run is not None:
agent_cfg.load_run = args_cli.load_run
if args_cli.checkpoint is not None:
agent_cfg.load_checkpoint = args_cli.checkpoint
if args_cli.run_name is not None:
agent_cfg.run_name = args_cli.run_name
if args_cli.logger is not None:
agent_cfg.logger = args_cli.logger
# set the project name for wandb and neptune
if agent_cfg.logger in {"wandb", "neptune"} and args_cli.log_project_name:
agent_cfg.wandb_project = args_cli.log_project_name
agent_cfg.neptune_project = args_cli.log_project_name
return agent_cfg
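A minimal usage sketch of the two helpers above. The task name is illustrative, and parse_rsl_rl_cfg requires the task to be registered with an rsl_rl_cfg_entry_point (i.e. the Isaac Sim app and the task modules must already be loaded):

# Sketch only: wiring add_rsl_rl_args/parse_rsl_rl_cfg into a parser (task name is illustrative).
import argparse

parser = argparse.ArgumentParser(description="Example consumer of the RSL-RL CLI helpers.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
add_rsl_rl_args(parser)
args_cli = parser.parse_args(["--logger", "wandb", "--log_project_name", "my_project", "--seed", "-1"])
# loads the registered runner config for the task and applies the CLI overrides
agent_cfg = parse_rsl_rl_cfg("Isaac-Cartpole-v0", args_cli)
print(agent_cfg.seed, agent_cfg.logger, agent_cfg.wandb_project)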

210
scripts/rsl_rl/play.py Normal file
View File

@@ -0,0 +1,210 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Script to play a checkpoint if an RL agent from RSL-RL."""
"""Launch Isaac Sim Simulator first."""
import argparse
import sys
from isaaclab.app import AppLauncher
# local imports
import cli_args # isort: skip
# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument(
"--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument(
"--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
)
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument(
"--use_pretrained_checkpoint",
action="store_true",
help="Use the pre-trained checkpoint from Nucleus.",
)
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
# append RSL-RL cli arguments
cli_args.add_rsl_rl_args(parser)
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli, hydra_args = parser.parse_known_args()
# always enable cameras to record video
if args_cli.video:
args_cli.enable_cameras = True
# clear out sys.argv for Hydra
sys.argv = [sys.argv[0]] + hydra_args
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
import os
import time
import torch
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from isaaclab.envs import (
DirectMARLEnv,
DirectMARLEnvCfg,
DirectRLEnvCfg,
ManagerBasedRLEnvCfg,
multi_agent_to_single_agent,
)
from isaaclab.utils.assets import retrieve_file_path
from isaaclab.utils.dict import print_dict
from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks # noqa: F401
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
"""Play with RSL-RL agent."""
# grab task name for checkpoint path
task_name = args_cli.task.split(":")[-1]
train_task_name = task_name.replace("-Play", "")
# override configurations with non-hydra CLI arguments
agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
# set the environment seed
# note: certain randomizations occur in the environment initialization so we set the seed here
env_cfg.seed = agent_cfg.seed
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# specify directory for logging experiments
log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
log_root_path = os.path.abspath(log_root_path)
print(f"[INFO] Loading experiment from directory: {log_root_path}")
if args_cli.use_pretrained_checkpoint:
resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
if not resume_path:
print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
return
elif args_cli.checkpoint:
resume_path = retrieve_file_path(args_cli.checkpoint)
else:
resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
log_dir = os.path.dirname(resume_path)
# set the log directory for the environment (works for all environment types)
env_cfg.log_dir = log_dir
# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# convert to single-agent instance if required by the RL algorithm
if isinstance(env.unwrapped, DirectMARLEnv):
env = multi_agent_to_single_agent(env)
# wrap for video recording
if args_cli.video:
video_kwargs = {
"video_folder": os.path.join(log_dir, "videos", "play"),
"step_trigger": lambda step: step == 0,
"video_length": args_cli.video_length,
"disable_logger": True,
}
print("[INFO] Recording videos during training.")
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)
# wrap around environment for rsl-rl
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# load previously trained model
if agent_cfg.class_name == "OnPolicyRunner":
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
elif agent_cfg.class_name == "DistillationRunner":
runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
else:
raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
runner.load(resume_path)
# obtain the trained policy for inference
policy = runner.get_inference_policy(device=env.unwrapped.device)
# extract the neural network module
# we do this in a try-except to maintain backwards compatibility.
try:
# version 2.3 onwards
policy_nn = runner.alg.policy
except AttributeError:
# version 2.2 and below
policy_nn = runner.alg.actor_critic
# extract the normalizer
if hasattr(policy_nn, "actor_obs_normalizer"):
normalizer = policy_nn.actor_obs_normalizer
elif hasattr(policy_nn, "student_obs_normalizer"):
normalizer = policy_nn.student_obs_normalizer
else:
normalizer = None
# export policy to onnx/jit
export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
dt = env.unwrapped.step_dt
# reset environment
obs = env.get_observations()
timestep = 0
# simulate environment
while simulation_app.is_running():
start_time = time.time()
# run everything in inference mode
with torch.inference_mode():
# agent stepping
actions = policy(obs)
# env stepping
obs, _, dones, _ = env.step(actions)
# reset recurrent states for episodes that have terminated
policy_nn.reset(dones)
if args_cli.video:
timestep += 1
# Exit the play loop after recording one video
if timestep == args_cli.video_length:
break
# time delay for real-time evaluation
sleep_time = dt - (time.time() - start_time)
if args_cli.real_time and sleep_time > 0:
time.sleep(sleep_time)
# close the simulator
env.close()
if __name__ == "__main__":
# run the main function
main()
# close sim app
simulation_app.close()
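After the export above, exported/policy.onnx can be evaluated outside Isaac Lab. A minimal sketch with onnxruntime (assumes the exported graph takes a single observation tensor; the observation dimension below is a placeholder for the task's actual size):

# Sketch: run the exported policy with onnxruntime (single-observation-input assumption).
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("exported/policy.onnx")
obs_name = session.get_inputs()[0].name
obs_dim = 48  # placeholder: replace with the task's observation dimension
obs = np.zeros((1, obs_dim), dtype=np.float32)
actions = session.run(None, {obs_name: obs})[0]
print(actions.shape)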

217
scripts/rsl_rl/train.py Normal file
View File

@@ -0,0 +1,217 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Script to train RL agent with RSL-RL."""
"""Launch Isaac Sim Simulator first."""
import argparse
import sys
from isaaclab.app import AppLauncher
# local imports
import cli_args # isort: skip
# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument(
"--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
)
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.")
parser.add_argument(
"--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes."
)
parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.")
# append RSL-RL cli arguments
cli_args.add_rsl_rl_args(parser)
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
args_cli, hydra_args = parser.parse_known_args()
# always enable cameras to record video
if args_cli.video:
args_cli.enable_cameras = True
# clear out sys.argv for Hydra
sys.argv = [sys.argv[0]] + hydra_args
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Check for minimum supported RSL-RL version."""
import importlib.metadata as metadata
import platform
from packaging import version
# check minimum supported rsl-rl version
RSL_RL_VERSION = "3.0.1"
installed_version = metadata.version("rsl-rl-lib")
if version.parse(installed_version) < version.parse(RSL_RL_VERSION):
if platform.system() == "Windows":
cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"]
else:
cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"]
print(
f"Please install the correct version of RSL-RL.\nExisting version is: '{installed_version}'"
f" and required version is: '{RSL_RL_VERSION}'.\nTo install the correct version, run:"
f"\n\n\t{' '.join(cmd)}\n"
)
exit(1)
"""Rest everything follows."""
import gymnasium as gym
import os
import torch
from datetime import datetime
import omni
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from isaaclab.envs import (
DirectMARLEnv,
DirectMARLEnvCfg,
DirectRLEnvCfg,
ManagerBasedRLEnvCfg,
multi_agent_to_single_agent,
)
from isaaclab.utils.dict import print_dict
from isaaclab.utils.io import dump_yaml
from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks # noqa: F401
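# performance flags: allow TF32 matmul/cuDNN kernels (faster on Ampere+ GPUs);
# keep cuDNN autotuning (benchmark) and forced determinism disabled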
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = False
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
"""Train with RSL-RL agent."""
# override configurations with non-hydra CLI arguments
agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
agent_cfg.max_iterations = (
args_cli.max_iterations if args_cli.max_iterations is not None else agent_cfg.max_iterations
)
# set the environment seed
# note: certain randomizations occur in the environment initialization so we set the seed here
env_cfg.seed = agent_cfg.seed
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# check for invalid combination of CPU device with distributed training
if args_cli.distributed and args_cli.device is not None and "cpu" in args_cli.device:
raise ValueError(
"Distributed training is not supported when using CPU device. "
"Please use GPU device (e.g., --device cuda) for distributed training."
)
# multi-gpu training configuration
if args_cli.distributed:
env_cfg.sim.device = f"cuda:{app_launcher.local_rank}"
agent_cfg.device = f"cuda:{app_launcher.local_rank}"
# set seed to have diversity in different threads
seed = agent_cfg.seed + app_launcher.local_rank
env_cfg.seed = seed
agent_cfg.seed = seed
# specify directory for logging experiments
log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
log_root_path = os.path.abspath(log_root_path)
print(f"[INFO] Logging experiment in directory: {log_root_path}")
# specify directory for logging runs: {time-stamp}_{run_name}
log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# The Ray Tune workflow extracts experiment name using the logging line below, hence, do not change it (see PR #2346, comment-2819298849)
print(f"Exact experiment name requested from command line: {log_dir}")
if agent_cfg.run_name:
log_dir += f"_{agent_cfg.run_name}"
log_dir = os.path.join(log_root_path, log_dir)
# set the IO descriptors export flag if requested
if isinstance(env_cfg, ManagerBasedRLEnvCfg):
env_cfg.export_io_descriptors = args_cli.export_io_descriptors
else:
omni.log.warn(
"IO descriptors are only supported for manager based RL environments. No IO descriptors will be exported."
)
# set the log directory for the environment (works for all environment types)
env_cfg.log_dir = log_dir
# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# convert to single-agent instance if required by the RL algorithm
if isinstance(env.unwrapped, DirectMARLEnv):
env = multi_agent_to_single_agent(env)
# save resume path before creating a new log_dir
if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation":
resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
# wrap for video recording
if args_cli.video:
video_kwargs = {
"video_folder": os.path.join(log_dir, "videos", "train"),
"step_trigger": lambda step: step % args_cli.video_interval == 0,
"video_length": args_cli.video_length,
"disable_logger": True,
}
print("[INFO] Recording videos during training.")
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)
# wrap around environment for rsl-rl
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
# create runner from rsl-rl
if agent_cfg.class_name == "OnPolicyRunner":
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
elif agent_cfg.class_name == "DistillationRunner":
runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
else:
raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
# write git state to logs
runner.add_git_repo_to_log(__file__)
# load the checkpoint
if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation":
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# load previously trained model
runner.load(resume_path)
# dump the configuration into log-directory
dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
# run training
runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)
# close the simulator
env.close()
if __name__ == "__main__":
# run the main function
main()
# close sim app
simulation_app.close()

213
scripts/sb3/play.py Normal file
View File

@@ -0,0 +1,213 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Script to play a checkpoint if an RL agent from Stable-Baselines3."""
"""Launch Isaac Sim Simulator first."""
import argparse
import sys
from pathlib import Path
from isaaclab.app import AppLauncher
# add argparse arguments
parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from Stable-Baselines3.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument(
"--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument(
"--agent", type=str, default="sb3_cfg_entry_point", help="Name of the RL agent configuration entry point."
)
parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument(
"--use_pretrained_checkpoint",
action="store_true",
help="Use the pre-trained checkpoint from Nucleus.",
)
parser.add_argument(
"--use_last_checkpoint",
action="store_true",
help="When no checkpoint provided, use the last saved model. Otherwise use the best saved model.",
)
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
parser.add_argument(
"--keep_all_info",
action="store_true",
default=False,
help="Use a slower SB3 wrapper but keep all the extra training info.",
)
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli, hydra_args = parser.parse_known_args()
# always enable cameras to record video
if args_cli.video:
args_cli.enable_cameras = True
# clear out sys.argv for Hydra
sys.argv = [sys.argv[0]] + hydra_args
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
import os
import random
import time
import torch
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize
from isaaclab.envs import (
DirectMARLEnv,
DirectMARLEnvCfg,
DirectRLEnvCfg,
ManagerBasedRLEnvCfg,
multi_agent_to_single_agent,
)
from isaaclab.utils.dict import print_dict
from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
from isaaclab_rl.sb3 import Sb3VecEnvWrapper, process_sb3_cfg
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils.hydra import hydra_task_config
from isaaclab_tasks.utils.parse_cfg import get_checkpoint_path
import mindbot.tasks # noqa: F401
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict):
"""Play with stable-baselines agent."""
# grab task name for checkpoint path
task_name = args_cli.task.split(":")[-1]
train_task_name = task_name.replace("-Play", "")
# randomly sample a seed if seed = -1
if args_cli.seed == -1:
args_cli.seed = random.randint(0, 10000)
# override configurations with non-hydra CLI arguments
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
agent_cfg["seed"] = args_cli.seed if args_cli.seed is not None else agent_cfg["seed"]
# set the environment seed
# note: certain randomizations occur in the environment initialization so we set the seed here
env_cfg.seed = agent_cfg["seed"]
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# directory for logging into
log_root_path = os.path.join("logs", "sb3", train_task_name)
log_root_path = os.path.abspath(log_root_path)
# checkpoint and log_dir stuff
if args_cli.use_pretrained_checkpoint:
checkpoint_path = get_published_pretrained_checkpoint("sb3", train_task_name)
if not checkpoint_path:
print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
return
elif args_cli.checkpoint is None:
        # FIXME: last checkpoint doesn't seem to really use the last one
if args_cli.use_last_checkpoint:
checkpoint = "model_.*.zip"
else:
checkpoint = "model.zip"
checkpoint_path = get_checkpoint_path(log_root_path, ".*", checkpoint, sort_alpha=False)
else:
checkpoint_path = args_cli.checkpoint
log_dir = os.path.dirname(checkpoint_path)
# set the log directory for the environment (works for all environment types)
env_cfg.log_dir = log_dir
# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# post-process agent configuration
agent_cfg = process_sb3_cfg(agent_cfg, env.unwrapped.num_envs)
# convert to single-agent instance if required by the RL algorithm
if isinstance(env.unwrapped, DirectMARLEnv):
env = multi_agent_to_single_agent(env)
# wrap for video recording
if args_cli.video:
video_kwargs = {
"video_folder": os.path.join(log_dir, "videos", "play"),
"step_trigger": lambda step: step == 0,
"video_length": args_cli.video_length,
"disable_logger": True,
}
print("[INFO] Recording videos during training.")
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)
# wrap around environment for stable baselines
env = Sb3VecEnvWrapper(env, fast_variant=not args_cli.keep_all_info)
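    # derive the VecNormalize statistics path from the checkpoint path, e.g. .../model.zip -> .../model_vecnormalize.pkl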
vec_norm_path = checkpoint_path.replace("/model", "/model_vecnormalize").replace(".zip", ".pkl")
vec_norm_path = Path(vec_norm_path)
# normalize environment (if needed)
if vec_norm_path.exists():
print(f"Loading saved normalization: {vec_norm_path}")
env = VecNormalize.load(vec_norm_path, env)
# do not update them at test time
env.training = False
# reward normalization is not needed at test time
env.norm_reward = False
elif "normalize_input" in agent_cfg:
env = VecNormalize(
env,
training=True,
norm_obs="normalize_input" in agent_cfg and agent_cfg.pop("normalize_input"),
clip_obs="clip_obs" in agent_cfg and agent_cfg.pop("clip_obs"),
)
# create agent from stable baselines
print(f"Loading checkpoint from: {checkpoint_path}")
agent = PPO.load(checkpoint_path, env, print_system_info=True)
dt = env.unwrapped.step_dt
# reset environment
obs = env.reset()
timestep = 0
# simulate environment
while simulation_app.is_running():
start_time = time.time()
# run everything in inference mode
with torch.inference_mode():
# agent stepping
actions, _ = agent.predict(obs, deterministic=True)
# env stepping
obs, _, _, _ = env.step(actions)
if args_cli.video:
timestep += 1
# Exit the play loop after recording one video
if timestep == args_cli.video_length:
break
# time delay for real-time evaluation
sleep_time = dt - (time.time() - start_time)
if args_cli.real_time and sleep_time > 0:
time.sleep(sleep_time)
# close the simulator
env.close()
if __name__ == "__main__":
# run the main function
main()
# close sim app
simulation_app.close()

229
scripts/sb3/train.py Normal file
View File

@@ -0,0 +1,229 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Script to train RL agent with Stable Baselines3."""
"""Launch Isaac Sim Simulator first."""
import argparse
import contextlib
import signal
import sys
from pathlib import Path
from isaaclab.app import AppLauncher
# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with Stable-Baselines3.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument(
"--agent", type=str, default="sb3_cfg_entry_point", help="Name of the RL agent configuration entry point."
)
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument("--log_interval", type=int, default=100_000, help="Log data every n timesteps.")
parser.add_argument("--checkpoint", type=str, default=None, help="Continue the training from checkpoint.")
parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.")
parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.")
parser.add_argument(
"--keep_all_info",
action="store_true",
default=False,
help="Use a slower SB3 wrapper but keep all the extra training info.",
)
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli, hydra_args = parser.parse_known_args()
# always enable cameras to record video
if args_cli.video:
args_cli.enable_cameras = True
# clear out sys.argv for Hydra
sys.argv = [sys.argv[0]] + hydra_args
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
def cleanup_pbar(*args):
"""
A small helper to stop training and
cleanup progress bar properly on ctrl+c
"""
import gc
tqdm_objects = [obj for obj in gc.get_objects() if "tqdm" in type(obj).__name__]
for tqdm_object in tqdm_objects:
if "tqdm_rich" in type(tqdm_object).__name__:
tqdm_object.close()
raise KeyboardInterrupt
# disable KeyboardInterrupt override
signal.signal(signal.SIGINT, cleanup_pbar)
"""Rest everything follows."""
import gymnasium as gym
import numpy as np
import os
import random
from datetime import datetime
import omni
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback, LogEveryNTimesteps
from stable_baselines3.common.vec_env import VecNormalize
from isaaclab.envs import (
DirectMARLEnv,
DirectMARLEnvCfg,
DirectRLEnvCfg,
ManagerBasedRLEnvCfg,
multi_agent_to_single_agent,
)
from isaaclab.utils.dict import print_dict
from isaaclab.utils.io import dump_yaml
from isaaclab_rl.sb3 import Sb3VecEnvWrapper, process_sb3_cfg
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks # noqa: F401
@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict):
"""Train with stable-baselines agent."""
# randomly sample a seed if seed = -1
if args_cli.seed == -1:
args_cli.seed = random.randint(0, 10000)
# override configurations with non-hydra CLI arguments
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
agent_cfg["seed"] = args_cli.seed if args_cli.seed is not None else agent_cfg["seed"]
# max iterations for training
if args_cli.max_iterations is not None:
agent_cfg["n_timesteps"] = args_cli.max_iterations * agent_cfg["n_steps"] * env_cfg.scene.num_envs
# set the environment seed
# note: certain randomizations occur in the environment initialization so we set the seed here
env_cfg.seed = agent_cfg["seed"]
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# directory for logging into
run_info = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
log_root_path = os.path.abspath(os.path.join("logs", "sb3", args_cli.task))
print(f"[INFO] Logging experiment in directory: {log_root_path}")
# The Ray Tune workflow extracts experiment name using the logging line below, hence, do not change it (see PR #2346, comment-2819298849)
print(f"Exact experiment name requested from command line: {run_info}")
log_dir = os.path.join(log_root_path, run_info)
# dump the configuration into log-directory
dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
# save command used to run the script
command = " ".join(sys.orig_argv)
(Path(log_dir) / "command.txt").write_text(command)
# post-process agent configuration
agent_cfg = process_sb3_cfg(agent_cfg, env_cfg.scene.num_envs)
# read configurations about the agent-training
policy_arch = agent_cfg.pop("policy")
n_timesteps = agent_cfg.pop("n_timesteps")
# set the IO descriptors export flag if requested
if isinstance(env_cfg, ManagerBasedRLEnvCfg):
env_cfg.export_io_descriptors = args_cli.export_io_descriptors
else:
omni.log.warn(
"IO descriptors are only supported for manager based RL environments. No IO descriptors will be exported."
)
# set the log directory for the environment (works for all environment types)
env_cfg.log_dir = log_dir
# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# convert to single-agent instance if required by the RL algorithm
if isinstance(env.unwrapped, DirectMARLEnv):
env = multi_agent_to_single_agent(env)
# wrap for video recording
if args_cli.video:
video_kwargs = {
"video_folder": os.path.join(log_dir, "videos", "train"),
"step_trigger": lambda step: step % args_cli.video_interval == 0,
"video_length": args_cli.video_length,
"disable_logger": True,
}
print("[INFO] Recording videos during training.")
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)
# wrap around environment for stable baselines
env = Sb3VecEnvWrapper(env, fast_variant=not args_cli.keep_all_info)
norm_keys = {"normalize_input", "normalize_value", "clip_obs"}
norm_args = {}
for key in norm_keys:
if key in agent_cfg:
norm_args[key] = agent_cfg.pop(key)
if norm_args and norm_args.get("normalize_input"):
print(f"Normalizing input, {norm_args=}")
env = VecNormalize(
env,
training=True,
norm_obs=norm_args["normalize_input"],
norm_reward=norm_args.get("normalize_value", False),
clip_obs=norm_args.get("clip_obs", 100.0),
gamma=agent_cfg["gamma"],
clip_reward=np.inf,
)
# create agent from stable baselines
agent = PPO(policy_arch, env, verbose=1, tensorboard_log=log_dir, **agent_cfg)
if args_cli.checkpoint is not None:
agent = agent.load(args_cli.checkpoint, env, print_system_info=True)
# callbacks for agent
checkpoint_callback = CheckpointCallback(save_freq=1000, save_path=log_dir, name_prefix="model", verbose=2)
callbacks = [checkpoint_callback, LogEveryNTimesteps(n_steps=args_cli.log_interval)]
# train the agent
with contextlib.suppress(KeyboardInterrupt):
agent.learn(
total_timesteps=n_timesteps,
callback=callbacks,
progress_bar=True,
log_interval=None,
)
# save the final model
agent.save(os.path.join(log_dir, "model"))
print("Saving to:")
print(os.path.join(log_dir, "model.zip"))
if isinstance(env, VecNormalize):
print("Saving normalization")
env.save(os.path.join(log_dir, "model_vecnormalize.pkl"))
# close the simulator
env.close()
if __name__ == "__main__":
# run the main function
main()
# close sim app
simulation_app.close()
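As a sanity check on the --max_iterations override above: the script converts PPO iterations into SB3 timesteps as max_iterations x n_steps x num_envs. A quick illustration with made-up values:

# Illustration only: how --max_iterations maps to n_timesteps in the script above.
max_iterations = 500
n_steps = 16        # rollout length per environment, from the agent config
num_envs = 2048
print(f"{max_iterations * n_steps * num_envs:,} timesteps")  # 16,384,000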

250
scripts/skrl/play.py Normal file
View File

@@ -0,0 +1,250 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""
Script to play a checkpoint of an RL agent from skrl.
Visit the skrl documentation (https://skrl.readthedocs.io) to see the examples structured in
a more user-friendly way.
"""
"""Launch Isaac Sim Simulator first."""
import argparse
import sys
from isaaclab.app import AppLauncher
# add argparse arguments
parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from skrl.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument(
"--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument(
"--agent",
type=str,
default=None,
help=(
"Name of the RL agent configuration entry point. Defaults to None, in which case the argument "
"--algorithm is used to determine the default agent configuration entry point."
),
)
parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument(
"--use_pretrained_checkpoint",
action="store_true",
help="Use the pre-trained checkpoint from Nucleus.",
)
parser.add_argument(
"--ml_framework",
type=str,
default="torch",
choices=["torch", "jax", "jax-numpy"],
help="The ML framework used for training the skrl agent.",
)
parser.add_argument(
"--algorithm",
type=str,
default="PPO",
choices=["AMP", "PPO", "IPPO", "MAPPO"],
help="The RL algorithm used for training the skrl agent.",
)
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli, hydra_args = parser.parse_known_args()
# always enable cameras to record video
if args_cli.video:
args_cli.enable_cameras = True
# clear out sys.argv for Hydra
sys.argv = [sys.argv[0]] + hydra_args
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
import os
import random
import time
import torch
import skrl
from packaging import version
# check for minimum supported skrl version
SKRL_VERSION = "1.4.3"
if version.parse(skrl.__version__) < version.parse(SKRL_VERSION):
skrl.logger.error(
f"Unsupported skrl version: {skrl.__version__}. "
f"Install supported version using 'pip install skrl>={SKRL_VERSION}'"
)
exit()
if args_cli.ml_framework.startswith("torch"):
from skrl.utils.runner.torch import Runner
elif args_cli.ml_framework.startswith("jax"):
from skrl.utils.runner.jax import Runner
from isaaclab.envs import (
DirectMARLEnv,
DirectMARLEnvCfg,
DirectRLEnvCfg,
ManagerBasedRLEnvCfg,
multi_agent_to_single_agent,
)
from isaaclab.utils.dict import print_dict
from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
from isaaclab_rl.skrl import SkrlVecEnvWrapper
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks # noqa: F401
# config shortcuts
if args_cli.agent is None:
algorithm = args_cli.algorithm.lower()
agent_cfg_entry_point = "skrl_cfg_entry_point" if algorithm in ["ppo"] else f"skrl_{algorithm}_cfg_entry_point"
else:
agent_cfg_entry_point = args_cli.agent
algorithm = agent_cfg_entry_point.split("_cfg")[0].split("skrl_")[-1].lower()
@hydra_task_config(args_cli.task, agent_cfg_entry_point)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, experiment_cfg: dict):
"""Play with skrl agent."""
# grab task name for checkpoint path
task_name = args_cli.task.split(":")[-1]
train_task_name = task_name.replace("-Play", "")
# override configurations with non-hydra CLI arguments
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# configure the ML framework into the global skrl variable
if args_cli.ml_framework.startswith("jax"):
skrl.config.jax.backend = "jax" if args_cli.ml_framework == "jax" else "numpy"
# randomly sample a seed if seed = -1
if args_cli.seed == -1:
args_cli.seed = random.randint(0, 10000)
# set the agent and environment seed from command line
# note: certain randomization occur in the environment initialization so we set the seed here
experiment_cfg["seed"] = args_cli.seed if args_cli.seed is not None else experiment_cfg["seed"]
env_cfg.seed = experiment_cfg["seed"]
# specify directory for logging experiments (load checkpoint)
log_root_path = os.path.join("logs", "skrl", experiment_cfg["agent"]["experiment"]["directory"])
log_root_path = os.path.abspath(log_root_path)
print(f"[INFO] Loading experiment from directory: {log_root_path}")
# get checkpoint path
if args_cli.use_pretrained_checkpoint:
resume_path = get_published_pretrained_checkpoint("skrl", train_task_name)
if not resume_path:
print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
return
elif args_cli.checkpoint:
resume_path = os.path.abspath(args_cli.checkpoint)
else:
resume_path = get_checkpoint_path(
log_root_path, run_dir=f".*_{algorithm}_{args_cli.ml_framework}", other_dirs=["checkpoints"]
)
log_dir = os.path.dirname(os.path.dirname(resume_path))
# set the log directory for the environment (works for all environment types)
env_cfg.log_dir = log_dir
# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# convert to single-agent instance if required by the RL algorithm
if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
env = multi_agent_to_single_agent(env)
# get environment (step) dt for real-time evaluation
try:
dt = env.step_dt
except AttributeError:
dt = env.unwrapped.step_dt
# wrap for video recording
if args_cli.video:
video_kwargs = {
"video_folder": os.path.join(log_dir, "videos", "play"),
"step_trigger": lambda step: step == 0,
"video_length": args_cli.video_length,
"disable_logger": True,
}
print("[INFO] Recording videos during training.")
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)
# wrap around environment for skrl
env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework) # same as: `wrap_env(env, wrapper="auto")`
# configure and instantiate the skrl runner
# https://skrl.readthedocs.io/en/latest/api/utils/runner.html
experiment_cfg["trainer"]["close_environment_at_exit"] = False
experiment_cfg["agent"]["experiment"]["write_interval"] = 0 # don't log to TensorBoard
experiment_cfg["agent"]["experiment"]["checkpoint_interval"] = 0 # don't generate checkpoints
runner = Runner(env, experiment_cfg)
print(f"[INFO] Loading model checkpoint from: {resume_path}")
runner.agent.load(resume_path)
# set agent to evaluation mode
runner.agent.set_running_mode("eval")
# reset environment
obs, _ = env.reset()
timestep = 0
# simulate environment
while simulation_app.is_running():
start_time = time.time()
# run everything in inference mode
with torch.inference_mode():
# agent stepping
outputs = runner.agent.act(obs, timestep=0, timesteps=0)
# - multi-agent (deterministic) actions
if hasattr(env, "possible_agents"):
actions = {a: outputs[-1][a].get("mean_actions", outputs[0][a]) for a in env.possible_agents}
# - single-agent (deterministic) actions
else:
actions = outputs[-1].get("mean_actions", outputs[0])
# env stepping
obs, _, _, _, _ = env.step(actions)
if args_cli.video:
timestep += 1
# exit the play loop after recording one video
if timestep == args_cli.video_length:
break
# time delay for real-time evaluation
sleep_time = dt - (time.time() - start_time)
if args_cli.real_time and sleep_time > 0:
time.sleep(sleep_time)
# close the simulator
env.close()
if __name__ == "__main__":
# run the main function
main()
# close sim app
simulation_app.close()
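For reference, the deterministic-action selection used in the play loop above can be reproduced in isolation. This is an illustrative sketch only; the tuple layout (sampled actions, log-prob, outputs dict) is an assumption about what runner.agent.act() returns for a single-agent Gaussian policy, not code from this commit.

import torch

# Illustrative stand-ins for the values returned by runner.agent.act(...)
# (assumption: a tuple of (sampled_actions, log_prob, outputs_dict)).
sampled_actions = torch.randn(4, 1)
outputs_dict = {"mean_actions": torch.zeros(4, 1)}
outputs = (sampled_actions, None, outputs_dict)

# Same selection logic as the play loop: prefer the deterministic mean when available,
# otherwise fall back to the stochastically sampled actions.
actions = outputs[-1].get("mean_actions", outputs[0])
print(actions.shape)  # torch.Size([4, 1])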

235
scripts/skrl/train.py Normal file
View File

@@ -0,0 +1,235 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""
Script to train RL agent with skrl.
Visit the skrl documentation (https://skrl.readthedocs.io) to see the examples structured in
a more user-friendly way.
"""
"""Launch Isaac Sim Simulator first."""
import argparse
import sys
from isaaclab.app import AppLauncher
# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with skrl.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument(
"--agent",
type=str,
default=None,
help=(
"Name of the RL agent configuration entry point. Defaults to None, in which case the argument "
"--algorithm is used to determine the default agent configuration entry point."
),
)
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument(
"--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes."
)
parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint to resume training.")
parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.")
parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.")
parser.add_argument(
"--ml_framework",
type=str,
default="torch",
choices=["torch", "jax", "jax-numpy"],
help="The ML framework used for training the skrl agent.",
)
parser.add_argument(
"--algorithm",
type=str,
default="PPO",
choices=["AMP", "PPO", "IPPO", "MAPPO"],
help="The RL algorithm used for training the skrl agent.",
)
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli, hydra_args = parser.parse_known_args()
# always enable cameras to record video
if args_cli.video:
args_cli.enable_cameras = True
# clear out sys.argv for Hydra
sys.argv = [sys.argv[0]] + hydra_args
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
import os
import random
from datetime import datetime
import omni
import skrl
from packaging import version
# check for minimum supported skrl version
SKRL_VERSION = "1.4.3"
if version.parse(skrl.__version__) < version.parse(SKRL_VERSION):
skrl.logger.error(
f"Unsupported skrl version: {skrl.__version__}. "
f"Install supported version using 'pip install skrl>={SKRL_VERSION}'"
)
exit()
if args_cli.ml_framework.startswith("torch"):
from skrl.utils.runner.torch import Runner
elif args_cli.ml_framework.startswith("jax"):
from skrl.utils.runner.jax import Runner
from isaaclab.envs import (
DirectMARLEnv,
DirectMARLEnvCfg,
DirectRLEnvCfg,
ManagerBasedRLEnvCfg,
multi_agent_to_single_agent,
)
from isaaclab.utils.assets import retrieve_file_path
from isaaclab.utils.dict import print_dict
from isaaclab.utils.io import dump_yaml
from isaaclab_rl.skrl import SkrlVecEnvWrapper
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils.hydra import hydra_task_config
import mindbot.tasks # noqa: F401
# config shortcuts
if args_cli.agent is None:
algorithm = args_cli.algorithm.lower()
agent_cfg_entry_point = "skrl_cfg_entry_point" if algorithm in ["ppo"] else f"skrl_{algorithm}_cfg_entry_point"
else:
agent_cfg_entry_point = args_cli.agent
algorithm = agent_cfg_entry_point.split("_cfg")[0].split("skrl_")[-1].lower()
@hydra_task_config(args_cli.task, agent_cfg_entry_point)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict):
"""Train with skrl agent."""
# override configurations with non-hydra CLI arguments
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# check for invalid combination of CPU device with distributed training
if args_cli.distributed and args_cli.device is not None and "cpu" in args_cli.device:
raise ValueError(
"Distributed training is not supported when using CPU device. "
"Please use GPU device (e.g., --device cuda) for distributed training."
)
# multi-gpu training config
if args_cli.distributed:
env_cfg.sim.device = f"cuda:{app_launcher.local_rank}"
# max iterations for training
if args_cli.max_iterations:
agent_cfg["trainer"]["timesteps"] = args_cli.max_iterations * agent_cfg["agent"]["rollouts"]
agent_cfg["trainer"]["close_environment_at_exit"] = False
# configure the ML framework into the global skrl variable
if args_cli.ml_framework.startswith("jax"):
skrl.config.jax.backend = "jax" if args_cli.ml_framework == "jax" else "numpy"
# randomly sample a seed if seed = -1
if args_cli.seed == -1:
args_cli.seed = random.randint(0, 10000)
# set the agent and environment seed from command line
# note: certain randomizations occur during environment initialization, so we set the seed here
agent_cfg["seed"] = args_cli.seed if args_cli.seed is not None else agent_cfg["seed"]
env_cfg.seed = agent_cfg["seed"]
# specify directory for logging experiments
log_root_path = os.path.join("logs", "skrl", agent_cfg["agent"]["experiment"]["directory"])
log_root_path = os.path.abspath(log_root_path)
print(f"[INFO] Logging experiment in directory: {log_root_path}")
# specify directory for logging runs: {time-stamp}_{run_name}
log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + f"_{algorithm}_{args_cli.ml_framework}"
# The Ray Tune workflow extracts the experiment name from the logging line below, so do not change it (see PR #2346, comment-2819298849)
print(f"Exact experiment name requested from command line: {log_dir}")
if agent_cfg["agent"]["experiment"]["experiment_name"]:
log_dir += f'_{agent_cfg["agent"]["experiment"]["experiment_name"]}'
# set directory into agent config
agent_cfg["agent"]["experiment"]["directory"] = log_root_path
agent_cfg["agent"]["experiment"]["experiment_name"] = log_dir
# update log_dir
log_dir = os.path.join(log_root_path, log_dir)
# dump the configuration into log-directory
dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
# get checkpoint path (to resume training)
resume_path = retrieve_file_path(args_cli.checkpoint) if args_cli.checkpoint else None
# set the IO descriptors export flag if requested
if isinstance(env_cfg, ManagerBasedRLEnvCfg):
env_cfg.export_io_descriptors = args_cli.export_io_descriptors
else:
omni.log.warn(
"IO descriptors are only supported for manager based RL environments. No IO descriptors will be exported."
)
# set the log directory for the environment (works for all environment types)
env_cfg.log_dir = log_dir
# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
# convert to single-agent instance if required by the RL algorithm
if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
env = multi_agent_to_single_agent(env)
# wrap for video recording
if args_cli.video:
video_kwargs = {
"video_folder": os.path.join(log_dir, "videos", "train"),
"step_trigger": lambda step: step % args_cli.video_interval == 0,
"video_length": args_cli.video_length,
"disable_logger": True,
}
print("[INFO] Recording videos during training.")
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)
# wrap around environment for skrl
env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework) # same as: `wrap_env(env, wrapper="auto")`
# configure and instantiate the skrl runner
# https://skrl.readthedocs.io/en/latest/api/utils/runner.html
runner = Runner(env, agent_cfg)
# load checkpoint (if specified)
if resume_path:
print(f"[INFO] Loading model checkpoint from: {resume_path}")
runner.agent.load(resume_path)
# run training
runner.run()
# close the simulator
env.close()
if __name__ == "__main__":
# run the main function
main()
# close sim app
simulation_app.close()
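As a quick sanity check of the --max_iterations handling above: the trainer's timestep budget is the number of iterations multiplied by the agent's rollouts. A minimal sketch with illustrative numbers (32 rollouts matches the PPO config shipped in this template; the real values come from agent_cfg at runtime):

max_iterations = 150
rollouts = 32                          # agent_cfg["agent"]["rollouts"]
timesteps = max_iterations * rollouts
print(timesteps)                       # 4800, written into agent_cfg["trainer"]["timesteps"]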

72
scripts/zero_agent.py Normal file
View File

@@ -0,0 +1,72 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Script to run an environment with zero action agent."""
"""Launch Isaac Sim Simulator first."""
import argparse
from isaaclab.app import AppLauncher
# add argparse arguments
parser = argparse.ArgumentParser(description="Zero agent for Isaac Lab environments.")
parser.add_argument(
"--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli = parser.parse_args()
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym
import torch
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import parse_env_cfg
import mindbot.tasks # noqa: F401
def main():
"""Zero actions agent with Isaac Lab environment."""
# parse configuration
env_cfg = parse_env_cfg(
args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
)
# create environment
env = gym.make(args_cli.task, cfg=env_cfg)
# print info (this is vectorized environment)
print(f"[INFO]: Gym observation space: {env.observation_space}")
print(f"[INFO]: Gym action space: {env.action_space}")
# reset environment
env.reset()
# simulate environment
while simulation_app.is_running():
# run everything in inference mode
with torch.inference_mode():
# compute zero actions
actions = torch.zeros(env.action_space.shape, device=env.unwrapped.device)
# apply actions
env.step(actions)
# close the simulator
env.close()
if __name__ == "__main__":
# run the main function
main()
# close sim app
simulation_app.close()
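A hypothetical variant of the loop above that samples uniform random actions in [-1, 1] instead of zeros; it is not part of this commit and only illustrates how the vectorized action-space shape is reused.

import torch

def random_actions(env):
    # Uniform actions in [-1, 1] with the same vectorized shape the zero agent uses.
    return torch.rand(env.action_space.shape, device=env.unwrapped.device) * 2.0 - 1.0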

View File

@@ -0,0 +1,35 @@
[package]
# Semantic Versioning is used: https://semver.org/
version = "0.1.0"
# Description
category = "isaaclab"
readme = "README.md"
title = "Extension Template"
author = "Isaac Lab Project Developers"
maintainer = "Isaac Lab Project Developers"
description="Extension Template for Isaac Lab"
repository = "https://github.com/isaac-sim/IsaacLab.git"
keywords = ["extension", "template", "isaaclab"]
[dependencies]
"isaaclab" = {}
"isaaclab_assets" = {}
"isaaclab_mimic" = {}
"isaaclab_rl" = {}
"isaaclab_tasks" = {}
# NOTE: Add additional dependencies here
[[python.module]]
name = "mindbot"
[isaac_lab_settings]
# TODO: Uncomment and list any apt dependencies here.
# If none, leave it commented out.
# apt_deps = ["example_package"]
# TODO: Uncomment and provide path to a ros_ws
# with rosdeps to be installed. If none,
# leave it commented out.
# ros_ws = "path/from/extension_root/to/ros_ws"

View File

@@ -0,0 +1,10 @@
Changelog
---------
0.1.0 (2025-11-13)
~~~~~~~~~~~~~~~~~~
Added
^^^^^
* Created an initial template for building an extension or project based on Isaac Lab

View File

@@ -0,0 +1,14 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""
Python module serving as a project/extension template.
"""
# Register Gym environments.
from .tasks import *
# Register UI extensions.
from .ui_extension_example import *

View File

@@ -0,0 +1,17 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Package containing task implementations for the extension."""
##
# Register Gym environments.
##
from isaaclab_tasks.utils import import_packages
# The blacklist is used to prevent importing configs from sub-packages
_BLACKLIST_PKGS = ["utils", ".mdp"]
# Import all configs in this package
import_packages(__name__, _BLACKLIST_PKGS)

View File

@@ -0,0 +1,6 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym # noqa: F401

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Template-Mindbot-Direct-v0",
entry_point=f"{__name__}.mindbot_env:MindbotEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
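The entry points registered above are only resolved when the environment is instantiated. A minimal, illustrative instantiation sketch (assuming the Isaac Sim app has already been launched via AppLauncher, as in the scripts earlier in this commit):

import gymnasium as gym
import mindbot.tasks  # noqa: F401  (executes the gym.register call above)
from isaaclab_tasks.utils import parse_env_cfg

# Build the env config from the registered "env_cfg_entry_point" and create the env.
env_cfg = parse_env_cfg("Template-Mindbot-Direct-v0", num_envs=16)
env = gym.make("Template-Mindbot-Direct-v0", cfg=env_cfg)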

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# rl_games does not expose this fine-grained control, but the settings below approximate it
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,38 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 150
save_interval = 50
experiment_name = "cartpole_direct"
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
actor_hidden_dims=[32, 32],
critic_hidden_dims=[32, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log
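For the PPO configuration above, the samples collected per update and the size of each gradient minibatch follow from rollouts, the number of parallel environments, and mini_batches. An arithmetic sketch with an assumed environment count (the template scene defaults to 4096 envs):

num_envs = 4096                                       # assumed for illustration
rollouts = 32
mini_batches = 8

samples_per_update = rollouts * num_envs              # 131072 transitions per PPO update
minibatch_size = samples_per_update // mini_batches   # 16384 samples per gradient minibatch
print(samples_per_update, minibatch_size)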

View File

@@ -0,0 +1,135 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import math
import torch
from collections.abc import Sequence
import isaaclab.sim as sim_utils
from isaaclab.assets import Articulation
from isaaclab.envs import DirectRLEnv
from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
from isaaclab.utils.math import sample_uniform
from .mindbot_env_cfg import MindbotEnvCfg
class MindbotEnv(DirectRLEnv):
cfg: MindbotEnvCfg
def __init__(self, cfg: MindbotEnvCfg, render_mode: str | None = None, **kwargs):
super().__init__(cfg, render_mode, **kwargs)
self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name)
self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name)
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
def _setup_scene(self):
self.robot = Articulation(self.cfg.robot_cfg)
# add ground plane
spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
# clone and replicate
self.scene.clone_environments(copy_from_source=False)
# we need to explicitly filter collisions for CPU simulation
if self.device == "cpu":
self.scene.filter_collisions(global_prim_paths=[])
# add articulation to scene
self.scene.articulations["robot"] = self.robot
# add lights
light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
light_cfg.func("/World/Light", light_cfg)
def _pre_physics_step(self, actions: torch.Tensor) -> None:
self.actions = actions.clone()
def _apply_action(self) -> None:
self.robot.set_joint_effort_target(self.actions * self.cfg.action_scale, joint_ids=self._cart_dof_idx)
def _get_observations(self) -> dict:
obs = torch.cat(
(
self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
self.joint_pos[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
),
dim=-1,
)
observations = {"policy": obs}
return observations
def _get_rewards(self) -> torch.Tensor:
total_reward = compute_rewards(
self.cfg.rew_scale_alive,
self.cfg.rew_scale_terminated,
self.cfg.rew_scale_pole_pos,
self.cfg.rew_scale_cart_vel,
self.cfg.rew_scale_pole_vel,
self.joint_pos[:, self._pole_dof_idx[0]],
self.joint_vel[:, self._pole_dof_idx[0]],
self.joint_pos[:, self._cart_dof_idx[0]],
self.joint_vel[:, self._cart_dof_idx[0]],
self.reset_terminated,
)
return total_reward
def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
time_out = self.episode_length_buf >= self.max_episode_length - 1
out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
return out_of_bounds, time_out
def _reset_idx(self, env_ids: Sequence[int] | None):
if env_ids is None:
env_ids = self.robot._ALL_INDICES
super()._reset_idx(env_ids)
joint_pos = self.robot.data.default_joint_pos[env_ids]
joint_pos[:, self._pole_dof_idx] += sample_uniform(
self.cfg.initial_pole_angle_range[0] * math.pi,
self.cfg.initial_pole_angle_range[1] * math.pi,
joint_pos[:, self._pole_dof_idx].shape,
joint_pos.device,
)
joint_vel = self.robot.data.default_joint_vel[env_ids]
default_root_state = self.robot.data.default_root_state[env_ids]
default_root_state[:, :3] += self.scene.env_origins[env_ids]
self.joint_pos[env_ids] = joint_pos
self.joint_vel[env_ids] = joint_vel
self.robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids)
self.robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
self.robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids)
@torch.jit.script
def compute_rewards(
rew_scale_alive: float,
rew_scale_terminated: float,
rew_scale_pole_pos: float,
rew_scale_cart_vel: float,
rew_scale_pole_vel: float,
pole_pos: torch.Tensor,
pole_vel: torch.Tensor,
cart_pos: torch.Tensor,
cart_vel: torch.Tensor,
reset_terminated: torch.Tensor,
):
rew_alive = rew_scale_alive * (1.0 - reset_terminated.float())
rew_termination = rew_scale_terminated * reset_terminated.float()
rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1)
rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1)
rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1)
total_reward = rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel
return total_reward
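A small sanity check of compute_rewards above with dummy tensors (illustrative values only; assumes the function is imported from the module in this file):

import torch

rew = compute_rewards(
    1.0,     # rew_scale_alive
    -2.0,    # rew_scale_terminated
    -1.0,    # rew_scale_pole_pos
    -0.01,   # rew_scale_cart_vel
    -0.005,  # rew_scale_pole_vel
    torch.tensor([0.1, 0.2]),      # pole_pos
    torch.tensor([0.0, 0.5]),      # pole_vel
    torch.tensor([0.0, 1.0]),      # cart_pos
    torch.tensor([0.0, 0.3]),      # cart_vel
    torch.tensor([False, False]),  # reset_terminated
)
print(rew.shape)  # torch.Size([2]), one reward per environment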

View File

@@ -0,0 +1,48 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab_assets.robots.cartpole import CARTPOLE_CFG
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import DirectRLEnvCfg
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.sim import SimulationCfg
from isaaclab.utils import configclass
@configclass
class MindbotEnvCfg(DirectRLEnvCfg):
# env
decimation = 2
episode_length_s = 5.0
# - spaces definition
action_space = 1
observation_space = 4
state_space = 0
# simulation
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
# robot(s)
robot_cfg: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="/World/envs/env_.*/Robot")
# scene
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
# custom parameters/scales
# - controllable joint
cart_dof_name = "slider_to_cart"
pole_dof_name = "cart_to_pole"
# - action scale
action_scale = 100.0 # [N]
# - reward scales
rew_scale_alive = 1.0
rew_scale_terminated = -2.0
rew_scale_pole_pos = -1.0
rew_scale_cart_vel = -0.01
rew_scale_pole_vel = -0.005
# - reset states/conditions
initial_pole_angle_range = [-0.25, 0.25] # pole angle sample range on reset [rad]
max_cart_pos = 3.0 # reset if cart exceeds this position [m]
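Because MindbotEnvCfg is a configclass, individual fields can be overridden after instantiation, which is how the train/play scripts in this commit apply their CLI overrides. A brief, illustrative sketch (values are arbitrary):

env_cfg = MindbotEnvCfg()
env_cfg.scene.num_envs = 64    # fewer parallel envs, e.g. for local debugging
env_cfg.sim.device = "cuda:0"  # matches the --device CLI override in the scripts
env_cfg.action_scale = 50.0    # halve the applied cart force [N]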

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Template-Mindbot-Marl-Direct-v0",
entry_point=f"{__name__}.mindbot_marl_env:MindbotMarlEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_marl_env_cfg:MindbotMarlEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# rl_games does not expose this fine-grained control, but the settings below approximate it
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001

View File

@@ -0,0 +1,38 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 150
save_interval = 50
experiment_name = "cartpole_direct"
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
actor_hidden_dims=[32, 32],
critic_hidden_dims=[32, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,184 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import math
import torch
from collections.abc import Sequence
import isaaclab.sim as sim_utils
from isaaclab.assets import Articulation
from isaaclab.envs import DirectMARLEnv
from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
from isaaclab.utils.math import sample_uniform
from .mindbot_marl_env_cfg import MindbotMarlEnvCfg
class MindbotMarlEnv(DirectMARLEnv):
cfg: MindbotMarlEnvCfg
def __init__(self, cfg: MindbotMarlEnvCfg, render_mode: str | None = None, **kwargs):
super().__init__(cfg, render_mode, **kwargs)
self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name)
self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name)
self._pendulum_dof_idx, _ = self.robot.find_joints(self.cfg.pendulum_dof_name)
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
def _setup_scene(self):
self.robot = Articulation(self.cfg.robot_cfg)
# add ground plane
spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
# clone and replicate
self.scene.clone_environments(copy_from_source=False)
# we need to explicitly filter collisions for CPU simulation
if self.device == "cpu":
self.scene.filter_collisions(global_prim_paths=[])
# add articulation to scene
self.scene.articulations["robot"] = self.robot
# add lights
light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
light_cfg.func("/World/Light", light_cfg)
def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None:
self.actions = actions
def _apply_action(self) -> None:
self.robot.set_joint_effort_target(
self.actions["cart"] * self.cfg.cart_action_scale, joint_ids=self._cart_dof_idx
)
self.robot.set_joint_effort_target(
self.actions["pendulum"] * self.cfg.pendulum_action_scale, joint_ids=self._pendulum_dof_idx
)
def _get_observations(self) -> dict[str, torch.Tensor]:
pole_joint_pos = normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1))
pendulum_joint_pos = normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]].unsqueeze(dim=1))
observations = {
"cart": torch.cat(
(
self.joint_pos[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
pole_joint_pos,
self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
),
dim=-1,
),
"pendulum": torch.cat(
(
pole_joint_pos + pendulum_joint_pos,
pendulum_joint_pos,
self.joint_vel[:, self._pendulum_dof_idx[0]].unsqueeze(dim=1),
),
dim=-1,
),
}
return observations
def _get_rewards(self) -> dict[str, torch.Tensor]:
total_reward = compute_rewards(
self.cfg.rew_scale_alive,
self.cfg.rew_scale_terminated,
self.cfg.rew_scale_cart_pos,
self.cfg.rew_scale_cart_vel,
self.cfg.rew_scale_pole_pos,
self.cfg.rew_scale_pole_vel,
self.cfg.rew_scale_pendulum_pos,
self.cfg.rew_scale_pendulum_vel,
self.joint_pos[:, self._cart_dof_idx[0]],
self.joint_vel[:, self._cart_dof_idx[0]],
normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]]),
self.joint_vel[:, self._pole_dof_idx[0]],
normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]]),
self.joint_vel[:, self._pendulum_dof_idx[0]],
math.prod(self.terminated_dict.values()),
)
return total_reward
def _get_dones(self) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
time_out = self.episode_length_buf >= self.max_episode_length - 1
out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
terminated = {agent: out_of_bounds for agent in self.cfg.possible_agents}
time_outs = {agent: time_out for agent in self.cfg.possible_agents}
return terminated, time_outs
def _reset_idx(self, env_ids: Sequence[int] | None):
if env_ids is None:
env_ids = self.robot._ALL_INDICES
super()._reset_idx(env_ids)
joint_pos = self.robot.data.default_joint_pos[env_ids]
joint_pos[:, self._pole_dof_idx] += sample_uniform(
self.cfg.initial_pole_angle_range[0] * math.pi,
self.cfg.initial_pole_angle_range[1] * math.pi,
joint_pos[:, self._pole_dof_idx].shape,
joint_pos.device,
)
joint_pos[:, self._pendulum_dof_idx] += sample_uniform(
self.cfg.initial_pendulum_angle_range[0] * math.pi,
self.cfg.initial_pendulum_angle_range[1] * math.pi,
joint_pos[:, self._pendulum_dof_idx].shape,
joint_pos.device,
)
joint_vel = self.robot.data.default_joint_vel[env_ids]
default_root_state = self.robot.data.default_root_state[env_ids]
default_root_state[:, :3] += self.scene.env_origins[env_ids]
self.joint_pos[env_ids] = joint_pos
self.joint_vel[env_ids] = joint_vel
self.robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids)
self.robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
self.robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids)
@torch.jit.script
def normalize_angle(angle):
return (angle + math.pi) % (2 * math.pi) - math.pi
@torch.jit.script
def compute_rewards(
rew_scale_alive: float,
rew_scale_terminated: float,
rew_scale_cart_pos: float,
rew_scale_cart_vel: float,
rew_scale_pole_pos: float,
rew_scale_pole_vel: float,
rew_scale_pendulum_pos: float,
rew_scale_pendulum_vel: float,
cart_pos: torch.Tensor,
cart_vel: torch.Tensor,
pole_pos: torch.Tensor,
pole_vel: torch.Tensor,
pendulum_pos: torch.Tensor,
pendulum_vel: torch.Tensor,
reset_terminated: torch.Tensor,
):
rew_alive = rew_scale_alive * (1.0 - reset_terminated.float())
rew_termination = rew_scale_terminated * reset_terminated.float()
rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1)
rew_pendulum_pos = rew_scale_pendulum_pos * torch.sum(
torch.square(pole_pos + pendulum_pos).unsqueeze(dim=1), dim=-1
)
rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1)
rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1)
rew_pendulum_vel = rew_scale_pendulum_vel * torch.sum(torch.abs(pendulum_vel).unsqueeze(dim=1), dim=-1)
total_reward = {
"cart": rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel,
"pendulum": rew_alive + rew_termination + rew_pendulum_pos + rew_pendulum_vel,
}
return total_reward
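A quick check of normalize_angle above: it wraps arbitrary angles into [-pi, pi). Illustrative only:

import math
import torch

angles = torch.tensor([0.0, math.pi, 1.5 * math.pi, -3.0 * math.pi])
print(normalize_angle(angles))
# approximately tensor([ 0.0000, -3.1416, -1.5708, -3.1416])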

View File

@@ -0,0 +1,55 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab_assets.robots.cart_double_pendulum import CART_DOUBLE_PENDULUM_CFG
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import DirectMARLEnvCfg
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.sim import SimulationCfg
from isaaclab.utils import configclass
@configclass
class MindbotMarlEnvCfg(DirectMARLEnvCfg):
# env
decimation = 2
episode_length_s = 5.0
# multi-agent specification and spaces definition
possible_agents = ["cart", "pendulum"]
action_spaces = {"cart": 1, "pendulum": 1}
observation_spaces = {"cart": 4, "pendulum": 3}
state_space = -1
# simulation
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
# robot(s)
robot_cfg: ArticulationCfg = CART_DOUBLE_PENDULUM_CFG.replace(prim_path="/World/envs/env_.*/Robot")
# scene
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
# custom parameters/scales
# - controllable joint
cart_dof_name = "slider_to_cart"
pole_dof_name = "cart_to_pole"
pendulum_dof_name = "pole_to_pendulum"
# - action scale
cart_action_scale = 100.0 # [N]
pendulum_action_scale = 50.0 # [Nm]
# - reward scales
rew_scale_alive = 1.0
rew_scale_terminated = -2.0
rew_scale_cart_pos = 0
rew_scale_cart_vel = -0.01
rew_scale_pole_pos = -1.0
rew_scale_pole_vel = -0.01
rew_scale_pendulum_pos = -1.0
rew_scale_pendulum_vel = -0.01
# - reset states/conditions
initial_pendulum_angle_range = [-0.25, 0.25] # pendulum angle sample range on reset [rad]
initial_pole_angle_range = [-0.25, 0.25] # pole angle sample range on reset [rad]
max_cart_pos = 3.0 # reset if cart exceeds this position [m]
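The MARL environment configured above consumes per-agent action dictionaries keyed by possible_agents. An illustrative sketch of the expected action structure (shapes follow action_spaces above; num_envs is arbitrary here):

import torch

num_envs = 4
actions = {
    "cart": torch.zeros(num_envs, 1),      # 1 action: cart force
    "pendulum": torch.zeros(num_envs, 1),  # 1 action: pendulum torque
}
# obs, rewards, terminated, time_outs, infos = env.step(actions)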

View File

@@ -0,0 +1,6 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym # noqa: F401

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Template-Mindbot-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)

View File

@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001
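
One constraint worth checking in the rl_games block above: `num_actors` is filled in from the scene's `num_envs` by the launch script, and the resulting rollout batch (`num_actors * horizon_length`) must divide evenly by `minibatch_size`. A small sanity-check sketch, assuming the 4096-env default from the scene config:

# Assumed value: num_envs = 4096 comes from the InteractiveSceneCfg default in this template.
num_envs = 4096
horizon_length = 32
minibatch_size = 16384

batch_size = num_envs * horizon_length       # 131072 transitions collected per PPO update
assert batch_size % minibatch_size == 0      # splits cleanly into minibatches
print(batch_size // minibatch_size)          # 8 minibatches per epoch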

View File

@@ -0,0 +1,38 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass

from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg


@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 150
save_interval = 50
experiment_name = "cartpole_direct"
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
actor_hidden_dims=[32, 32],
critic_hidden_dims=[32, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
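
If the project has `isaaclab_tasks` available, this runner configuration can be pulled back out of the gym registration by its entry-point name. A sketch (the import location of `load_cfg_from_registry` in the installed Isaac Lab version is an assumption):

# Sketch: load_cfg_from_registry reads the gym registration kwargs and instantiates the config.
from isaaclab_tasks.utils import load_cfg_from_registry  # assumed import location

agent_cfg: PPORunnerCfg = load_cfg_from_registry("Template-Mindbot-v0", "rsl_rl_cfg_entry_point")
print(agent_cfg.max_iterations, agent_cfg.policy.actor_hidden_dims)  # 150 [32, 32]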

View File

@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"
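
For reference, the YAML above maps roughly one-to-one onto stable-baselines3's PPO constructor. A hedged sketch (the `env` argument is assumed to be an SB3-compatible, vectorized wrapper around the Isaac Lab task, which the train script normally provides):

# Sketch only: `env` must already be an SB3-compatible vectorized environment.
import torch.nn as nn
from stable_baselines3 import PPO


def make_agent(env):
    # Mirrors the hyperparameters declared in the YAML above.
    return PPO(
        "MlpPolicy",
        env,
        n_steps=16,
        batch_size=4096,
        gae_lambda=0.95,
        gamma=0.99,
        n_epochs=20,
        ent_coef=0.01,
        learning_rate=3e-4,
        clip_range=0.2,
        vf_coef=1.0,
        max_grad_norm=1.0,
        policy_kwargs={"activation_fn": nn.ELU, "net_arch": [32, 32], "squash_output": False},
        device="cuda:0",
    )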

View File

@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log
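
The AMP-specific weights above control how the environment's task reward and the discriminator-derived style reward are blended; with `task_reward_weight: 0.0` and `style_reward_weight: 1.0` the policy learns purely from imitation. A schematic of the combination (a sketch of the idea, not skrl's internal code):

def combine_amp_rewards(task_reward, style_reward, task_w=0.0, style_w=1.0):
    # Schematic only; in skrl the style term is already scaled by discriminator_reward_scale (2.0 above).
    return task_w * task_reward + style_w * style_reward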

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log
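
IPPO trains an independent policy/value pair per agent, each seeing only its own observation; the shapes follow the `observation_spaces` declared in `MindbotMarlEnvCfg`. A hedged sketch of what a reset on the multi-agent environment is expected to return (the `env` argument is assumed to be the DirectMARLEnv built from that config):

def check_agent_observations(env):
    # Sketch: a DirectMARLEnv returns one observation tensor per agent, keyed by agent name.
    obs, _ = env.reset()
    assert obs["cart"].shape == (env.num_envs, 4)       # cart observes 4 values
    assert obs["pendulum"].shape == (env.num_envs, 3)   # pendulum observes 3 values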

View File

@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log

View File

@@ -0,0 +1,10 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the environment."""
from isaaclab.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403

View File

@@ -0,0 +1,26 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations

import torch
from typing import TYPE_CHECKING

from isaaclab.assets import Articulation
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi

if TYPE_CHECKING:
    from isaaclab.envs import ManagerBasedRLEnv


def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
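
The penalty above is just a squared error on wrapped joint angles; a quick numeric sketch of the core expression, detached from the environment plumbing:

# Standalone illustration of the math used above (values are made up).
import torch

from isaaclab.utils.math import wrap_to_pi

joint_pos = torch.tensor([[3.3, -0.1]])   # two joints; the first sits just past +pi
target = 0.0
penalty = torch.sum(torch.square(wrap_to_pi(joint_pos) - target), dim=1)
# wrap_to_pi(3.3) ~= -2.98, so the first joint alone contributes ~8.9 to the penalty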

View File

@@ -0,0 +1,180 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math

import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass

from . import mdp

##
# Pre-defined configs
##
from isaaclab_assets.robots.cartpole import CARTPOLE_CFG  # isort:skip


##
# Scene definition
##


@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
"""Configuration for a cart-pole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
robot: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
    )


##
# MDP settings
##


@configclass
class ActionsCfg:
    """Action specifications for the MDP."""

    joint_effort = mdp.JointEffortActionCfg(asset_name="robot", joint_names=["slider_to_cart"], scale=100.0)


@configclass
class ObservationsCfg:
    """Observation specifications for the MDP."""

@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
# observation terms (order preserved)
joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel)
joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel)
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
    policy: PolicyCfg = PolicyCfg()


@configclass
class EventCfg:
    """Configuration for events."""

# reset
reset_cart_position = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]),
"position_range": (-1.0, 1.0),
"velocity_range": (-0.5, 0.5),
},
)
reset_pole_position = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]),
"position_range": (-0.25 * math.pi, 0.25 * math.pi),
"velocity_range": (-0.25 * math.pi, 0.25 * math.pi),
},
    )


@configclass
class RewardsCfg:
    """Reward terms for the MDP."""

# (1) Constant running reward
alive = RewTerm(func=mdp.is_alive, weight=1.0)
# (2) Failure penalty
terminating = RewTerm(func=mdp.is_terminated, weight=-2.0)
# (3) Primary task: keep pole upright
pole_pos = RewTerm(
func=mdp.joint_pos_target_l2,
weight=-1.0,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]), "target": 0.0},
)
# (4) Shaping tasks: lower cart velocity
cart_vel = RewTerm(
func=mdp.joint_vel_l1,
weight=-0.01,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"])},
)
# (5) Shaping tasks: lower pole angular velocity
pole_vel = RewTerm(
func=mdp.joint_vel_l1,
weight=-0.005,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"])},
    )


@configclass
class TerminationsCfg:
    """Termination terms for the MDP."""

# (1) Time out
time_out = DoneTerm(func=mdp.time_out, time_out=True)
# (2) Cart out of bounds
cart_out_of_bounds = DoneTerm(
func=mdp.joint_pos_out_of_manual_limit,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]), "bounds": (-3.0, 3.0)},
    )


##
# Environment configuration
##


@configclass
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=4096, env_spacing=4.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
self.decimation = 2
self.episode_length_s = 5
# viewer settings
self.viewer.eye = (8.0, 0.0, 5.0)
# simulation settings
self.sim.dt = 1 / 120
self.sim.render_interval = self.decimation
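
A short smoke-test sketch for the configuration above (hedged: it assumes the Omniverse app has already been launched via `AppLauncher` earlier in the same process, as in the Isaac Lab standalone scripts):

# Sketch only; run after AppLauncher has started the simulation app.
import torch

from isaaclab.envs import ManagerBasedRLEnv

env_cfg = MindbotEnvCfg()
env_cfg.scene.num_envs = 16
env = ManagerBasedRLEnv(cfg=env_cfg)

obs, _ = env.reset()
for _ in range(100):
    actions = torch.randn(env.num_envs, 1, device=env.device)  # one effort action per env
    obs, rew, terminated, truncated, info = env.step(actions)
env.close()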

View File

@@ -0,0 +1,46 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import omni.ext
import omni.ui


# Functions and vars are available to other extensions as usual in python: `example.python_ext.some_public_function(x)`
def some_public_function(x: int):
print("[mindbot] some_public_function was called with x: ", x)
    return x**x


# Any class derived from `omni.ext.IExt` in the top-level module (defined in `python.modules` of `extension.toml`) will be
# instantiated when the extension is enabled, and `on_startup(ext_id)` will be called. Later, when the extension is disabled,
# on_shutdown() is called.
class ExampleExtension(omni.ext.IExt):
    # ext_id is the current extension id. It can be used with the extension manager to query additional
    # information, like where this extension is located on the filesystem.
def on_startup(self, ext_id):
print("[mindbot] startup")
self._count = 0
self._window = omni.ui.Window("My Window", width=300, height=300)
with self._window.frame:
with omni.ui.VStack():
label = omni.ui.Label("")
def on_click():
self._count += 1
label.text = f"count: {self._count}"
def on_reset():
self._count = 0
label.text = "empty"
on_reset()
with omni.ui.HStack():
omni.ui.Button("Add", clicked_fn=on_click)
omni.ui.Button("Reset", clicked_fn=on_reset)
def on_shutdown(self):
print("[mindbot] shutdown")

View File

@@ -0,0 +1,3 @@
[build-system]
requires = ["setuptools", "wheel", "toml"]
build-backend = "setuptools.build_meta"

47
source/mindbot/setup.py Normal file
View File

@@ -0,0 +1,47 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Installation script for the 'mindbot' python package."""
import os
import toml
from setuptools import setup
# Obtain the extension data from the extension.toml file
EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__))
# Read the extension.toml file
EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml"))
# Minimum dependencies required prior to installation
INSTALL_REQUIRES = [
# NOTE: Add dependencies
"psutil",
]
# Installation operation
setup(
name="mindbot",
packages=["mindbot"],
author=EXTENSION_TOML_DATA["package"]["author"],
maintainer=EXTENSION_TOML_DATA["package"]["maintainer"],
url=EXTENSION_TOML_DATA["package"]["repository"],
version=EXTENSION_TOML_DATA["package"]["version"],
description=EXTENSION_TOML_DATA["package"]["description"],
keywords=EXTENSION_TOML_DATA["package"]["keywords"],
install_requires=INSTALL_REQUIRES,
license="Apache-2.0",
include_package_data=True,
python_requires=">=3.10",
classifiers=[
"Natural Language :: English",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Isaac Sim :: 4.5.0",
"Isaac Sim :: 5.0.0",
"Isaac Sim :: 5.1.0",
],
zip_safe=False,
)