Initial commit

commit 1f7053a306
Date: 2025-11-13 17:37:07 +08:00
67 changed files with 6071 additions and 0 deletions


@@ -0,0 +1,35 @@
[package]
# Semantic Versioning is used: https://semver.org/
version = "0.1.0"
# Description
category = "isaaclab"
readme = "README.md"
title = "Extension Template"
author = "Isaac Lab Project Developers"
maintainer = "Isaac Lab Project Developers"
description = "Extension Template for Isaac Lab"
repository = "https://github.com/isaac-sim/IsaacLab.git"
keywords = ["extension", "template", "isaaclab"]
[dependencies]
"isaaclab" = {}
"isaaclab_assets" = {}
"isaaclab_mimic" = {}
"isaaclab_rl" = {}
"isaaclab_tasks" = {}
# NOTE: Add additional dependencies here
[[python.module]]
name = "mindbot"
[isaac_lab_settings]
# TODO: Uncomment and list any apt dependencies here.
# If none, leave it commented out.
# apt_deps = ["example_package"]
# TODO: Uncomment and provide path to a ros_ws
# with rosdeps to be installed. If none,
# leave it commented out.
# ros_ws = "path/from/extension_root/to/ros_ws"


@@ -0,0 +1,10 @@
Changelog
---------
0.1.0 (2025-11-13)
~~~~~~~~~~~~~~~~~~
Added
^^^^^
* Created an initial template for building an extension or project based on Isaac Lab


@@ -0,0 +1,14 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""
Python module serving as a project/extension template.
"""
# Register Gym environments.
from .tasks import *
# Register UI extensions.
from .ui_extension_example import *


@@ -0,0 +1,17 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Package containing task implementations for the extension."""
##
# Register Gym environments.
##
from isaaclab_tasks.utils import import_packages
# The blacklist is used to prevent importing configs from sub-packages
_BLACKLIST_PKGS = ["utils", ".mdp"]
# Import all configs in this package
import_packages(__name__, _BLACKLIST_PKGS)


@@ -0,0 +1,6 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym # noqa: F401


@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Template-Mindbot-Direct-v0",
entry_point=f"{__name__}.mindbot_env:MindbotEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
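As a rough usage sketch (not part of this commit, and assuming an Isaac Sim app has already been launched through isaaclab.app.AppLauncher and the mindbot package is importable), the id registered above can be created through gymnasium like any other Isaac Lab task:
# Hypothetical usage sketch: create the task registered above.
import gymnasium as gym
import mindbot.tasks  # noqa: F401  # importing the package triggers the gym.register calls via import_packages
from isaaclab_tasks.utils import parse_env_cfg

env_cfg = parse_env_cfg("Template-Mindbot-Direct-v0", num_envs=64)  # small override for a quick smoke test
env = gym.make("Template-Mindbot-Direct-v0", cfg=env_cfg)
obs, info = env.reset()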


@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause


@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine-grained control, but this comes close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001


@@ -0,0 +1,38 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 150
save_interval = 50
experiment_name = "cartpole_direct"
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
actor_hidden_dims=[32, 32],
critic_hidden_dims=[32, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
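Because the runner settings above are a configclass, they behave like a dataclass and can be adjusted in code before training; a minimal sketch with hypothetical values (not part of this commit):
# Hypothetical override sketch for the runner configuration above.
cfg = PPORunnerCfg()
cfg.max_iterations = 300                  # train longer than the template default of 150
cfg.policy.actor_hidden_dims = [64, 64]   # widen the actor MLP
cfg.algorithm.learning_rate = 5.0e-4      # lower the initial learning rate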


@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"


@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log


@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log


@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log


@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log


@@ -0,0 +1,135 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import math
import torch
from collections.abc import Sequence
import isaaclab.sim as sim_utils
from isaaclab.assets import Articulation
from isaaclab.envs import DirectRLEnv
from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
from isaaclab.utils.math import sample_uniform
from .mindbot_env_cfg import MindbotEnvCfg
class MindbotEnv(DirectRLEnv):
cfg: MindbotEnvCfg
def __init__(self, cfg: MindbotEnvCfg, render_mode: str | None = None, **kwargs):
super().__init__(cfg, render_mode, **kwargs)
self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name)
self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name)
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
def _setup_scene(self):
self.robot = Articulation(self.cfg.robot_cfg)
# add ground plane
spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
# clone and replicate
self.scene.clone_environments(copy_from_source=False)
# we need to explicitly filter collisions for CPU simulation
if self.device == "cpu":
self.scene.filter_collisions(global_prim_paths=[])
# add articulation to scene
self.scene.articulations["robot"] = self.robot
# add lights
light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
light_cfg.func("/World/Light", light_cfg)
def _pre_physics_step(self, actions: torch.Tensor) -> None:
self.actions = actions.clone()
def _apply_action(self) -> None:
self.robot.set_joint_effort_target(self.actions * self.cfg.action_scale, joint_ids=self._cart_dof_idx)
def _get_observations(self) -> dict:
obs = torch.cat(
(
self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
self.joint_pos[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
),
dim=-1,
)
observations = {"policy": obs}
return observations
def _get_rewards(self) -> torch.Tensor:
total_reward = compute_rewards(
self.cfg.rew_scale_alive,
self.cfg.rew_scale_terminated,
self.cfg.rew_scale_pole_pos,
self.cfg.rew_scale_cart_vel,
self.cfg.rew_scale_pole_vel,
self.joint_pos[:, self._pole_dof_idx[0]],
self.joint_vel[:, self._pole_dof_idx[0]],
self.joint_pos[:, self._cart_dof_idx[0]],
self.joint_vel[:, self._cart_dof_idx[0]],
self.reset_terminated,
)
return total_reward
def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
time_out = self.episode_length_buf >= self.max_episode_length - 1
out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
return out_of_bounds, time_out
def _reset_idx(self, env_ids: Sequence[int] | None):
if env_ids is None:
env_ids = self.robot._ALL_INDICES
super()._reset_idx(env_ids)
joint_pos = self.robot.data.default_joint_pos[env_ids]
joint_pos[:, self._pole_dof_idx] += sample_uniform(
self.cfg.initial_pole_angle_range[0] * math.pi,
self.cfg.initial_pole_angle_range[1] * math.pi,
joint_pos[:, self._pole_dof_idx].shape,
joint_pos.device,
)
joint_vel = self.robot.data.default_joint_vel[env_ids]
default_root_state = self.robot.data.default_root_state[env_ids]
default_root_state[:, :3] += self.scene.env_origins[env_ids]
self.joint_pos[env_ids] = joint_pos
self.joint_vel[env_ids] = joint_vel
self.robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids)
self.robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
self.robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids)
@torch.jit.script
def compute_rewards(
rew_scale_alive: float,
rew_scale_terminated: float,
rew_scale_pole_pos: float,
rew_scale_cart_vel: float,
rew_scale_pole_vel: float,
pole_pos: torch.Tensor,
pole_vel: torch.Tensor,
cart_pos: torch.Tensor,
cart_vel: torch.Tensor,
reset_terminated: torch.Tensor,
):
rew_alive = rew_scale_alive * (1.0 - reset_terminated.float())
rew_termination = rew_scale_terminated * reset_terminated.float()
rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1)
rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1)
rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1)
total_reward = rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel
return total_reward


@@ -0,0 +1,48 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab_assets.robots.cartpole import CARTPOLE_CFG
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import DirectRLEnvCfg
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.sim import SimulationCfg
from isaaclab.utils import configclass
@configclass
class MindbotEnvCfg(DirectRLEnvCfg):
# env
decimation = 2
episode_length_s = 5.0
# - spaces definition
action_space = 1
observation_space = 4
state_space = 0
# simulation
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
# robot(s)
robot_cfg: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="/World/envs/env_.*/Robot")
# scene
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
# custom parameters/scales
# - controllable joint
cart_dof_name = "slider_to_cart"
pole_dof_name = "cart_to_pole"
# - action scale
action_scale = 100.0 # [N]
# - reward scales
rew_scale_alive = 1.0
rew_scale_terminated = -2.0
rew_scale_pole_pos = -1.0
rew_scale_cart_vel = -0.01
rew_scale_pole_vel = -0.005
# - reset states/conditions
initial_pole_angle_range = [-0.25, 0.25] # pole angle sample range on reset [rad]
max_cart_pos = 3.0 # reset if cart exceeds this position [m]


@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Template-Mindbot-Marl-Direct-v0",
entry_point=f"{__name__}.mindbot_marl_env:MindbotMarlEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_marl_env_cfg:MindbotMarlEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
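Unlike the single-agent task above, this registration targets a DirectMARLEnv, so actions, observations, rewards, and dones are exchanged as per-agent dictionaries keyed by the names in possible_agents ("cart" and "pendulum"). A hypothetical interaction sketch, under the same assumptions as the earlier one:
# Hypothetical step sketch for the multi-agent task registered above.
import gymnasium as gym
import torch
from isaaclab_tasks.utils import parse_env_cfg

env_cfg = parse_env_cfg("Template-Mindbot-Marl-Direct-v0", num_envs=32)
env = gym.make("Template-Mindbot-Marl-Direct-v0", cfg=env_cfg)
obs, extras = env.reset()
actions = {
    "cart": torch.zeros(env.unwrapped.num_envs, 1, device=env.unwrapped.device),
    "pendulum": torch.zeros(env.unwrapped.num_envs, 1, device=env.unwrapped.device),
}
obs, rewards, terminated, time_outs, extras = env.step(actions)  # all returned values are per-agent dicts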


@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause


@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine-grained control, but this comes close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001


@@ -0,0 +1,38 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 150
save_interval = 50
experiment_name = "cartpole_direct"
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
actor_hidden_dims=[32, 32],
critic_hidden_dims=[32, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)


@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"


@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log


@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log


@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log


@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log


@@ -0,0 +1,184 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import math
import torch
from collections.abc import Sequence
import isaaclab.sim as sim_utils
from isaaclab.assets import Articulation
from isaaclab.envs import DirectMARLEnv
from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
from isaaclab.utils.math import sample_uniform
from .mindbot_marl_env_cfg import MindbotMarlEnvCfg
class MindbotMarlEnv(DirectMARLEnv):
cfg: MindbotMarlEnvCfg
def __init__(self, cfg: MindbotMarlEnvCfg, render_mode: str | None = None, **kwargs):
super().__init__(cfg, render_mode, **kwargs)
self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name)
self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name)
self._pendulum_dof_idx, _ = self.robot.find_joints(self.cfg.pendulum_dof_name)
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
def _setup_scene(self):
self.robot = Articulation(self.cfg.robot_cfg)
# add ground plane
spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
# clone and replicate
self.scene.clone_environments(copy_from_source=False)
# we need to explicitly filter collisions for CPU simulation
if self.device == "cpu":
self.scene.filter_collisions(global_prim_paths=[])
# add articulation to scene
self.scene.articulations["robot"] = self.robot
# add lights
light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
light_cfg.func("/World/Light", light_cfg)
def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None:
self.actions = actions
def _apply_action(self) -> None:
self.robot.set_joint_effort_target(
self.actions["cart"] * self.cfg.cart_action_scale, joint_ids=self._cart_dof_idx
)
self.robot.set_joint_effort_target(
self.actions["pendulum"] * self.cfg.pendulum_action_scale, joint_ids=self._pendulum_dof_idx
)
def _get_observations(self) -> dict[str, torch.Tensor]:
pole_joint_pos = normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1))
pendulum_joint_pos = normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]].unsqueeze(dim=1))
observations = {
"cart": torch.cat(
(
self.joint_pos[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
pole_joint_pos,
self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
),
dim=-1,
),
"pendulum": torch.cat(
(
pole_joint_pos + pendulum_joint_pos,
pendulum_joint_pos,
self.joint_vel[:, self._pendulum_dof_idx[0]].unsqueeze(dim=1),
),
dim=-1,
),
}
return observations
def _get_rewards(self) -> dict[str, torch.Tensor]:
total_reward = compute_rewards(
self.cfg.rew_scale_alive,
self.cfg.rew_scale_terminated,
self.cfg.rew_scale_cart_pos,
self.cfg.rew_scale_cart_vel,
self.cfg.rew_scale_pole_pos,
self.cfg.rew_scale_pole_vel,
self.cfg.rew_scale_pendulum_pos,
self.cfg.rew_scale_pendulum_vel,
self.joint_pos[:, self._cart_dof_idx[0]],
self.joint_vel[:, self._cart_dof_idx[0]],
normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]]),
self.joint_vel[:, self._pole_dof_idx[0]],
normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]]),
self.joint_vel[:, self._pendulum_dof_idx[0]],
math.prod(self.terminated_dict.values()),
)
return total_reward
def _get_dones(self) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
self.joint_pos = self.robot.data.joint_pos
self.joint_vel = self.robot.data.joint_vel
time_out = self.episode_length_buf >= self.max_episode_length - 1
out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
terminated = {agent: out_of_bounds for agent in self.cfg.possible_agents}
time_outs = {agent: time_out for agent in self.cfg.possible_agents}
return terminated, time_outs
def _reset_idx(self, env_ids: Sequence[int] | None):
if env_ids is None:
env_ids = self.robot._ALL_INDICES
super()._reset_idx(env_ids)
joint_pos = self.robot.data.default_joint_pos[env_ids]
joint_pos[:, self._pole_dof_idx] += sample_uniform(
self.cfg.initial_pole_angle_range[0] * math.pi,
self.cfg.initial_pole_angle_range[1] * math.pi,
joint_pos[:, self._pole_dof_idx].shape,
joint_pos.device,
)
joint_pos[:, self._pendulum_dof_idx] += sample_uniform(
self.cfg.initial_pendulum_angle_range[0] * math.pi,
self.cfg.initial_pendulum_angle_range[1] * math.pi,
joint_pos[:, self._pendulum_dof_idx].shape,
joint_pos.device,
)
joint_vel = self.robot.data.default_joint_vel[env_ids]
default_root_state = self.robot.data.default_root_state[env_ids]
default_root_state[:, :3] += self.scene.env_origins[env_ids]
self.joint_pos[env_ids] = joint_pos
self.joint_vel[env_ids] = joint_vel
self.robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids)
self.robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
self.robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids)
@torch.jit.script
def normalize_angle(angle):
return (angle + math.pi) % (2 * math.pi) - math.pi
@torch.jit.script
def compute_rewards(
rew_scale_alive: float,
rew_scale_terminated: float,
rew_scale_cart_pos: float,
rew_scale_cart_vel: float,
rew_scale_pole_pos: float,
rew_scale_pole_vel: float,
rew_scale_pendulum_pos: float,
rew_scale_pendulum_vel: float,
cart_pos: torch.Tensor,
cart_vel: torch.Tensor,
pole_pos: torch.Tensor,
pole_vel: torch.Tensor,
pendulum_pos: torch.Tensor,
pendulum_vel: torch.Tensor,
reset_terminated: torch.Tensor,
):
rew_alive = rew_scale_alive * (1.0 - reset_terminated.float())
rew_termination = rew_scale_terminated * reset_terminated.float()
rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1)
rew_pendulum_pos = rew_scale_pendulum_pos * torch.sum(
torch.square(pole_pos + pendulum_pos).unsqueeze(dim=1), dim=-1
)
rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1)
rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1)
rew_pendulum_vel = rew_scale_pendulum_vel * torch.sum(torch.abs(pendulum_vel).unsqueeze(dim=1), dim=-1)
total_reward = {
"cart": rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel,
"pendulum": rew_alive + rew_termination + rew_pendulum_pos + rew_pendulum_vel,
}
return total_reward


@@ -0,0 +1,55 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab_assets.robots.cart_double_pendulum import CART_DOUBLE_PENDULUM_CFG
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import DirectMARLEnvCfg
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.sim import SimulationCfg
from isaaclab.utils import configclass
@configclass
class MindbotMarlEnvCfg(DirectMARLEnvCfg):
# env
decimation = 2
episode_length_s = 5.0
# multi-agent specification and spaces definition
possible_agents = ["cart", "pendulum"]
action_spaces = {"cart": 1, "pendulum": 1}
observation_spaces = {"cart": 4, "pendulum": 3}
state_space = -1
# simulation
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
# robot(s)
robot_cfg: ArticulationCfg = CART_DOUBLE_PENDULUM_CFG.replace(prim_path="/World/envs/env_.*/Robot")
# scene
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
# custom parameters/scales
# - controllable joint
cart_dof_name = "slider_to_cart"
pole_dof_name = "cart_to_pole"
pendulum_dof_name = "pole_to_pendulum"
# - action scale
cart_action_scale = 100.0 # [N]
pendulum_action_scale = 50.0 # [Nm]
# - reward scales
rew_scale_alive = 1.0
rew_scale_terminated = -2.0
rew_scale_cart_pos = 0
rew_scale_cart_vel = -0.01
rew_scale_pole_pos = -1.0
rew_scale_pole_vel = -0.01
rew_scale_pendulum_pos = -1.0
rew_scale_pendulum_vel = -0.01
# - reset states/conditions
initial_pendulum_angle_range = [-0.25, 0.25] # pendulum angle sample range on reset [rad]
initial_pole_angle_range = [-0.25, 0.25] # pole angle sample range on reset [rad]
max_cart_pos = 3.0 # reset if cart exceeds this position [m]


@@ -0,0 +1,6 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym # noqa: F401


@@ -0,0 +1,29 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Template-Mindbot-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
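This variant points gymnasium directly at isaaclab.envs:ManagerBasedRLEnv and supplies the scene/MDP definition through env_cfg_entry_point; a minimal, hypothetical usage sketch under the same assumptions as the earlier ones:
# Hypothetical usage sketch for the manager-based task registered above.
import gymnasium as gym
from isaaclab_tasks.utils import parse_env_cfg

env_cfg = parse_env_cfg("Template-Mindbot-v0", num_envs=32)
env = gym.make("Template-Mindbot-v0", cfg=env_cfg)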


@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause


@@ -0,0 +1,78 @@
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine-grained control, but this comes close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole_direct
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001


@@ -0,0 +1,38 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 150
save_interval = 50
experiment_name = "cartpole_direct"
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
actor_hidden_dims=[32, 32],
critic_hidden_dims=[32, 32],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)


@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
activation_fn: nn.ELU
net_arch: [32, 32]
squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"


@@ -0,0 +1,111 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: -2.9
fixed_log_std: True
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
discriminator: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [1024, 512]
activations: relu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
class: RandomMemory
memory_size: 200000
# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
class: RandomMemory
memory_size: 1000000
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
class: AMP
rollouts: 16
learning_epochs: 6
mini_batches: 2
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
# rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "humanoid_amp_run"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 80000
environment_info: log


@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: IPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log


@@ -0,0 +1,82 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: MAPPO
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 1.0
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cart_double_pendulum_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log


@@ -0,0 +1,80 @@
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: GaussianMixin
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
initial_log_std: 0.0
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ACTIONS
value: # see deterministic_model parameters
class: DeterministicMixin
clip_actions: False
network:
- name: net
input: OBSERVATIONS
layers: [32, 32]
activations: elu
output: ONE
# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: RandomMemory
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: PPO
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-04
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0.0
rewards_shaper_scale: 0.1
time_limit_bootstrap: False
# logging and checkpoint
experiment:
directory: "cartpole_direct"
experiment_name: ""
write_interval: auto
checkpoint_interval: auto
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
timesteps: 4800
environment_info: log


@@ -0,0 +1,10 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the environment."""
from isaaclab.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403


@@ -0,0 +1,26 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from isaaclab.assets import Articulation
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv
def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
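For reference, this reward term is consumed by the manager-based environment configuration later in this commit; the snippet below mirrors that RewardsCfg entry (imports match the config file, so this is not standalone):
# How joint_pos_target_l2 is wired into the RewardsCfg of the environment config below.
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from . import mdp  # as in the environment config that follows

pole_pos = RewTerm(
    func=mdp.joint_pos_target_l2,
    weight=-1.0,
    params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]), "target": 0.0},
)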


@@ -0,0 +1,180 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from . import mdp
##
# Pre-defined configs
##
from isaaclab_assets.robots.cartpole import CARTPOLE_CFG # isort:skip
##
# Scene definition
##
@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
"""Configuration for a cart-pole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robot
robot: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
##
# MDP settings
##
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
joint_effort = mdp.JointEffortActionCfg(asset_name="robot", joint_names=["slider_to_cart"], scale=100.0)
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
# observation terms (order preserved)
joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel)
joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel)
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class EventCfg:
"""Configuration for events."""
# reset
reset_cart_position = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]),
"position_range": (-1.0, 1.0),
"velocity_range": (-0.5, 0.5),
},
)
reset_pole_position = EventTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]),
"position_range": (-0.25 * math.pi, 0.25 * math.pi),
"velocity_range": (-0.25 * math.pi, 0.25 * math.pi),
},
)
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
# (1) Constant running reward
alive = RewTerm(func=mdp.is_alive, weight=1.0)
# (2) Failure penalty
terminating = RewTerm(func=mdp.is_terminated, weight=-2.0)
# (3) Primary task: keep pole upright
pole_pos = RewTerm(
func=mdp.joint_pos_target_l2,
weight=-1.0,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]), "target": 0.0},
)
# (4) Shaping tasks: lower cart velocity
cart_vel = RewTerm(
func=mdp.joint_vel_l1,
weight=-0.01,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"])},
)
# (5) Shaping tasks: lower pole angular velocity
pole_vel = RewTerm(
func=mdp.joint_vel_l1,
weight=-0.005,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"])},
)
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
# (1) Time out
time_out = DoneTerm(func=mdp.time_out, time_out=True)
# (2) Cart out of bounds
cart_out_of_bounds = DoneTerm(
func=mdp.joint_pos_out_of_manual_limit,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]), "bounds": (-3.0, 3.0)},
)
##
# Environment configuration
##
@configclass
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
# Scene settings
scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=4096, env_spacing=4.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
self.decimation = 2
self.episode_length_s = 5
# viewer settings
self.viewer.eye = (8.0, 0.0, 5.0)
# simulation settings
self.sim.dt = 1 / 120
self.sim.render_interval = self.decimation
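A hypothetical smoke-test sketch for the configuration above (not part of this commit; assumes an Isaac Sim app is already running via AppLauncher and that MindbotEnvCfg from this file is in scope):
# Hypothetical smoke test: build the manager-based env from the config above and step it once.
import torch
from isaaclab.envs import ManagerBasedRLEnv

env = ManagerBasedRLEnv(cfg=MindbotEnvCfg())
obs, _ = env.reset()
zero_actions = torch.zeros(env.num_envs, env.action_manager.total_action_dim, device=env.device)
obs, rew, terminated, truncated, extras = env.step(zero_actions)
env.close()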


@@ -0,0 +1,46 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import omni.ext
import omni.ui  # needed below for omni.ui.Window, omni.ui.VStack, etc.
# Functions and variables are available to other extensions as usual in Python: `example.python_ext.some_public_function(x)`
def some_public_function(x: int):
print("[mindbot] some_public_function was called with x: ", x)
return x**x
# Any class derived from `omni.ext.IExt` in the top-level module (defined in `python.modules` of `extension.toml`) will be
# instantiated when the extension is enabled, and `on_startup(ext_id)` will be called. Later, when the extension is disabled,
# `on_shutdown()` is called.
class ExampleExtension(omni.ext.IExt):
# ext_id is the current extension id. It can be used with the extension manager to query additional information, like where
# this extension is located on the filesystem.
def on_startup(self, ext_id):
print("[mindbot] startup")
self._count = 0
self._window = omni.ui.Window("My Window", width=300, height=300)
with self._window.frame:
with omni.ui.VStack():
label = omni.ui.Label("")
def on_click():
self._count += 1
label.text = f"count: {self._count}"
def on_reset():
self._count = 0
label.text = "empty"
on_reset()
with omni.ui.HStack():
omni.ui.Button("Add", clicked_fn=on_click)
omni.ui.Button("Reset", clicked_fn=on_reset)
def on_shutdown(self):
print("[mindbot] shutdown")


@@ -0,0 +1,3 @@
[build-system]
requires = ["setuptools", "wheel", "toml"]
build-backend = "setuptools.build_meta"

source/mindbot/setup.py

@@ -0,0 +1,47 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Installation script for the 'mindbot' python package."""
import os
import toml
from setuptools import setup
# Obtain the extension data from the extension.toml file
EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__))
# Read the extension.toml file
EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml"))
# Minimum dependencies required prior to installation
INSTALL_REQUIRES = [
# NOTE: Add dependencies
"psutil",
]
# Installation operation
setup(
name="mindbot",
packages=["mindbot"],
author=EXTENSION_TOML_DATA["package"]["author"],
maintainer=EXTENSION_TOML_DATA["package"]["maintainer"],
url=EXTENSION_TOML_DATA["package"]["repository"],
version=EXTENSION_TOML_DATA["package"]["version"],
description=EXTENSION_TOML_DATA["package"]["description"],
keywords=EXTENSION_TOML_DATA["package"]["keywords"],
install_requires=INSTALL_REQUIRES,
license="BSD-3-Clause",
include_package_data=True,
python_requires=">=3.10",
classifiers=[
"Natural Language :: English",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Isaac Sim :: 4.5.0",
"Isaac Sim :: 5.0.0",
"Isaac Sim :: 5.1.0",
],
zip_safe=False,
)
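With the Isaac Lab Python environment active, the package is typically installed in editable mode; a hypothetical sketch, with the path taken from the setup.py header above (exact invocation depends on the local setup):
# Hypothetical editable-install sketch, equivalent to `pip install -e source/mindbot` in a shell.
import subprocess
import sys

subprocess.check_call([sys.executable, "-m", "pip", "install", "-e", "source/mindbot"])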