Initial commit
This commit is contained in:
35
source/mindbot/config/extension.toml
Normal file
35
source/mindbot/config/extension.toml
Normal file
@@ -0,0 +1,35 @@
|
||||
[package]
|
||||
|
||||
# Semantic Versioning is used: https://semver.org/
|
||||
version = "0.1.0"
|
||||
|
||||
# Description
|
||||
category = "isaaclab"
|
||||
readme = "README.md"
|
||||
|
||||
title = "Extension Template"
|
||||
author = "Isaac Lab Project Developers"
|
||||
maintainer = "Isaac Lab Project Developers"
|
||||
description="Extension Template for Isaac Lab"
|
||||
repository = "https://github.com/isaac-sim/IsaacLab.git"
|
||||
keywords = ["extension", "template", "isaaclab"]
|
||||
|
||||
[dependencies]
|
||||
"isaaclab" = {}
|
||||
"isaaclab_assets" = {}
|
||||
"isaaclab_mimic" = {}
|
||||
"isaaclab_rl" = {}
|
||||
"isaaclab_tasks" = {}
|
||||
# NOTE: Add additional dependencies here
|
||||
|
||||
[[python.module]]
|
||||
name = "mindbot"
|
||||
|
||||
[isaac_lab_settings]
|
||||
# TODO: Uncomment and list any apt dependencies here.
|
||||
# If none, leave it commented out.
|
||||
# apt_deps = ["example_package"]
|
||||
# TODO: Uncomment and provide path to a ros_ws
|
||||
# with rosdeps to be installed. If none,
|
||||
# leave it commented out.
|
||||
# ros_ws = "path/from/extension_root/to/ros_ws"
|
||||
10
source/mindbot/docs/CHANGELOG.rst
Normal file
10
source/mindbot/docs/CHANGELOG.rst
Normal file
@@ -0,0 +1,10 @@
|
||||
Changelog
|
||||
---------
|
||||
|
||||
0.1.0 (2025-11-13)
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Added
|
||||
^^^^^
|
||||
|
||||
* Created an initial template for building an extension or project based on Isaac Lab
|
||||
14
source/mindbot/mindbot/__init__.py
Normal file
14
source/mindbot/mindbot/__init__.py
Normal file
@@ -0,0 +1,14 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
"""
|
||||
Python module serving as a project/extension template.
|
||||
"""
|
||||
|
||||
# Register Gym environments.
|
||||
from .tasks import *
|
||||
|
||||
# Register UI extensions.
|
||||
from .ui_extension_example import *
|
||||
17
source/mindbot/mindbot/tasks/__init__.py
Normal file
17
source/mindbot/mindbot/tasks/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
"""Package containing task implementations for the extension."""
|
||||
|
||||
##
|
||||
# Register Gym environments.
|
||||
##
|
||||
|
||||
from isaaclab_tasks.utils import import_packages
|
||||
|
||||
# The blacklist is used to prevent importing configs from sub-packages
|
||||
_BLACKLIST_PKGS = ["utils", ".mdp"]
|
||||
# Import all configs in this package
|
||||
import_packages(__name__, _BLACKLIST_PKGS)
|
||||
6
source/mindbot/mindbot/tasks/direct/__init__.py
Normal file
6
source/mindbot/mindbot/tasks/direct/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import gymnasium as gym # noqa: F401
|
||||
29
source/mindbot/mindbot/tasks/direct/mindbot/__init__.py
Normal file
29
source/mindbot/mindbot/tasks/direct/mindbot/__init__.py
Normal file
@@ -0,0 +1,29 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import gymnasium as gym
|
||||
|
||||
from . import agents
|
||||
|
||||
##
|
||||
# Register Gym environments.
|
||||
##
|
||||
|
||||
|
||||
gym.register(
|
||||
id="Template-Mindbot-Direct-v0",
|
||||
entry_point=f"{__name__}.mindbot_env:MindbotEnv",
|
||||
disable_env_checker=True,
|
||||
kwargs={
|
||||
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
|
||||
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
|
||||
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
|
||||
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
|
||||
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
|
||||
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
|
||||
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
|
||||
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
|
||||
},
|
||||
)
|
||||
@@ -0,0 +1,4 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
@@ -0,0 +1,78 @@
|
||||
params:
|
||||
seed: 42
|
||||
|
||||
# environment wrapper clipping
|
||||
env:
|
||||
# added to the wrapper
|
||||
clip_observations: 5.0
|
||||
# can make custom wrapper?
|
||||
clip_actions: 1.0
|
||||
|
||||
algo:
|
||||
name: a2c_continuous
|
||||
|
||||
model:
|
||||
name: continuous_a2c_logstd
|
||||
|
||||
# doesn't have this fine grained control but made it close
|
||||
network:
|
||||
name: actor_critic
|
||||
separate: False
|
||||
space:
|
||||
continuous:
|
||||
mu_activation: None
|
||||
sigma_activation: None
|
||||
|
||||
mu_init:
|
||||
name: default
|
||||
sigma_init:
|
||||
name: const_initializer
|
||||
val: 0
|
||||
fixed_sigma: True
|
||||
mlp:
|
||||
units: [32, 32]
|
||||
activation: elu
|
||||
d2rl: False
|
||||
|
||||
initializer:
|
||||
name: default
|
||||
regularizer:
|
||||
name: None
|
||||
|
||||
load_checkpoint: False # flag which sets whether to load the checkpoint
|
||||
load_path: '' # path to the checkpoint to load
|
||||
|
||||
config:
|
||||
name: cartpole_direct
|
||||
env_name: rlgpu
|
||||
device: 'cuda:0'
|
||||
device_name: 'cuda:0'
|
||||
multi_gpu: False
|
||||
ppo: True
|
||||
mixed_precision: False
|
||||
normalize_input: True
|
||||
normalize_value: True
|
||||
num_actors: -1 # configured from the script (based on num_envs)
|
||||
reward_shaper:
|
||||
scale_value: 0.1
|
||||
normalize_advantage: True
|
||||
gamma: 0.99
|
||||
tau : 0.95
|
||||
learning_rate: 5e-4
|
||||
lr_schedule: adaptive
|
||||
kl_threshold: 0.008
|
||||
score_to_win: 20000
|
||||
max_epochs: 150
|
||||
save_best_after: 50
|
||||
save_frequency: 25
|
||||
grad_norm: 1.0
|
||||
entropy_coef: 0.0
|
||||
truncate_grads: True
|
||||
e_clip: 0.2
|
||||
horizon_length: 32
|
||||
minibatch_size: 16384
|
||||
mini_epochs: 8
|
||||
critic_coef: 4
|
||||
clip_value: True
|
||||
seq_length: 4
|
||||
bounds_loss_coef: 0.0001
|
||||
@@ -0,0 +1,38 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from isaaclab.utils import configclass
|
||||
|
||||
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
|
||||
|
||||
|
||||
@configclass
|
||||
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
|
||||
num_steps_per_env = 16
|
||||
max_iterations = 150
|
||||
save_interval = 50
|
||||
experiment_name = "cartpole_direct"
|
||||
policy = RslRlPpoActorCriticCfg(
|
||||
init_noise_std=1.0,
|
||||
actor_obs_normalization=False,
|
||||
critic_obs_normalization=False,
|
||||
actor_hidden_dims=[32, 32],
|
||||
critic_hidden_dims=[32, 32],
|
||||
activation="elu",
|
||||
)
|
||||
algorithm = RslRlPpoAlgorithmCfg(
|
||||
value_loss_coef=1.0,
|
||||
use_clipped_value_loss=True,
|
||||
clip_param=0.2,
|
||||
entropy_coef=0.005,
|
||||
num_learning_epochs=5,
|
||||
num_mini_batches=4,
|
||||
learning_rate=1.0e-3,
|
||||
schedule="adaptive",
|
||||
gamma=0.99,
|
||||
lam=0.95,
|
||||
desired_kl=0.01,
|
||||
max_grad_norm=1.0,
|
||||
)
|
||||
@@ -0,0 +1,20 @@
|
||||
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
|
||||
seed: 42
|
||||
|
||||
n_timesteps: !!float 1e6
|
||||
policy: 'MlpPolicy'
|
||||
n_steps: 16
|
||||
batch_size: 4096
|
||||
gae_lambda: 0.95
|
||||
gamma: 0.99
|
||||
n_epochs: 20
|
||||
ent_coef: 0.01
|
||||
learning_rate: !!float 3e-4
|
||||
clip_range: !!float 0.2
|
||||
policy_kwargs:
|
||||
activation_fn: nn.ELU
|
||||
net_arch: [32, 32]
|
||||
squash_output: False
|
||||
vf_coef: 1.0
|
||||
max_grad_norm: 1.0
|
||||
device: "cuda:0"
|
||||
@@ -0,0 +1,111 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: True
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: -2.9
|
||||
fixed_log_std: True
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ONE
|
||||
discriminator: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
# AMP memory (reference motion dataset)
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
motion_dataset:
|
||||
class: RandomMemory
|
||||
memory_size: 200000
|
||||
|
||||
# AMP memory (preventing discriminator overfitting)
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
reply_buffer:
|
||||
class: RandomMemory
|
||||
memory_size: 1000000
|
||||
|
||||
|
||||
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
|
||||
agent:
|
||||
class: AMP
|
||||
rollouts: 16
|
||||
learning_epochs: 6
|
||||
mini_batches: 2
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 5.0e-05
|
||||
learning_rate_scheduler: null
|
||||
learning_rate_scheduler_kwargs: null
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
amp_state_preprocessor: RunningStandardScaler
|
||||
amp_state_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 0.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.5
|
||||
discriminator_loss_scale: 5.0
|
||||
amp_batch_size: 512
|
||||
task_reward_weight: 0.0
|
||||
style_reward_weight: 1.0
|
||||
discriminator_batch_size: 4096
|
||||
discriminator_reward_scale: 2.0
|
||||
discriminator_logit_regularization_scale: 0.05
|
||||
discriminator_gradient_penalty_scale: 5.0
|
||||
discriminator_weight_decay_scale: 1.0e-04
|
||||
# rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "humanoid_amp_run"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 80000
|
||||
environment_info: log
|
||||
@@ -0,0 +1,80 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: False
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
|
||||
agent:
|
||||
class: IPPO
|
||||
rollouts: 16
|
||||
learning_epochs: 8
|
||||
mini_batches: 1
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 3.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cart_double_pendulum_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,82 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: True
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
|
||||
agent:
|
||||
class: MAPPO
|
||||
rollouts: 16
|
||||
learning_epochs: 8
|
||||
mini_batches: 1
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 3.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
shared_state_preprocessor: RunningStandardScaler
|
||||
shared_state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cart_double_pendulum_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,80 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: False
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
|
||||
agent:
|
||||
class: PPO
|
||||
rollouts: 32
|
||||
learning_epochs: 8
|
||||
mini_batches: 8
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 5.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 0.1
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cartpole_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
135
source/mindbot/mindbot/tasks/direct/mindbot/mindbot_env.py
Normal file
135
source/mindbot/mindbot/tasks/direct/mindbot/mindbot_env.py
Normal file
@@ -0,0 +1,135 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
import torch
|
||||
from collections.abc import Sequence
|
||||
|
||||
import isaaclab.sim as sim_utils
|
||||
from isaaclab.assets import Articulation
|
||||
from isaaclab.envs import DirectRLEnv
|
||||
from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
|
||||
from isaaclab.utils.math import sample_uniform
|
||||
|
||||
from .mindbot_env_cfg import MindbotEnvCfg
|
||||
|
||||
|
||||
class MindbotEnv(DirectRLEnv):
|
||||
cfg: MindbotEnvCfg
|
||||
|
||||
def __init__(self, cfg: MindbotEnvCfg, render_mode: str | None = None, **kwargs):
|
||||
super().__init__(cfg, render_mode, **kwargs)
|
||||
|
||||
self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name)
|
||||
self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name)
|
||||
|
||||
self.joint_pos = self.robot.data.joint_pos
|
||||
self.joint_vel = self.robot.data.joint_vel
|
||||
|
||||
def _setup_scene(self):
|
||||
self.robot = Articulation(self.cfg.robot_cfg)
|
||||
# add ground plane
|
||||
spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
|
||||
# clone and replicate
|
||||
self.scene.clone_environments(copy_from_source=False)
|
||||
# we need to explicitly filter collisions for CPU simulation
|
||||
if self.device == "cpu":
|
||||
self.scene.filter_collisions(global_prim_paths=[])
|
||||
# add articulation to scene
|
||||
self.scene.articulations["robot"] = self.robot
|
||||
# add lights
|
||||
light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
|
||||
light_cfg.func("/World/Light", light_cfg)
|
||||
|
||||
def _pre_physics_step(self, actions: torch.Tensor) -> None:
|
||||
self.actions = actions.clone()
|
||||
|
||||
def _apply_action(self) -> None:
|
||||
self.robot.set_joint_effort_target(self.actions * self.cfg.action_scale, joint_ids=self._cart_dof_idx)
|
||||
|
||||
def _get_observations(self) -> dict:
|
||||
obs = torch.cat(
|
||||
(
|
||||
self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
|
||||
self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
|
||||
self.joint_pos[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
|
||||
self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
|
||||
),
|
||||
dim=-1,
|
||||
)
|
||||
observations = {"policy": obs}
|
||||
return observations
|
||||
|
||||
def _get_rewards(self) -> torch.Tensor:
|
||||
total_reward = compute_rewards(
|
||||
self.cfg.rew_scale_alive,
|
||||
self.cfg.rew_scale_terminated,
|
||||
self.cfg.rew_scale_pole_pos,
|
||||
self.cfg.rew_scale_cart_vel,
|
||||
self.cfg.rew_scale_pole_vel,
|
||||
self.joint_pos[:, self._pole_dof_idx[0]],
|
||||
self.joint_vel[:, self._pole_dof_idx[0]],
|
||||
self.joint_pos[:, self._cart_dof_idx[0]],
|
||||
self.joint_vel[:, self._cart_dof_idx[0]],
|
||||
self.reset_terminated,
|
||||
)
|
||||
return total_reward
|
||||
|
||||
def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
self.joint_pos = self.robot.data.joint_pos
|
||||
self.joint_vel = self.robot.data.joint_vel
|
||||
|
||||
time_out = self.episode_length_buf >= self.max_episode_length - 1
|
||||
out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
|
||||
out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
|
||||
return out_of_bounds, time_out
|
||||
|
||||
def _reset_idx(self, env_ids: Sequence[int] | None):
|
||||
if env_ids is None:
|
||||
env_ids = self.robot._ALL_INDICES
|
||||
super()._reset_idx(env_ids)
|
||||
|
||||
joint_pos = self.robot.data.default_joint_pos[env_ids]
|
||||
joint_pos[:, self._pole_dof_idx] += sample_uniform(
|
||||
self.cfg.initial_pole_angle_range[0] * math.pi,
|
||||
self.cfg.initial_pole_angle_range[1] * math.pi,
|
||||
joint_pos[:, self._pole_dof_idx].shape,
|
||||
joint_pos.device,
|
||||
)
|
||||
joint_vel = self.robot.data.default_joint_vel[env_ids]
|
||||
|
||||
default_root_state = self.robot.data.default_root_state[env_ids]
|
||||
default_root_state[:, :3] += self.scene.env_origins[env_ids]
|
||||
|
||||
self.joint_pos[env_ids] = joint_pos
|
||||
self.joint_vel[env_ids] = joint_vel
|
||||
|
||||
self.robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids)
|
||||
self.robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
|
||||
self.robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids)
|
||||
|
||||
|
||||
@torch.jit.script
|
||||
def compute_rewards(
|
||||
rew_scale_alive: float,
|
||||
rew_scale_terminated: float,
|
||||
rew_scale_pole_pos: float,
|
||||
rew_scale_cart_vel: float,
|
||||
rew_scale_pole_vel: float,
|
||||
pole_pos: torch.Tensor,
|
||||
pole_vel: torch.Tensor,
|
||||
cart_pos: torch.Tensor,
|
||||
cart_vel: torch.Tensor,
|
||||
reset_terminated: torch.Tensor,
|
||||
):
|
||||
rew_alive = rew_scale_alive * (1.0 - reset_terminated.float())
|
||||
rew_termination = rew_scale_terminated * reset_terminated.float()
|
||||
rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1)
|
||||
rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1)
|
||||
rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1)
|
||||
total_reward = rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel
|
||||
return total_reward
|
||||
@@ -0,0 +1,48 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from isaaclab_assets.robots.cartpole import CARTPOLE_CFG
|
||||
|
||||
from isaaclab.assets import ArticulationCfg
|
||||
from isaaclab.envs import DirectRLEnvCfg
|
||||
from isaaclab.scene import InteractiveSceneCfg
|
||||
from isaaclab.sim import SimulationCfg
|
||||
from isaaclab.utils import configclass
|
||||
|
||||
|
||||
@configclass
|
||||
class MindbotEnvCfg(DirectRLEnvCfg):
|
||||
# env
|
||||
decimation = 2
|
||||
episode_length_s = 5.0
|
||||
# - spaces definition
|
||||
action_space = 1
|
||||
observation_space = 4
|
||||
state_space = 0
|
||||
|
||||
# simulation
|
||||
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
|
||||
|
||||
# robot(s)
|
||||
robot_cfg: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="/World/envs/env_.*/Robot")
|
||||
|
||||
# scene
|
||||
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
|
||||
|
||||
# custom parameters/scales
|
||||
# - controllable joint
|
||||
cart_dof_name = "slider_to_cart"
|
||||
pole_dof_name = "cart_to_pole"
|
||||
# - action scale
|
||||
action_scale = 100.0 # [N]
|
||||
# - reward scales
|
||||
rew_scale_alive = 1.0
|
||||
rew_scale_terminated = -2.0
|
||||
rew_scale_pole_pos = -1.0
|
||||
rew_scale_cart_vel = -0.01
|
||||
rew_scale_pole_vel = -0.005
|
||||
# - reset states/conditions
|
||||
initial_pole_angle_range = [-0.25, 0.25] # pole angle sample range on reset [rad]
|
||||
max_cart_pos = 3.0 # reset if cart exceeds this position [m]
|
||||
29
source/mindbot/mindbot/tasks/direct/mindbot_marl/__init__.py
Normal file
29
source/mindbot/mindbot/tasks/direct/mindbot_marl/__init__.py
Normal file
@@ -0,0 +1,29 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import gymnasium as gym
|
||||
|
||||
from . import agents
|
||||
|
||||
##
|
||||
# Register Gym environments.
|
||||
##
|
||||
|
||||
|
||||
gym.register(
|
||||
id="Template-Mindbot-Marl-Direct-v0",
|
||||
entry_point=f"{__name__}.mindbot_marl_env:MindbotMarlEnv",
|
||||
disable_env_checker=True,
|
||||
kwargs={
|
||||
"env_cfg_entry_point": f"{__name__}.mindbot_marl_env_cfg:MindbotMarlEnvCfg",
|
||||
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
|
||||
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
|
||||
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
|
||||
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
|
||||
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
|
||||
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
|
||||
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
|
||||
},
|
||||
)
|
||||
@@ -0,0 +1,4 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
@@ -0,0 +1,78 @@
|
||||
params:
|
||||
seed: 42
|
||||
|
||||
# environment wrapper clipping
|
||||
env:
|
||||
# added to the wrapper
|
||||
clip_observations: 5.0
|
||||
# can make custom wrapper?
|
||||
clip_actions: 1.0
|
||||
|
||||
algo:
|
||||
name: a2c_continuous
|
||||
|
||||
model:
|
||||
name: continuous_a2c_logstd
|
||||
|
||||
# doesn't have this fine grained control but made it close
|
||||
network:
|
||||
name: actor_critic
|
||||
separate: False
|
||||
space:
|
||||
continuous:
|
||||
mu_activation: None
|
||||
sigma_activation: None
|
||||
|
||||
mu_init:
|
||||
name: default
|
||||
sigma_init:
|
||||
name: const_initializer
|
||||
val: 0
|
||||
fixed_sigma: True
|
||||
mlp:
|
||||
units: [32, 32]
|
||||
activation: elu
|
||||
d2rl: False
|
||||
|
||||
initializer:
|
||||
name: default
|
||||
regularizer:
|
||||
name: None
|
||||
|
||||
load_checkpoint: False # flag which sets whether to load the checkpoint
|
||||
load_path: '' # path to the checkpoint to load
|
||||
|
||||
config:
|
||||
name: cartpole_direct
|
||||
env_name: rlgpu
|
||||
device: 'cuda:0'
|
||||
device_name: 'cuda:0'
|
||||
multi_gpu: False
|
||||
ppo: True
|
||||
mixed_precision: False
|
||||
normalize_input: True
|
||||
normalize_value: True
|
||||
num_actors: -1 # configured from the script (based on num_envs)
|
||||
reward_shaper:
|
||||
scale_value: 0.1
|
||||
normalize_advantage: True
|
||||
gamma: 0.99
|
||||
tau : 0.95
|
||||
learning_rate: 5e-4
|
||||
lr_schedule: adaptive
|
||||
kl_threshold: 0.008
|
||||
score_to_win: 20000
|
||||
max_epochs: 150
|
||||
save_best_after: 50
|
||||
save_frequency: 25
|
||||
grad_norm: 1.0
|
||||
entropy_coef: 0.0
|
||||
truncate_grads: True
|
||||
e_clip: 0.2
|
||||
horizon_length: 32
|
||||
minibatch_size: 16384
|
||||
mini_epochs: 8
|
||||
critic_coef: 4
|
||||
clip_value: True
|
||||
seq_length: 4
|
||||
bounds_loss_coef: 0.0001
|
||||
@@ -0,0 +1,38 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from isaaclab.utils import configclass
|
||||
|
||||
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
|
||||
|
||||
|
||||
@configclass
|
||||
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
|
||||
num_steps_per_env = 16
|
||||
max_iterations = 150
|
||||
save_interval = 50
|
||||
experiment_name = "cartpole_direct"
|
||||
policy = RslRlPpoActorCriticCfg(
|
||||
init_noise_std=1.0,
|
||||
actor_obs_normalization=False,
|
||||
critic_obs_normalization=False,
|
||||
actor_hidden_dims=[32, 32],
|
||||
critic_hidden_dims=[32, 32],
|
||||
activation="elu",
|
||||
)
|
||||
algorithm = RslRlPpoAlgorithmCfg(
|
||||
value_loss_coef=1.0,
|
||||
use_clipped_value_loss=True,
|
||||
clip_param=0.2,
|
||||
entropy_coef=0.005,
|
||||
num_learning_epochs=5,
|
||||
num_mini_batches=4,
|
||||
learning_rate=1.0e-3,
|
||||
schedule="adaptive",
|
||||
gamma=0.99,
|
||||
lam=0.95,
|
||||
desired_kl=0.01,
|
||||
max_grad_norm=1.0,
|
||||
)
|
||||
@@ -0,0 +1,20 @@
|
||||
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
|
||||
seed: 42
|
||||
|
||||
n_timesteps: !!float 1e6
|
||||
policy: 'MlpPolicy'
|
||||
n_steps: 16
|
||||
batch_size: 4096
|
||||
gae_lambda: 0.95
|
||||
gamma: 0.99
|
||||
n_epochs: 20
|
||||
ent_coef: 0.01
|
||||
learning_rate: !!float 3e-4
|
||||
clip_range: !!float 0.2
|
||||
policy_kwargs:
|
||||
activation_fn: nn.ELU
|
||||
net_arch: [32, 32]
|
||||
squash_output: False
|
||||
vf_coef: 1.0
|
||||
max_grad_norm: 1.0
|
||||
device: "cuda:0"
|
||||
@@ -0,0 +1,111 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: True
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: -2.9
|
||||
fixed_log_std: True
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ONE
|
||||
discriminator: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [1024, 512]
|
||||
activations: relu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
# AMP memory (reference motion dataset)
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
motion_dataset:
|
||||
class: RandomMemory
|
||||
memory_size: 200000
|
||||
|
||||
# AMP memory (preventing discriminator overfitting)
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
reply_buffer:
|
||||
class: RandomMemory
|
||||
memory_size: 1000000
|
||||
|
||||
|
||||
# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
|
||||
agent:
|
||||
class: AMP
|
||||
rollouts: 16
|
||||
learning_epochs: 6
|
||||
mini_batches: 2
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 5.0e-05
|
||||
learning_rate_scheduler: null
|
||||
learning_rate_scheduler_kwargs: null
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
amp_state_preprocessor: RunningStandardScaler
|
||||
amp_state_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 0.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.5
|
||||
discriminator_loss_scale: 5.0
|
||||
amp_batch_size: 512
|
||||
task_reward_weight: 0.0
|
||||
style_reward_weight: 1.0
|
||||
discriminator_batch_size: 4096
|
||||
discriminator_reward_scale: 2.0
|
||||
discriminator_logit_regularization_scale: 0.05
|
||||
discriminator_gradient_penalty_scale: 5.0
|
||||
discriminator_weight_decay_scale: 1.0e-04
|
||||
# rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "humanoid_amp_run"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 80000
|
||||
environment_info: log
|
||||
@@ -0,0 +1,80 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: False
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
|
||||
agent:
|
||||
class: IPPO
|
||||
rollouts: 16
|
||||
learning_epochs: 8
|
||||
mini_batches: 1
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 3.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cart_double_pendulum_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,82 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: True
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
|
||||
agent:
|
||||
class: MAPPO
|
||||
rollouts: 16
|
||||
learning_epochs: 8
|
||||
mini_batches: 1
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 3.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
shared_state_preprocessor: RunningStandardScaler
|
||||
shared_state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 1.0
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cart_double_pendulum_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,80 @@
|
||||
seed: 42
|
||||
|
||||
|
||||
# Models are instantiated using skrl's model instantiator utility
|
||||
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
|
||||
models:
|
||||
separate: False
|
||||
policy: # see gaussian_model parameters
|
||||
class: GaussianMixin
|
||||
clip_actions: False
|
||||
clip_log_std: True
|
||||
min_log_std: -20.0
|
||||
max_log_std: 2.0
|
||||
initial_log_std: 0.0
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ACTIONS
|
||||
value: # see deterministic_model parameters
|
||||
class: DeterministicMixin
|
||||
clip_actions: False
|
||||
network:
|
||||
- name: net
|
||||
input: OBSERVATIONS
|
||||
layers: [32, 32]
|
||||
activations: elu
|
||||
output: ONE
|
||||
|
||||
|
||||
# Rollout memory
|
||||
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
|
||||
memory:
|
||||
class: RandomMemory
|
||||
memory_size: -1 # automatically determined (same as agent:rollouts)
|
||||
|
||||
|
||||
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
|
||||
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
|
||||
agent:
|
||||
class: PPO
|
||||
rollouts: 32
|
||||
learning_epochs: 8
|
||||
mini_batches: 8
|
||||
discount_factor: 0.99
|
||||
lambda: 0.95
|
||||
learning_rate: 5.0e-04
|
||||
learning_rate_scheduler: KLAdaptiveLR
|
||||
learning_rate_scheduler_kwargs:
|
||||
kl_threshold: 0.008
|
||||
state_preprocessor: RunningStandardScaler
|
||||
state_preprocessor_kwargs: null
|
||||
value_preprocessor: RunningStandardScaler
|
||||
value_preprocessor_kwargs: null
|
||||
random_timesteps: 0
|
||||
learning_starts: 0
|
||||
grad_norm_clip: 1.0
|
||||
ratio_clip: 0.2
|
||||
value_clip: 0.2
|
||||
clip_predicted_values: True
|
||||
entropy_loss_scale: 0.0
|
||||
value_loss_scale: 2.0
|
||||
kl_threshold: 0.0
|
||||
rewards_shaper_scale: 0.1
|
||||
time_limit_bootstrap: False
|
||||
# logging and checkpoint
|
||||
experiment:
|
||||
directory: "cartpole_direct"
|
||||
experiment_name: ""
|
||||
write_interval: auto
|
||||
checkpoint_interval: auto
|
||||
|
||||
|
||||
# Sequential trainer
|
||||
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
|
||||
trainer:
|
||||
class: SequentialTrainer
|
||||
timesteps: 4800
|
||||
environment_info: log
|
||||
@@ -0,0 +1,184 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
import torch
|
||||
from collections.abc import Sequence
|
||||
|
||||
import isaaclab.sim as sim_utils
|
||||
from isaaclab.assets import Articulation
|
||||
from isaaclab.envs import DirectMARLEnv
|
||||
from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
|
||||
from isaaclab.utils.math import sample_uniform
|
||||
|
||||
from .mindbot_marl_env_cfg import MindbotMarlEnvCfg
|
||||
|
||||
|
||||
class MindbotMarlEnv(DirectMARLEnv):
|
||||
cfg: MindbotMarlEnvCfg
|
||||
|
||||
def __init__(self, cfg: MindbotMarlEnvCfg, render_mode: str | None = None, **kwargs):
|
||||
super().__init__(cfg, render_mode, **kwargs)
|
||||
|
||||
self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name)
|
||||
self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name)
|
||||
self._pendulum_dof_idx, _ = self.robot.find_joints(self.cfg.pendulum_dof_name)
|
||||
|
||||
self.joint_pos = self.robot.data.joint_pos
|
||||
self.joint_vel = self.robot.data.joint_vel
|
||||
|
||||
def _setup_scene(self):
|
||||
self.robot = Articulation(self.cfg.robot_cfg)
|
||||
# add ground plane
|
||||
spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
|
||||
# clone and replicate
|
||||
self.scene.clone_environments(copy_from_source=False)
|
||||
# we need to explicitly filter collisions for CPU simulation
|
||||
if self.device == "cpu":
|
||||
self.scene.filter_collisions(global_prim_paths=[])
|
||||
# add articulation to scene
|
||||
self.scene.articulations["robot"] = self.robot
|
||||
# add lights
|
||||
light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
|
||||
light_cfg.func("/World/Light", light_cfg)
|
||||
|
||||
def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None:
|
||||
self.actions = actions
|
||||
|
||||
def _apply_action(self) -> None:
|
||||
self.robot.set_joint_effort_target(
|
||||
self.actions["cart"] * self.cfg.cart_action_scale, joint_ids=self._cart_dof_idx
|
||||
)
|
||||
self.robot.set_joint_effort_target(
|
||||
self.actions["pendulum"] * self.cfg.pendulum_action_scale, joint_ids=self._pendulum_dof_idx
|
||||
)
|
||||
|
||||
def _get_observations(self) -> dict[str, torch.Tensor]:
|
||||
pole_joint_pos = normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1))
|
||||
pendulum_joint_pos = normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]].unsqueeze(dim=1))
|
||||
observations = {
|
||||
"cart": torch.cat(
|
||||
(
|
||||
self.joint_pos[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
|
||||
self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
|
||||
pole_joint_pos,
|
||||
self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
|
||||
),
|
||||
dim=-1,
|
||||
),
|
||||
"pendulum": torch.cat(
|
||||
(
|
||||
pole_joint_pos + pendulum_joint_pos,
|
||||
pendulum_joint_pos,
|
||||
self.joint_vel[:, self._pendulum_dof_idx[0]].unsqueeze(dim=1),
|
||||
),
|
||||
dim=-1,
|
||||
),
|
||||
}
|
||||
return observations
|
||||
|
||||
def _get_rewards(self) -> dict[str, torch.Tensor]:
|
||||
total_reward = compute_rewards(
|
||||
self.cfg.rew_scale_alive,
|
||||
self.cfg.rew_scale_terminated,
|
||||
self.cfg.rew_scale_cart_pos,
|
||||
self.cfg.rew_scale_cart_vel,
|
||||
self.cfg.rew_scale_pole_pos,
|
||||
self.cfg.rew_scale_pole_vel,
|
||||
self.cfg.rew_scale_pendulum_pos,
|
||||
self.cfg.rew_scale_pendulum_vel,
|
||||
self.joint_pos[:, self._cart_dof_idx[0]],
|
||||
self.joint_vel[:, self._cart_dof_idx[0]],
|
||||
normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]]),
|
||||
self.joint_vel[:, self._pole_dof_idx[0]],
|
||||
normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]]),
|
||||
self.joint_vel[:, self._pendulum_dof_idx[0]],
|
||||
math.prod(self.terminated_dict.values()),
|
||||
)
|
||||
return total_reward
|
||||
|
||||
def _get_dones(self) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
|
||||
self.joint_pos = self.robot.data.joint_pos
|
||||
self.joint_vel = self.robot.data.joint_vel
|
||||
|
||||
time_out = self.episode_length_buf >= self.max_episode_length - 1
|
||||
out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
|
||||
out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
|
||||
|
||||
terminated = {agent: out_of_bounds for agent in self.cfg.possible_agents}
|
||||
time_outs = {agent: time_out for agent in self.cfg.possible_agents}
|
||||
return terminated, time_outs
|
||||
|
||||
def _reset_idx(self, env_ids: Sequence[int] | None):
|
||||
if env_ids is None:
|
||||
env_ids = self.robot._ALL_INDICES
|
||||
super()._reset_idx(env_ids)
|
||||
|
||||
joint_pos = self.robot.data.default_joint_pos[env_ids]
|
||||
joint_pos[:, self._pole_dof_idx] += sample_uniform(
|
||||
self.cfg.initial_pole_angle_range[0] * math.pi,
|
||||
self.cfg.initial_pole_angle_range[1] * math.pi,
|
||||
joint_pos[:, self._pole_dof_idx].shape,
|
||||
joint_pos.device,
|
||||
)
|
||||
joint_pos[:, self._pendulum_dof_idx] += sample_uniform(
|
||||
self.cfg.initial_pendulum_angle_range[0] * math.pi,
|
||||
self.cfg.initial_pendulum_angle_range[1] * math.pi,
|
||||
joint_pos[:, self._pendulum_dof_idx].shape,
|
||||
joint_pos.device,
|
||||
)
|
||||
joint_vel = self.robot.data.default_joint_vel[env_ids]
|
||||
|
||||
default_root_state = self.robot.data.default_root_state[env_ids]
|
||||
default_root_state[:, :3] += self.scene.env_origins[env_ids]
|
||||
|
||||
self.joint_pos[env_ids] = joint_pos
|
||||
self.joint_vel[env_ids] = joint_vel
|
||||
|
||||
self.robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids)
|
||||
self.robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
|
||||
self.robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids)
|
||||
|
||||
|
||||
@torch.jit.script
|
||||
def normalize_angle(angle):
|
||||
return (angle + math.pi) % (2 * math.pi) - math.pi
|
||||
|
||||
|
||||
@torch.jit.script
|
||||
def compute_rewards(
|
||||
rew_scale_alive: float,
|
||||
rew_scale_terminated: float,
|
||||
rew_scale_cart_pos: float,
|
||||
rew_scale_cart_vel: float,
|
||||
rew_scale_pole_pos: float,
|
||||
rew_scale_pole_vel: float,
|
||||
rew_scale_pendulum_pos: float,
|
||||
rew_scale_pendulum_vel: float,
|
||||
cart_pos: torch.Tensor,
|
||||
cart_vel: torch.Tensor,
|
||||
pole_pos: torch.Tensor,
|
||||
pole_vel: torch.Tensor,
|
||||
pendulum_pos: torch.Tensor,
|
||||
pendulum_vel: torch.Tensor,
|
||||
reset_terminated: torch.Tensor,
|
||||
):
|
||||
rew_alive = rew_scale_alive * (1.0 - reset_terminated.float())
|
||||
rew_termination = rew_scale_terminated * reset_terminated.float()
|
||||
rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1)
|
||||
rew_pendulum_pos = rew_scale_pendulum_pos * torch.sum(
|
||||
torch.square(pole_pos + pendulum_pos).unsqueeze(dim=1), dim=-1
|
||||
)
|
||||
rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1)
|
||||
rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1)
|
||||
rew_pendulum_vel = rew_scale_pendulum_vel * torch.sum(torch.abs(pendulum_vel).unsqueeze(dim=1), dim=-1)
|
||||
|
||||
total_reward = {
|
||||
"cart": rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel,
|
||||
"pendulum": rew_alive + rew_termination + rew_pendulum_pos + rew_pendulum_vel,
|
||||
}
|
||||
return total_reward
|
||||
@@ -0,0 +1,55 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from isaaclab_assets.robots.cart_double_pendulum import CART_DOUBLE_PENDULUM_CFG
|
||||
|
||||
from isaaclab.assets import ArticulationCfg
|
||||
from isaaclab.envs import DirectMARLEnvCfg
|
||||
from isaaclab.scene import InteractiveSceneCfg
|
||||
from isaaclab.sim import SimulationCfg
|
||||
from isaaclab.utils import configclass
|
||||
|
||||
|
||||
@configclass
|
||||
class MindbotMarlEnvCfg(DirectMARLEnvCfg):
|
||||
# env
|
||||
decimation = 2
|
||||
episode_length_s = 5.0
|
||||
# multi-agent specification and spaces definition
|
||||
possible_agents = ["cart", "pendulum"]
|
||||
action_spaces = {"cart": 1, "pendulum": 1}
|
||||
observation_spaces = {"cart": 4, "pendulum": 3}
|
||||
state_space = -1
|
||||
|
||||
# simulation
|
||||
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
|
||||
|
||||
# robot(s)
|
||||
robot_cfg: ArticulationCfg = CART_DOUBLE_PENDULUM_CFG.replace(prim_path="/World/envs/env_.*/Robot")
|
||||
|
||||
# scene
|
||||
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
|
||||
|
||||
# custom parameters/scales
|
||||
# - controllable joint
|
||||
cart_dof_name = "slider_to_cart"
|
||||
pole_dof_name = "cart_to_pole"
|
||||
pendulum_dof_name = "pole_to_pendulum"
|
||||
# - action scale
|
||||
cart_action_scale = 100.0 # [N]
|
||||
pendulum_action_scale = 50.0 # [Nm]
|
||||
# - reward scales
|
||||
rew_scale_alive = 1.0
|
||||
rew_scale_terminated = -2.0
|
||||
rew_scale_cart_pos = 0
|
||||
rew_scale_cart_vel = -0.01
|
||||
rew_scale_pole_pos = -1.0
|
||||
rew_scale_pole_vel = -0.01
|
||||
rew_scale_pendulum_pos = -1.0
|
||||
rew_scale_pendulum_vel = -0.01
|
||||
# - reset states/conditions
|
||||
initial_pendulum_angle_range = [-0.25, 0.25] # pendulum angle sample range on reset [rad]
|
||||
initial_pole_angle_range = [-0.25, 0.25] # pole angle sample range on reset [rad]
|
||||
max_cart_pos = 3.0 # reset if cart exceeds this position [m]
|
||||
6
source/mindbot/mindbot/tasks/manager_based/__init__.py
Normal file
6
source/mindbot/mindbot/tasks/manager_based/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import gymnasium as gym # noqa: F401
|
||||
@@ -0,0 +1,29 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import gymnasium as gym
|
||||
|
||||
from . import agents
|
||||
|
||||
##
|
||||
# Register Gym environments.
|
||||
##
|
||||
|
||||
|
||||
gym.register(
|
||||
id="Template-Mindbot-v0",
|
||||
entry_point="isaaclab.envs:ManagerBasedRLEnv",
|
||||
disable_env_checker=True,
|
||||
kwargs={
|
||||
"env_cfg_entry_point": f"{__name__}.mindbot_env_cfg:MindbotEnvCfg",
|
||||
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
|
||||
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
|
||||
"skrl_amp_cfg_entry_point": f"{agents.__name__}:skrl_amp_cfg.yaml",
|
||||
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
|
||||
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
|
||||
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
|
||||
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
|
||||
},
|
||||
)
|
||||
@@ -0,0 +1,4 @@
|
||||
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
@@ -0,0 +1,78 @@
params:
  seed: 42

  # environment wrapper clipping
  env:
    # added to the wrapper
    clip_observations: 5.0
    # can make custom wrapper?
    clip_actions: 1.0

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  # doesn't have this fine grained control but made it close
  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None

        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [32, 32]
      activation: elu
      d2rl: False

      initializer:
        name: default
      regularizer:
        name: None

  load_checkpoint: False # flag which sets whether to load the checkpoint
  load_path: '' # path to the checkpoint to load

  config:
    name: cartpole_direct
    env_name: rlgpu
    device: 'cuda:0'
    device_name: 'cuda:0'
    multi_gpu: False
    ppo: True
    mixed_precision: False
    normalize_input: True
    normalize_value: True
    num_actors: -1 # configured from the script (based on num_envs)
    reward_shaper:
      scale_value: 0.1
    normalize_advantage: True
    gamma: 0.99
    tau: 0.95
    learning_rate: 5e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 150
    save_best_after: 50
    save_frequency: 25
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 32
    minibatch_size: 16384
    mini_epochs: 8
    critic_coef: 4
    clip_value: True
    seq_length: 4
    bounds_loss_coef: 0.0001
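# Sizing note (arithmetic on the values above, not an rl_games field): rl_games optimizes over
# horizon_length * num_actors transitions per update; with the template's default of 4096
# environments that is 32 * 4096 = 131072, which minibatch_size: 16384 splits into 8 minibatches.
# If the number of environments is reduced, minibatch_size generally needs to shrink with it so
# the batch remains evenly divisible.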
@@ -0,0 +1,38 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from isaaclab.utils import configclass

from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg


@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
    num_steps_per_env = 16
    max_iterations = 150
    save_interval = 50
    experiment_name = "cartpole_direct"
    policy = RslRlPpoActorCriticCfg(
        init_noise_std=1.0,
        actor_obs_normalization=False,
        critic_obs_normalization=False,
        actor_hidden_dims=[32, 32],
        critic_hidden_dims=[32, 32],
        activation="elu",
    )
    algorithm = RslRlPpoAlgorithmCfg(
        value_loss_coef=1.0,
        use_clipped_value_loss=True,
        clip_param=0.2,
        entropy_coef=0.005,
        num_learning_epochs=5,
        num_mini_batches=4,
        learning_rate=1.0e-3,
        schedule="adaptive",
        gamma=0.99,
        lam=0.95,
        desired_kl=0.01,
        max_grad_norm=1.0,
    )
@@ -0,0 +1,20 @@
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42

n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: !!float 0.2
policy_kwargs:
  activation_fn: nn.ELU
  net_arch: [32, 32]
  squash_output: False
vf_coef: 1.0
max_grad_norm: 1.0
device: "cuda:0"
@@ -0,0 +1,111 @@
seed: 42


# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
  separate: True
  policy: # see gaussian_model parameters
    class: GaussianMixin
    clip_actions: False
    clip_log_std: True
    min_log_std: -20.0
    max_log_std: 2.0
    initial_log_std: -2.9
    fixed_log_std: True
    network:
      - name: net
        input: OBSERVATIONS
        layers: [1024, 512]
        activations: relu
    output: ACTIONS
  value: # see deterministic_model parameters
    class: DeterministicMixin
    clip_actions: False
    network:
      - name: net
        input: OBSERVATIONS
        layers: [1024, 512]
        activations: relu
    output: ONE
  discriminator: # see deterministic_model parameters
    class: DeterministicMixin
    clip_actions: False
    network:
      - name: net
        input: OBSERVATIONS
        layers: [1024, 512]
        activations: relu
    output: ONE


# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
  class: RandomMemory
  memory_size: -1 # automatically determined (same as agent:rollouts)

# AMP memory (reference motion dataset)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
motion_dataset:
  class: RandomMemory
  memory_size: 200000

# AMP memory (preventing discriminator overfitting)
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
reply_buffer:
  class: RandomMemory
  memory_size: 1000000


# AMP agent configuration (field names are from AMP_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/amp.html
agent:
  class: AMP
  rollouts: 16
  learning_epochs: 6
  mini_batches: 2
  discount_factor: 0.99
  lambda: 0.95
  learning_rate: 5.0e-05
  learning_rate_scheduler: null
  learning_rate_scheduler_kwargs: null
  state_preprocessor: RunningStandardScaler
  state_preprocessor_kwargs: null
  value_preprocessor: RunningStandardScaler
  value_preprocessor_kwargs: null
  amp_state_preprocessor: RunningStandardScaler
  amp_state_preprocessor_kwargs: null
  random_timesteps: 0
  learning_starts: 0
  grad_norm_clip: 0.0
  ratio_clip: 0.2
  value_clip: 0.2
  clip_predicted_values: True
  entropy_loss_scale: 0.0
  value_loss_scale: 2.5
  discriminator_loss_scale: 5.0
  amp_batch_size: 512
  task_reward_weight: 0.0
  style_reward_weight: 1.0
  discriminator_batch_size: 4096
  discriminator_reward_scale: 2.0
  discriminator_logit_regularization_scale: 0.05
  discriminator_gradient_penalty_scale: 5.0
  discriminator_weight_decay_scale: 1.0e-04
  # rewards_shaper_scale: 1.0
  time_limit_bootstrap: False
  # logging and checkpoint
  experiment:
    directory: "humanoid_amp_run"
    experiment_name: ""
    write_interval: auto
    checkpoint_interval: auto


# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
  class: SequentialTrainer
  timesteps: 80000
  environment_info: log
@@ -0,0 +1,80 @@
seed: 42


# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
  separate: False
  policy: # see gaussian_model parameters
    class: GaussianMixin
    clip_actions: False
    clip_log_std: True
    min_log_std: -20.0
    max_log_std: 2.0
    initial_log_std: 0.0
    network:
      - name: net
        input: OBSERVATIONS
        layers: [32, 32]
        activations: elu
    output: ACTIONS
  value: # see deterministic_model parameters
    class: DeterministicMixin
    clip_actions: False
    network:
      - name: net
        input: OBSERVATIONS
        layers: [32, 32]
        activations: elu
    output: ONE


# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
  class: RandomMemory
  memory_size: -1 # automatically determined (same as agent:rollouts)


# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
  class: IPPO
  rollouts: 16
  learning_epochs: 8
  mini_batches: 1
  discount_factor: 0.99
  lambda: 0.95
  learning_rate: 3.0e-04
  learning_rate_scheduler: KLAdaptiveLR
  learning_rate_scheduler_kwargs:
    kl_threshold: 0.008
  state_preprocessor: RunningStandardScaler
  state_preprocessor_kwargs: null
  value_preprocessor: RunningStandardScaler
  value_preprocessor_kwargs: null
  random_timesteps: 0
  learning_starts: 0
  grad_norm_clip: 1.0
  ratio_clip: 0.2
  value_clip: 0.2
  clip_predicted_values: True
  entropy_loss_scale: 0.0
  value_loss_scale: 2.0
  kl_threshold: 0.0
  rewards_shaper_scale: 1.0
  time_limit_bootstrap: False
  # logging and checkpoint
  experiment:
    directory: "cart_double_pendulum_direct"
    experiment_name: ""
    write_interval: auto
    checkpoint_interval: auto


# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
  class: SequentialTrainer
  timesteps: 4800
  environment_info: log
@@ -0,0 +1,82 @@
seed: 42


# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
  separate: True
  policy: # see gaussian_model parameters
    class: GaussianMixin
    clip_actions: False
    clip_log_std: True
    min_log_std: -20.0
    max_log_std: 2.0
    initial_log_std: 0.0
    network:
      - name: net
        input: OBSERVATIONS
        layers: [32, 32]
        activations: elu
    output: ACTIONS
  value: # see deterministic_model parameters
    class: DeterministicMixin
    clip_actions: False
    network:
      - name: net
        input: OBSERVATIONS
        layers: [32, 32]
        activations: elu
    output: ONE


# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
  class: RandomMemory
  memory_size: -1 # automatically determined (same as agent:rollouts)


# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
  class: MAPPO
  rollouts: 16
  learning_epochs: 8
  mini_batches: 1
  discount_factor: 0.99
  lambda: 0.95
  learning_rate: 3.0e-04
  learning_rate_scheduler: KLAdaptiveLR
  learning_rate_scheduler_kwargs:
    kl_threshold: 0.008
  state_preprocessor: RunningStandardScaler
  state_preprocessor_kwargs: null
  shared_state_preprocessor: RunningStandardScaler
  shared_state_preprocessor_kwargs: null
  value_preprocessor: RunningStandardScaler
  value_preprocessor_kwargs: null
  random_timesteps: 0
  learning_starts: 0
  grad_norm_clip: 1.0
  ratio_clip: 0.2
  value_clip: 0.2
  clip_predicted_values: True
  entropy_loss_scale: 0.0
  value_loss_scale: 2.0
  kl_threshold: 0.0
  rewards_shaper_scale: 1.0
  time_limit_bootstrap: False
  # logging and checkpoint
  experiment:
    directory: "cart_double_pendulum_direct"
    experiment_name: ""
    write_interval: auto
    checkpoint_interval: auto


# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
  class: SequentialTrainer
  timesteps: 4800
  environment_info: log
@@ -0,0 +1,80 @@
seed: 42


# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
  separate: False
  policy: # see gaussian_model parameters
    class: GaussianMixin
    clip_actions: False
    clip_log_std: True
    min_log_std: -20.0
    max_log_std: 2.0
    initial_log_std: 0.0
    network:
      - name: net
        input: OBSERVATIONS
        layers: [32, 32]
        activations: elu
    output: ACTIONS
  value: # see deterministic_model parameters
    class: DeterministicMixin
    clip_actions: False
    network:
      - name: net
        input: OBSERVATIONS
        layers: [32, 32]
        activations: elu
    output: ONE


# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
  class: RandomMemory
  memory_size: -1 # automatically determined (same as agent:rollouts)


# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
  class: PPO
  rollouts: 32
  learning_epochs: 8
  mini_batches: 8
  discount_factor: 0.99
  lambda: 0.95
  learning_rate: 5.0e-04
  learning_rate_scheduler: KLAdaptiveLR
  learning_rate_scheduler_kwargs:
    kl_threshold: 0.008
  state_preprocessor: RunningStandardScaler
  state_preprocessor_kwargs: null
  value_preprocessor: RunningStandardScaler
  value_preprocessor_kwargs: null
  random_timesteps: 0
  learning_starts: 0
  grad_norm_clip: 1.0
  ratio_clip: 0.2
  value_clip: 0.2
  clip_predicted_values: True
  entropy_loss_scale: 0.0
  value_loss_scale: 2.0
  kl_threshold: 0.0
  rewards_shaper_scale: 0.1
  time_limit_bootstrap: False
  # logging and checkpoint
  experiment:
    directory: "cartpole_direct"
    experiment_name: ""
    write_interval: auto
    checkpoint_interval: auto


# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
  class: SequentialTrainer
  timesteps: 4800
  environment_info: log
@@ -0,0 +1,10 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

"""This sub-module contains the functions that are specific to the environment."""

from isaaclab.envs.mdp import *  # noqa: F401, F403

from .rewards import *  # noqa: F401, F403
@@ -0,0 +1,26 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations

import torch
from typing import TYPE_CHECKING

from isaaclab.assets import Articulation
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils.math import wrap_to_pi

if TYPE_CHECKING:
    from isaaclab.envs import ManagerBasedRLEnv


def joint_pos_target_l2(env: ManagerBasedRLEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
    """Penalize joint position deviation from a target value."""
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    # wrap the joint positions to (-pi, pi)
    joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
    # compute the reward
    return torch.sum(torch.square(joint_pos - target), dim=1)
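# Wiring sketch: the reward manager calls this term as joint_pos_target_l2(env, **params), so the
# keys of the RewTerm params dict must match the keyword arguments above ("target", "asset_cfg"),
# while `env` is supplied automatically. The environment configuration below uses it as:
#
#   pole_pos = RewTerm(
#       func=mdp.joint_pos_target_l2,
#       weight=-1.0,
#       params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]), "target": 0.0},
#   )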
@@ -0,0 +1,180 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

import math

import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass

from . import mdp

##
# Pre-defined configs
##

from isaaclab_assets.robots.cartpole import CARTPOLE_CFG  # isort:skip


##
# Scene definition
##


@configclass
class MindbotSceneCfg(InteractiveSceneCfg):
    """Configuration for a cart-pole scene."""

    # ground plane
    ground = AssetBaseCfg(
        prim_path="/World/ground",
        spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
    )

    # robot
    robot: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")

    # lights
    dome_light = AssetBaseCfg(
        prim_path="/World/DomeLight",
        spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
    )


##
# MDP settings
##


@configclass
class ActionsCfg:
    """Action specifications for the MDP."""

    joint_effort = mdp.JointEffortActionCfg(asset_name="robot", joint_names=["slider_to_cart"], scale=100.0)


@configclass
class ObservationsCfg:
    """Observation specifications for the MDP."""

    @configclass
    class PolicyCfg(ObsGroup):
        """Observations for policy group."""

        # observation terms (order preserved)
        joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel)
        joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel)

        def __post_init__(self) -> None:
            self.enable_corruption = False
            self.concatenate_terms = True

    # observation groups
    policy: PolicyCfg = PolicyCfg()


@configclass
class EventCfg:
    """Configuration for events."""

    # reset
    reset_cart_position = EventTerm(
        func=mdp.reset_joints_by_offset,
        mode="reset",
        params={
            "asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]),
            "position_range": (-1.0, 1.0),
            "velocity_range": (-0.5, 0.5),
        },
    )

    reset_pole_position = EventTerm(
        func=mdp.reset_joints_by_offset,
        mode="reset",
        params={
            "asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]),
            "position_range": (-0.25 * math.pi, 0.25 * math.pi),
            "velocity_range": (-0.25 * math.pi, 0.25 * math.pi),
        },
    )


@configclass
class RewardsCfg:
    """Reward terms for the MDP."""

    # (1) Constant running reward
    alive = RewTerm(func=mdp.is_alive, weight=1.0)
    # (2) Failure penalty
    terminating = RewTerm(func=mdp.is_terminated, weight=-2.0)
    # (3) Primary task: keep pole upright
    pole_pos = RewTerm(
        func=mdp.joint_pos_target_l2,
        weight=-1.0,
        params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]), "target": 0.0},
    )
    # (4) Shaping tasks: lower cart velocity
    cart_vel = RewTerm(
        func=mdp.joint_vel_l1,
        weight=-0.01,
        params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"])},
    )
    # (5) Shaping tasks: lower pole angular velocity
    pole_vel = RewTerm(
        func=mdp.joint_vel_l1,
        weight=-0.005,
        params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"])},
    )


@configclass
class TerminationsCfg:
    """Termination terms for the MDP."""

    # (1) Time out
    time_out = DoneTerm(func=mdp.time_out, time_out=True)
    # (2) Cart out of bounds
    cart_out_of_bounds = DoneTerm(
        func=mdp.joint_pos_out_of_manual_limit,
        params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]), "bounds": (-3.0, 3.0)},
    )


##
# Environment configuration
##


@configclass
class MindbotEnvCfg(ManagerBasedRLEnvCfg):
    # Scene settings
    scene: MindbotSceneCfg = MindbotSceneCfg(num_envs=4096, env_spacing=4.0)
    # Basic settings
    observations: ObservationsCfg = ObservationsCfg()
    actions: ActionsCfg = ActionsCfg()
    events: EventCfg = EventCfg()
    # MDP settings
    rewards: RewardsCfg = RewardsCfg()
    terminations: TerminationsCfg = TerminationsCfg()

    # Post initialization
    def __post_init__(self) -> None:
        """Post initialization."""
        # general settings
        self.decimation = 2
        self.episode_length_s = 5
        # viewer settings
        self.viewer.eye = (8.0, 0.0, 5.0)
        # simulation settings
        self.sim.dt = 1 / 120
        self.sim.render_interval = self.decimation
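# Timing sketch (derived from the values set above): physics steps at sim.dt = 1/120 s and
# decimation = 2 means the policy acts every 2 physics steps (60 Hz), so episode_length_s = 5
# corresponds to roughly 5 * 60 = 300 policy steps per episode.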
46
source/mindbot/mindbot/ui_extension_example.py
Normal file
46
source/mindbot/mindbot/ui_extension_example.py
Normal file
@@ -0,0 +1,46 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

import omni.ext


# Functions and vars are available to other extension as usual in python: `example.python_ext.some_public_function(x)`
def some_public_function(x: int):
    print("[mindbot] some_public_function was called with x: ", x)
    return x**x


# Any class derived from `omni.ext.IExt` in top level module (defined in `python.modules` of `extension.toml`) will be
# instantiated when extension gets enabled and `on_startup(ext_id)` will be called. Later when extension gets disabled
# on_shutdown() is called.
class ExampleExtension(omni.ext.IExt):
    # ext_id is current extension id. It can be used with extension manager to query additional information, like where
    # this extension is located on filesystem.
    def on_startup(self, ext_id):
        print("[mindbot] startup")

        self._count = 0

        self._window = omni.ui.Window("My Window", width=300, height=300)
        with self._window.frame:
            with omni.ui.VStack():
                label = omni.ui.Label("")

                def on_click():
                    self._count += 1
                    label.text = f"count: {self._count}"

                def on_reset():
                    self._count = 0
                    label.text = "empty"

                on_reset()

                with omni.ui.HStack():
                    omni.ui.Button("Add", clicked_fn=on_click)
                    omni.ui.Button("Reset", clicked_fn=on_reset)

    def on_shutdown(self):
        print("[mindbot] shutdown")
3
source/mindbot/pyproject.toml
Normal file
3
source/mindbot/pyproject.toml
Normal file
@@ -0,0 +1,3 @@
[build-system]
requires = ["setuptools", "wheel", "toml"]
build-backend = "setuptools.build_meta"
47
source/mindbot/setup.py
Normal file
47
source/mindbot/setup.py
Normal file
@@ -0,0 +1,47 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

"""Installation script for the 'mindbot' python package."""

import os
import toml

from setuptools import setup

# Obtain the extension data from the extension.toml file
EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__))
# Read the extension.toml file
EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml"))

# Minimum dependencies required prior to installation
INSTALL_REQUIRES = [
    # NOTE: Add dependencies
    "psutil",
]

# Installation operation
setup(
    name="mindbot",
    packages=["mindbot"],
    author=EXTENSION_TOML_DATA["package"]["author"],
    maintainer=EXTENSION_TOML_DATA["package"]["maintainer"],
    url=EXTENSION_TOML_DATA["package"]["repository"],
    version=EXTENSION_TOML_DATA["package"]["version"],
    description=EXTENSION_TOML_DATA["package"]["description"],
    keywords=EXTENSION_TOML_DATA["package"]["keywords"],
    install_requires=INSTALL_REQUIRES,
    license="Apache-2.0",
    include_package_data=True,
    python_requires=">=3.10",
    classifiers=[
        "Natural Language :: English",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Isaac Sim :: 4.5.0",
        "Isaac Sim :: 5.0.0",
        "Isaac Sim :: 5.1.0",
    ],
    zip_safe=False,
)
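# Install sketch (assuming the repository layout in this commit and an Isaac Lab Python
# environment that already provides the isaaclab* dependencies listed in extension.toml):
#
#   python -m pip install -e source/mindbot
#
# An editable install keeps the `mindbot` package and its registered tasks importable while
# the sources are being edited.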