mindbot/scripts/tools/train_and_publish_checkpoints.py

# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

"""Script to manage pretrained checkpoints for Isaac Lab environments.

This script is used to train and publish pretrained checkpoints for Isaac Lab environments.
It supports multiple workflows: rl_games, rsl_rl, sb3, and skrl.

* To train an agent using the rl_games workflow on the Isaac-Cartpole-v0 environment:

  .. code-block:: shell

    python scripts/tools/train_and_publish_checkpoints.py --train rl_games:Isaac-Cartpole-v0

* To train and publish the checkpoints for all workflows on only the direct Cartpole environments:

  .. code-block:: shell

    python scripts/tools/train_and_publish_checkpoints.py \
        -tp "*:Isaac-Cartpole-*Direct-v0" \
        --/persistent/isaaclab/asset_root/pretrained_checkpoints="/some/path"

* To review all repose cube jobs, excluding the 'Play' tasks and 'skrl' workflows:

  .. code-block:: shell

    python scripts/tools/train_and_publish_checkpoints.py \
        -r "*:*Repose-Cube*" \
        --exclude "*:*Play*" \
        --exclude skrl:*

* To publish all results (that have been reviewed and approved).

  .. code-block:: shell

    python scripts/tools/train_and_publish_checkpoints.py \
        --publish --all \
        --/persistent/isaaclab/asset_root/pretrained_checkpoints="/some/path"

"""

import argparse

from isaaclab.app import AppLauncher

# Initialize the parser
parser = argparse.ArgumentParser(
    description="""
Script for training and publishing pre-trained checkpoints in Isaac Lab.

Examples:
    # Train an agent using the rl_games workflow for the Isaac-Cartpole-v0 environment.
    train_and_publish_checkpoints.py --train rl_games:Isaac-Cartpole-v0

    # Train and publish checkpoints for all workflows, targeting only direct Cartpole environments.
    train_and_publish_checkpoints.py -tp "*:Isaac-Cartpole-*Direct-v0" \\
      --/persistent/isaaclab/asset_root/pretrained_checkpoints="/some/path"

    # Review all Repose Cube jobs, excluding Play tasks and skrl jobs.
    train_and_publish_checkpoints.py -r "*:*Repose-Cube*" --exclude "*:*Play*" --exclude skrl:*

    # Publish all results that have been reviewed and approved.
    train_and_publish_checkpoints.py --publish --all \\
      --/persistent/isaaclab/asset_root/pretrained_checkpoints="/some/path"
""",
    formatter_class=argparse.RawTextHelpFormatter,
)

# Add positional arguments that can accept zero or more values
parser.add_argument(
    "jobs",
    nargs="*",
    help="""
A job consists of a workflow and a task name, separated by a colon (wildcards are optional). Examples:

    rl_games:Isaac-Humanoid-*v0      # Wildcard for any Humanoid version
    rsl_rl:Isaac-Ant-*-v0            # Wildcard for any Ant environment
    *:Isaac-Velocity-Flat-Spot-v0    # Wildcard for any workflow, specific task

Wildcards can be used in either the workflow or task name to match multiple entries.
""",
)
parser.add_argument("-t", "--train", action="store_true", help="Train checkpoints for later publishing.")
parser.add_argument("-p", "--publish_checkpoint", action="store_true", help="Publish pre-trained checkpoints.")
parser.add_argument("-r", "--review", action="store_true", help="Review checkpoints.")
parser.add_argument("-l", "--list", action="store_true", help="List all available environments and workflows.")
parser.add_argument("-f", "--force", action="store_true", help="Force training when results already exist.")
parser.add_argument("-a", "--all", action="store_true", help="Run all valid workflow task pairs.")
parser.add_argument(
    "-E",
    "--exclude",
    action="append",
    type=str,
    default=[],
    help="Excludes jobs matching the argument, with wildcard support.",
)
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--force_review", action="store_true", help="Forces review when one already exists.")
parser.add_argument("--force_publish", action="store_true", help="Publish checkpoints without review.")
parser.add_argument("--headless", action="store_true", help="Run training without the UI.")

args, _ = parser.parse_known_args()

# Need something to do
if len(args.jobs) == 0 and not args.all:
    parser.error("Jobs must be provided, or --all.")

# Must train, publish, review or list
if not (args.train or args.publish_checkpoint or args.review or args.list):
    parser.error("A train, publish, review or list flag must be given.")

# List excludes train and publish
if args.list and (args.train or args.publish_checkpoint):
    parser.error("Can't train or publish when listing.")

# launch omniverse app
app_launcher = AppLauncher(headless=True)
simulation_app = app_launcher.app


import csv

# Now everything else
import fnmatch
import json
import os
import subprocess
import sys

import gymnasium as gym
import numpy as np

import omni.client
from omni.client._omniclient import CopyBehavior

from isaaclab_rl.utils.pretrained_checkpoint import (
    WORKFLOW_EXPERIMENT_NAME_VARIABLE,
    WORKFLOW_PLAYER,
    WORKFLOW_TRAINER,
    WORKFLOWS,
    get_log_root_path,
    get_pretrained_checkpoint_path,
    get_pretrained_checkpoint_publish_path,
    get_pretrained_checkpoint_review,
    get_pretrained_checkpoint_review_path,
    has_pretrained_checkpoint_job_finished,
    has_pretrained_checkpoint_job_run,
    has_pretrained_checkpoints_asset_root_dir,
)

# Need somewhere to publish
if args.publish_checkpoint and not has_pretrained_checkpoints_asset_root_dir():
    raise Exception("A /persistent/isaaclab/asset_root/pretrained_checkpoints setting is required to publish.")


def train_job(workflow, task_name, headless=False, force=False, num_envs=None):
    """
    This trains a task using the workflow's train.py script, overriding the experiment name to ensure unique
    log directories.  By default it will return if an experiment has already been run.

    Args:
        workflow: The workflow.
        task_name: The task name.
        headless: Should the training run without the UI.
        force: Run training even if previous experiments have been run.
        num_envs: How many simultaneous environments to simulate, overriding the config.
    """

    log_root_path = get_log_root_path(workflow, task_name)

    # We already ran this
    if not force and os.path.exists(log_root_path) and len(os.listdir(log_root_path)) > 0:
        print(f"Skipping training of {workflow}:{task_name}, already has been run")
        return

    print(f"Training {workflow}:{task_name}")

    # Construct our command
    cmd = [
        sys.executable,
        WORKFLOW_TRAINER[workflow],
        "--task",
        task_name,
        "--enable_cameras",
    ]

    # Changes the directory name for logging
    if WORKFLOW_EXPERIMENT_NAME_VARIABLE[workflow]:
        cmd.append(f"{WORKFLOW_EXPERIMENT_NAME_VARIABLE[workflow]}={task_name}")

    if headless:
        cmd.append("--headless")
    if num_envs:
        cmd.extend(["--num_envs", str(num_envs)])

    print("Running : " + " ".join(cmd))

    subprocess.run(cmd)


def review_pretrained_checkpoint(workflow, task_name, force_review=False, num_envs=None):
    """
    This initiates a review of the pretrained checkpoint.  The play.py script for the workflow is run, and the user
    inspects the results.  When done they close the simulator and will be prompted for their review.

    Args:
        workflow: The workflow.
        task_name: The task name.
        force_review: Performs the review even if a review already exists.
        num_envs: How many simultaneous environments to simulate, overriding the config.
    """

    # This workflow task pair hasn't been trained
    if not has_pretrained_checkpoint_job_run(workflow, task_name):
        print(f"Skipping review of {workflow}:{task_name}, hasn't been trained yet")
        return

    # Couldn't find the checkpoint
    if not has_pretrained_checkpoint_job_finished(workflow, task_name):
        print(f"Training not complete for {workflow}:{task_name}")
        return

    review = get_pretrained_checkpoint_review(workflow, task_name)

    if not force_review and review and review["reviewed"]:
        print(f"Review already complete for {workflow}:{task_name}")
        return

    print(f"Reviewing {workflow}:{task_name}")

    # Construct our command
    cmd = [
        sys.executable,
        WORKFLOW_PLAYER[workflow],
        "--task",
        task_name,
        "--checkpoint",
        get_pretrained_checkpoint_path(workflow, task_name),
        "--enable_cameras",
    ]

    if num_envs:
        cmd.extend(["--num_envs", str(num_envs)])

    print("Running : " + " ".join(cmd))

    subprocess.run(cmd)

    # Give user a chance to leave the old review
    if force_review and review and review["reviewed"]:
        result = review["result"]
        notes = review.get("notes")
        print(f"A review already exists for {workflow}:{task_name}, it was marked as '{result}'.")
        print(f"  Notes: {notes}")
        answer = input("Would you like to replace it?  Please answer yes or no (y/n) [n]: ").strip().lower()
        if answer != "y":
            return

    # Get the verdict from the user
    print(f"Do you accept this checkpoint for {workflow}:{task_name}?")

    answer = input("Please answer yes, no or undetermined (y/n/u) [u]: ").strip().lower()
    if answer not in {"y", "n", "u"}:
        answer = "u"
    answer_map = {
        "y": "accepted",
        "n": "rejected",
        "u": "undetermined",
    }

    # Create the review dict
    review = {
        "reviewed": True,
        "result": answer_map[answer],
    }

    # Maybe add some notes
    notes = input("Please add notes or hit enter: ").strip().lower()
    if notes:
        review["notes"] = notes

    # Save the review JSON file
    path = get_pretrained_checkpoint_review_path(workflow, task_name)
    if not path:
        raise Exception("This shouldn't be possible, something went very wrong.")

    with open(path, "w") as f:
        json.dump(review, f, indent=4)


def publish_pretrained_checkpoint(workflow, task_name, force_publish=False):
    """
    This publishes the pretrained checkpoint to Nucleus using the asset path in the
    /persistent/isaaclab/asset_root/pretrained_checkpoints Carb variable.

    Args:
        workflow: The workflow.
        task_name: The task name.
        force_publish: Publish without review.
    """

    # This workflow task pair hasn't been trained
    if not has_pretrained_checkpoint_job_run(workflow, task_name):
        print(f"Skipping publishing of {workflow}:{task_name}, hasn't been trained yet")
        return

    # Couldn't find the checkpoint
    if not has_pretrained_checkpoint_job_finished(workflow, task_name):
        print(f"Training not complete for {workflow}:{task_name}")
        return

    # Get local pretrained checkpoint path
    local_path = get_pretrained_checkpoint_path(workflow, task_name)
    if not local_path:
        raise Exception("This shouldn't be possible, something went very wrong.")

    # Not forcing, need to check review results
    if not force_publish:
        # Grab the review if it exists
        review = get_pretrained_checkpoint_review(workflow, task_name)

        if not review or not review["reviewed"]:
            print(f"Skipping publishing of {workflow}:{task_name}, hasn't been reviewed yet")
            return

        result = review["result"]
        if result != "accepted":
            print(f'Skipping publishing of {workflow}:{task_name}, review result was "{result}"')
            return

    print(f"Publishing {workflow}:{task_name}")

    # Copy the file
    publish_path = get_pretrained_checkpoint_publish_path(workflow, task_name)
    omni.client.copy_file(local_path, publish_path, CopyBehavior.OVERWRITE)


def get_job_summary_row(workflow, task_name):
    """Returns a single row summary of the job"""

    has_run = has_pretrained_checkpoint_job_run(workflow, task_name)
    has_finished = has_pretrained_checkpoint_job_finished(workflow, task_name)
    review = get_pretrained_checkpoint_review(workflow, task_name)

    if review:
        result = review.get("result", "undetermined")
        notes = review.get("notes", "")
    else:
        result = ""
        notes = ""

    return [workflow, task_name, has_run, has_finished, result, notes]


def main():
    # Figure out what workflows and tasks we'll be using
    if args.all:
        jobs = ["*:*"]
    else:
        jobs = args.jobs

    if args.list:
        print()
        print("# Workflow, Task, Ran, Finished, Review, Notes")

    summary_rows = []

    # Could be implemented more efficiently, but the performance gain would be inconsequential
    for workflow in WORKFLOWS:
        for task_spec in sorted(gym.registry.values(), key=lambda t: t.id):
            job_id = f"{workflow}:{task_spec.id}"

            # We've excluded this job
            if any(fnmatch.fnmatch(job_id, e) for e in args.exclude):
                continue

            # None of our jobs match this pair
            if not np.any(np.array([fnmatch.fnmatch(job_id, job) for job in jobs])):
                continue

            # No config for this workflow
            if workflow + "_cfg_entry_point" not in task_spec.kwargs:
                continue

            if args.list:
                summary_rows.append(get_job_summary_row(workflow, task_spec.id))
                continue

            # Training reviewing and publishing
            if args.train:
                train_job(workflow, task_spec.id, args.headless, args.force, args.num_envs)

            if args.review:
                review_pretrained_checkpoint(workflow, task_spec.id, args.force_review, args.num_envs)

            if args.publish_checkpoint:
                publish_pretrained_checkpoint(workflow, task_spec.id, args.force_publish)

    if args.list:
        writer = csv.writer(sys.stdout, quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerows(summary_rows)


if __name__ == "__main__":
    try:
        # Run the main function
        main()
    except Exception as e:
        raise e
    finally:
        # Close the app
        simulation_app.close()