init commit

This commit is contained in:
zyhe
2026-03-16 11:44:10 +00:00
commit 94384a93c9
552 changed files with 363038 additions and 0 deletions

20
nimbus/utils/config.py Normal file
View File

@@ -0,0 +1,20 @@
from omegaconf import OmegaConf
def load_config(*yaml_files, cli_args=None):
    """Load and merge YAML config files plus optional CLI dotlist overrides.

    Args:
        *yaml_files: Paths of YAML files, merged left to right (later wins).
        cli_args: Optional list of ``key=value`` dotlist overrides; they take
            precedence over every YAML file.

    Returns:
        The merged OmegaConf config with all interpolations resolved.
    """
    sources = [OmegaConf.load(path) for path in yaml_files]
    sources.append(OmegaConf.from_cli(cli_args if cli_args is not None else []))
    merged = OmegaConf.merge(*sources)
    OmegaConf.resolve(merged)
    return merged
def config_to_primitive(config, resolve=True):
    """Convert an OmegaConf config into plain Python dicts/lists.

    Args:
        config: OmegaConf config object.
        resolve: When True, resolve interpolations before conversion.
    """
    return OmegaConf.to_container(config, resolve=resolve)
def save_config(config, path):
    """Serialize *config* as YAML to *path* using UTF-8 encoding."""
    with open(path, "w", encoding="utf-8") as handle:
        OmegaConf.save(config=config, f=handle)

View File

@@ -0,0 +1,138 @@
"""
Config Processor: Responsible for identifying, converting, and loading configuration files.
"""
from omegaconf import DictConfig, OmegaConf
from nimbus.utils.config import load_config
class ConfigProcessor:
    """Identifies, validates, and loads configuration files.

    CLI overrides are validated against the loaded config so that a typo in an
    override path fails fast instead of silently introducing a new key.
    """

    @staticmethod
    def _clean_cli_args(cli_args):
        """Strip an optional leading ``--`` from each CLI argument.

        Shared by validation and loading so both see identical override paths.
        """
        if not cli_args:
            return []
        return [arg[2:] if arg.startswith("--") else arg for arg in cli_args]

    def _check_config_path_exists(self, config, path):
        """
        Check if a configuration path exists in the config object

        Args:
            config: OmegaConf config object
            path: String path like 'stage_pipe.worker_num' or 'load_stage.scene_loader.args.random_num'

        Returns:
            bool: Whether the path exists in the config
        """
        try:
            current = config
            for key in path.split("."):
                # Only dict-like nodes can be descended into by key
                if not isinstance(current, DictConfig) or key not in current:
                    return False
                current = current[key]
            return True
        except Exception:
            # Any resolution error means the path is unusable
            return False

    def _validate_cli_args(self, config, cli_args):
        """
        Validate that all CLI arguments correspond to existing paths in the config

        Args:
            config: OmegaConf config object
            cli_args: List of command line arguments

        Raises:
            ValueError: If any CLI argument path doesn't exist in the config
        """
        if not cli_args:
            return
        cleaned_cli_args = self._clean_cli_args(cli_args)
        # Parse CLI args to get the override paths
        try:
            cli_conf = OmegaConf.from_cli(cleaned_cli_args)
        except Exception as e:
            raise ValueError(f"Invalid CLI argument format: {e}. Please use format like: stage_pipe.worker_num='[2,4]'")

        def check_nested_paths(conf, prefix=""):
            """Recursively check all paths in the CLI config"""
            for key, value in conf.items():
                current_path = f"{prefix}.{key}" if prefix else key
                # Both intermediate and leaf paths must already exist
                if not self._check_config_path_exists(config, current_path):
                    raise ValueError(f"Configuration path '{current_path}' does not exist in the config file")
                if isinstance(value, DictConfig):
                    check_nested_paths(value, current_path)

        try:
            check_nested_paths(cli_conf)
        except ValueError:
            raise
        except Exception:
            # If there's an issue parsing CLI args, provide helpful error message
            raise ValueError("Invalid CLI argument format. Please use format like: --key=value or --nested.key=value")

    def process_config(self, config_path, cli_args=None):
        """
        Process the config file

        Args:
            config_path: Path to the config file
            cli_args: List of command line arguments

        Returns:
            OmegaConf: Processed config object

        Raises:
            ValueError: If the config cannot be loaded or an override path is invalid.
        """
        # Load config first without CLI args to validate paths
        try:
            base_config = load_config(config_path)
        except Exception as e:
            raise ValueError(f"Error loading config: {e}")
        # Validate that CLI arguments correspond to existing paths
        if cli_args:
            self._validate_cli_args(base_config, cli_args)
        # Now load config with CLI args (validation passed)
        return load_config(config_path, cli_args=self._clean_cli_args(cli_args))

    def print_final_config(self, config):
        """
        Print the final running config

        Args:
            config: OmegaConf config object
        """
        print("=" * 50)
        print("final config:")
        print("=" * 50)
        print(OmegaConf.to_yaml(config))

23
nimbus/utils/flags.py Normal file
View File

@@ -0,0 +1,23 @@
import os
_DEBUG_KEY = "NIMBUS_DEBUG"
_RANDOM_SEED_KEY = "NIMBUS_RANDOM_SEED"
def set_debug_mode(enabled: bool) -> None:
"""Set debug mode. Must be called before ray.init() to propagate to Ray workers."""
os.environ[_DEBUG_KEY] = "1" if enabled else "0"
def is_debug_mode() -> bool:
return os.environ.get(_DEBUG_KEY, "0") == "1"
def set_random_seed(seed: int) -> None:
"""Set global random seed. Must be called before ray.init() to propagate to Ray workers."""
os.environ[_RANDOM_SEED_KEY] = str(seed)
def get_random_seed() -> int | None:
val = os.environ.get(_RANDOM_SEED_KEY)
return int(val) if val is not None else None

48
nimbus/utils/logging.py Normal file
View File

@@ -0,0 +1,48 @@
import logging
import os
import time
from datetime import datetime
from nimbus.utils.config import save_config
def configure_logging(exp_name, name=None, config=None):
    """Create the experiment log directory and return the shared "de_logger".

    Args:
        exp_name: Experiment name; becomes a sub-directory of ./output. When
            the POD_NAME environment variable is set (e.g. on a DLC cluster),
            it is appended so each pod logs to its own directory.
        name: Optional tag inserted into the log file name.
        config: Optional config object; when given it is saved as
            de_config.yaml next to the log file.

    Returns:
        logging.Logger: The "de_logger" logger with exactly one file handler.

    Raises:
        RuntimeError: If the log directory cannot be created after several
            attempts (e.g. stale NFS file handles).
    """
    pod_name = os.environ.get("POD_NAME", None)
    if pod_name is not None:
        exp_name = f"{exp_name}/{pod_name}"
    log_dir = os.path.join("./output", exp_name)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    if name is None:
        log_name = f"de_time_profile_{timestamp}.log"
    else:
        log_name = f"de_{name}_time_profile_{timestamp}.log"
    log_file = os.path.join(log_dir, log_name)
    max_retries = 3
    for attempt in range(max_retries):
        try:
            os.makedirs(log_dir, exist_ok=True)
            break
        except Exception as e:
            # NFS mounts occasionally raise stale-file-handle errors here;
            # retry a few times before giving up. Include the actual error
            # instead of assuming it was a stale handle.
            print(f"Warning: failed to create {log_dir} ({e}), attempt {attempt + 1}/{max_retries}")
            if attempt < max_retries - 1:
                time.sleep(3)
                continue
            raise RuntimeError(f"Failed to create log directory {log_dir} after {max_retries} attempts") from e
    if config is not None:
        config_log_file = os.path.join(log_dir, "de_config.yaml")
        save_config(config, config_log_file)
    logger = logging.getLogger("de_logger")
    logger.setLevel(logging.INFO)
    # "de_logger" is a process-wide singleton: drop handlers left over from
    # earlier calls, otherwise every reconfiguration duplicates log lines.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()
    fh = logging.FileHandler(log_file, mode="a")
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    logger.info("Start Data Engine")
    return logger

33
nimbus/utils/random.py Normal file
View File

@@ -0,0 +1,33 @@
import os
import random

import numpy as np
import torch

# open3d is an optional dependency; degrade gracefully when it is absent
try:
    import open3d as o3d
except ImportError:
    o3d = None


def set_all_seeds(seed):
    """Seed every random number generator used across the stack.

    Covers Python's hash seed, the ``random`` module, numpy, open3d (when
    installed) and torch (CPU and, if present, CUDA) so runs are reproducible.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    print(f"set seed {seed} for all libraries")
    seed = int(seed)
    np.random.seed(seed)
    random.seed(seed)
    if o3d and hasattr(o3d, "utility") and hasattr(o3d.utility, "random"):
        o3d.utility.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        # CuDNN must run deterministically (and without autotuning) to give
        # bit-identical results across runs.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

65
nimbus/utils/types.py Normal file
View File

@@ -0,0 +1,65 @@
from dataclasses import dataclass
from typing import Dict, Optional, Tuple
# String keys used in the pipeline configuration files, centralised here so
# stages and instructions reference config entries by constant, not literal.
NAME = "name"
# stage names
LOAD_STAGE = "load_stage"
PLAN_STAGE = "plan_stage"
RENDER_STAGE = "render_stage"
PLAN_WITH_RENDER_STAGE = "plan_with_render_stage"
STORE_STAGE = "store_stage"
STAGE_PIPE = "stage_pipe"
DUMP_STAGE = "dump_stage"
DEDUMP_STAGE = "dedump_stage"
# instruction names, grouped by the stage they belong to
# LOAD_STAGE instructions
SCENE_LOADER = "scene_loader"
LAYOUT_RANDOM_GENERATOR = "layout_random_generator"
INDEX_GENERATOR = "index_generator"
DEDUMPER = "dedumper"
# PLAN_STAGE instructions
SEQ_PLANNER = "seq_planner"
PLANNER = "planner"
SIMULATOR = "simulator"
# RENDER_STAGE instructions
RENDERER = "renderer"
# PLAN_WITH_RENDER_STAGE instructions
PLAN_WITH_RENDER = "plan_with_render"
# PIPE_STAGE settings
STAGE_NUM = "stage_num"
STAGE_DEV = "stage_dev"
WORKER_NUM = "worker_num"
WORKER_SCHEDULE = "worker_schedule"
SAFE_THRESHOLD = "safe_threshold"
STATUS_TIMEOUTS = "status_timeouts"
MONITOR_CHECK_INTERVAL = "monitor_check_interval"
# STORE_STAGE settings
WRITER = "writer"
DUMPER = "dumper"
# generic config keys shared across stages
OUTPUT_PATH = "output_path"
INPUT_PATH = "input_path"
TYPE = "type"
ARGS = "args"
@dataclass
class StageInput:
    """Bundle of arguments handed to a stage's processing function.

    The capitalised attribute names (``Args`` / ``Kwargs``) are part of the
    public interface used by callers and are therefore kept as-is.
    """

    # positional arguments forwarded to the stage's processing function
    Args: Optional[Tuple] = None
    # keyword arguments forwarded to the stage's processing function
    Kwargs: Optional[Dict] = None

182
nimbus/utils/utils.py Normal file
View File

@@ -0,0 +1,182 @@
import functools
import os
import re
import sys
import time
from typing import Tuple, Type, Union
from nimbus.components.data.observation import Observations
from nimbus.components.data.scene import Scene
from nimbus.components.data.sequence import Sequence
def init_env():
    """Extend sys.path so data-engine and simbox workflow modules import cleanly."""
    for extra in ("./", "./data_engine", "workflows/simbox"):
        sys.path.append(extra)
def unpack_iter_data(data: tuple):
    """Split a stage data tuple into its (scene, sequence, observations) parts.

    Each element of *data* is slotted by type; any slot whose type is absent
    comes back as None. At most three elements are supported.
    """
    assert len(data) <= 3, "not support yet"
    scene = seq = obs = None
    for entry in data:
        if isinstance(entry, Scene):
            scene = entry
        elif isinstance(entry, Sequence):
            seq = entry
        elif isinstance(entry, Observations):
            obs = entry
    return scene, seq, obs
def consume_stage(stage_input):
    """Exhaust all iterators held by a StageInput and release its values.

    Drains any iterators reachable from ``Args``/``Kwargs`` (via
    consume_iterators) and then explicitly finalizes each value that defines
    ``__del__``.

    Fix: ``Args``/``Kwargs`` default to None on StageInput, and the original
    code iterated ``Args`` unconditionally, raising TypeError for a default
    StageInput(); both attributes are now guarded against None.
    """
    args = getattr(stage_input, "Args", None)
    if args is not None:
        consume_iterators(args)
        for value in args:
            if hasattr(value, "__del__"):
                value.__del__()  # pylint: disable=C2801
    kwargs = getattr(stage_input, "Kwargs", None)
    if kwargs is not None:
        for value in kwargs.values():
            consume_iterators(value)
            if hasattr(value, "__del__"):
                value.__del__()  # pylint: disable=C2801
# prevent isaac sim close pipe worker in advance
def pipe_consume_stage(stage_input):
    """Drain iterators held by a StageInput without finalizing the values.

    Unlike consume_stage, this never calls ``__del__`` on the values, so the
    Isaac Sim pipe worker is not torn down prematurely.
    """
    if hasattr(stage_input, "Args"):
        consume_iterators(stage_input.Args)
    keyword_values = getattr(stage_input, "Kwargs", None)
    if keyword_values is not None:
        for entry in keyword_values.values():
            consume_iterators(entry)
def consume_iterators(obj):
    """Recursively exhaust every iterator reachable from *obj*.

    Containers (dict/list/tuple) are rebuilt with their processed contents;
    strings and bytes pass through untouched; any other iterable (e.g. a
    generator) is fully drained for its side effects and returned as-is.
    """
    if isinstance(obj, (str, bytes)):
        # strings/bytes are iterable but must not be drained char by char
        return obj
    if isinstance(obj, dict):
        return {name: consume_iterators(entry) for name, entry in obj.items()}
    if isinstance(obj, list):
        return [consume_iterators(entry) for entry in obj]
    if isinstance(obj, tuple):
        return tuple(consume_iterators(entry) for entry in obj)
    if hasattr(obj, "__iter__"):
        # generic iterable: walk it so lazy producers actually run
        for entry in obj:
            consume_iterators(entry)
    return obj
def scene_names_postprocess(scene_names: list) -> list:
    """
    Distributes a list of scene names (folders) among multiple workers in a distributed environment.

    This function is designed to work with Deep Learning Container (DLC) environments, where worker
    information is extracted from environment variables. It assigns a subset of the input scene names
    to the current worker based on its rank and the total number of workers, using a round-robin strategy.
    If not running in a DLC environment, all scene names are assigned to a single worker.

    Args:
        scene_names (list): List of scene names (typically folder names) to be distributed.

    Returns:
        list: The subset of scene names assigned to the current worker.

    Raises:
        PermissionError: If there is a permission issue during processing.
        RuntimeError: For any other errors encountered during processing.

    Notes:
        - The function expects certain environment variables (e.g., POD_NAME, WORLD_SIZE) to be set
          in DLC environments.
        - With multiple workers the list is sorted (on a copy — the caller's
          list is no longer mutated) so all workers derive the same assignment.
    """

    def _get_dlc_worker_info():
        """Extract (rank, world_size) from DLC environment variables."""
        pod_name = os.environ.get("POD_NAME")
        if pod_name:
            # Pod names look like "dlc...-worker-3" or "dlc...-master-0"
            match = re.search(r"dlc.*?-(worker|master)-(\d+)$", pod_name)
            if match:
                node_type, node_id = match.groups()
                world_size = int(os.environ.get("WORLD_SIZE", "1"))
                if node_type == "worker":
                    rank = int(node_id)
                else:  # the master node takes the last rank
                    rank = world_size - 1
                return rank, world_size
        # Default for non-DLC environment
        return 0, 1

    def _distribute_folders(all_folders, rank, world_size):
        """Round-robin split: worker i keeps indices i, i+world_size, ..."""
        if not all_folders:
            return []
        # Only sort when there are multiple workers to ensure consistency;
        # sort a copy so the caller's list is not mutated as a side effect.
        if world_size > 1:
            all_folders = sorted(all_folders)
        return all_folders[rank::world_size]

    try:
        all_subfolders = scene_names
        if not all_subfolders:
            print(f"Warning: No scene found in {scene_names}")
            return []
        # Get worker identity and distribute folders
        rank, world_size = _get_dlc_worker_info()
        assigned_folders = _distribute_folders(all_subfolders, rank, world_size)
        print(
            f"DLC Worker {rank}/{world_size}: Assigned {len(assigned_folders)} out of "
            f"{len(all_subfolders)} total folders"
        )
        return assigned_folders
    except PermissionError as e:
        # Chain the original exception so the root cause is preserved
        raise PermissionError(f"No permission to access directory: {scene_names}") from e
    except Exception as e:
        raise RuntimeError(f"Error reading input directory {scene_names}: {e}") from e
def retry_on_exception(
    max_retries: int = 3, retry_exceptions: Union[bool, Tuple[Type[Exception], ...]] = True, delay: float = 1.0
):
    """Decorator that retries the wrapped callable when it raises.

    Args:
        max_retries: Number of retries after the first attempt (so the callable
            runs at most ``max_retries + 1`` times).
        retry_exceptions: ``True`` to retry on any exception, or a tuple/list of
            exception types to retry on; anything else disables retrying.
        delay: Seconds to sleep between attempts.

    Raises:
        Whatever the wrapped callable last raised, once retries are exhausted
        or the exception is not retryable.

    Fixes vs. the original:
        - A *list* of exception types crashed with TypeError, because
          ``isinstance`` only accepts a type or a tuple; lists are now
          normalized to a tuple.
        - The wrapper no longer hard-codes a ``self`` parameter, so the
          decorator works on plain functions as well as methods (methods keep
          working unchanged: ``self`` simply travels in ``*args``).
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_retries + 1):
                try:
                    if attempt > 0:
                        print(f"Retry attempt {attempt}/{max_retries} for {func.__name__}")
                    return func(*args, **kwargs)
                except Exception as e:
                    if retry_exceptions is True:
                        should_retry = True
                    elif isinstance(retry_exceptions, (tuple, list)):
                        # isinstance() rejects lists — normalize to a tuple
                        should_retry = isinstance(e, tuple(retry_exceptions))
                    else:
                        should_retry = False
                    if should_retry and attempt < max_retries:
                        print(f"Error in {func.__name__}: {e}. Retrying in {delay} seconds...")
                        time.sleep(delay)
                    else:
                        # Out of retries or non-retryable: propagate unchanged
                        raise

        return wrapper

    return decorator