init commit

This commit is contained in:
zyhe
2026-03-16 11:44:10 +00:00
commit 94384a93c9
552 changed files with 363038 additions and 0 deletions

View File

View File

@@ -0,0 +1,71 @@
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
import numpy as np
@dataclass
class C2W:
    """
    A single camera-to-world transform.

    Attributes:
        matrix (List[float]): The 4x4 transform flattened to 16 floats in
            row-major order.
    """

    matrix: List[float]
@dataclass
class Camera:
    """
    Represents a single camera pose in the trajectory.
    Attributes:
        trajectory (List[C2W]): A list of C2W transformations for this camera pose.
        intrinsic (Optional[List[float]]): 3x3 camera intrinsic matrix: [[fx, 0, cx], [0, fy, cy], [0, 0, 1]].
        extrinsic (Optional[List[float]]): 4x4 tobase_extrinsic matrix representing the camera mounting offset
            relative to the robot base (height + pitch).
        length (Optional[int]): Length of the trajectory in number of frames.
        depths (Optional[List[np.ndarray]]): List of depth images captured by this camera.
        rgbs (Optional[List[np.ndarray]]): List of RGB images captured by this camera.
        uv_tracks (Optional[Dict[str, Any]]): UV tracking data in the format
            {mesh_name: {"per_frame": list, "width": W, "height": H}}.
        uv_mesh_names (Optional[List[str]]): List of mesh names being tracked in the UV tracking data.
    """
    trajectory: List[C2W]
    # Every field below defaults to None, so each is annotated Optional.
    intrinsic: Optional[List[float]] = None
    extrinsic: Optional[List[float]] = None
    length: Optional[int] = None
    depths: Optional[List[np.ndarray]] = None
    rgbs: Optional[List[np.ndarray]] = None
    uv_tracks: Optional[Dict[str, Any]] = None
    uv_mesh_names: Optional[List[str]] = None
    def __len__(self):
        """Return the frame count; validates auxiliary data and caches the result on first call."""
        if self.length is not None:
            return self.length
        self._check_length()
        # Cache so subsequent calls skip the consistency checks.
        self.length = len(self.trajectory)
        return len(self.trajectory)
    def _check_length(self):
        """Raise ValueError if depths / rgbs / uv_tracks disagree with the trajectory length."""
        if self.depths is not None and len(self.depths) != len(self.trajectory):
            raise ValueError("Length of depths does not match length of trajectory")
        if self.rgbs is not None and len(self.rgbs) != len(self.trajectory):
            raise ValueError("Length of rgbs does not match length of trajectory")
        if self.uv_tracks is not None:
            # Each tracked mesh must carry one "per_frame" entry per trajectory frame.
            for mesh_name, track_data in self.uv_tracks.items():
                if len(track_data["per_frame"]) != len(self.trajectory):
                    raise ValueError(f"Length of uv_tracks for mesh {mesh_name} does not match length of trajectory")
    def append_rgb(self, rgb_image: np.ndarray):
        """Append one RGB frame, lazily creating the list on first use."""
        if self.rgbs is None:
            self.rgbs = []
        self.rgbs.append(rgb_image)
    def append_depth(self, depth_image: np.ndarray):
        """Append one depth frame, lazily creating the list on first use."""
        if self.depths is None:
            self.depths = []
        self.depths.append(depth_image)

View File

@@ -0,0 +1,95 @@
import logging
import time
from abc import abstractmethod
from collections.abc import Iterator
from typing import Generic, TypeVar
T = TypeVar("T")
# pylint: disable=E0102
class Iterator(Iterator, Generic[T]):
    """Base class for instrumented pipeline iterators.

    Subclasses implement ``_next``; this wrapper times every ``__next__``
    call and, when the underlying iterator is exhausted, logs aggregate
    statistics about init, per-frame compute, per-frame IO, wait, and
    per-sequence costs.

    NOTE: the class intentionally shadows ``collections.abc.Iterator``
    (hence the pylint E0102 disable above).

    Args:
        max_retry (int): Retry budget exposed to subclasses via
            ``self.max_retry`` / ``self.retry_num`` (not enforced here).
    """

    def __init__(self, max_retry=3):
        # Counters are ints; accumulated durations are floats.
        # (Previously the counters were also initialized to 0.0, which made
        # the logged statistics read e.g. "Next method called 3.0 times".)
        self._next_calls = 0
        self._next_total_time = 0.0
        self._init_time_costs = 0.0
        self._init_times = 0
        self._frame_compute_time = 0.0
        self._frame_compute_frames = 0
        self._frame_io_time = 0.0
        self._frame_io_frames = 0
        self._wait_time = 0.0
        self._seq_num = 0
        self._seq_time = 0.0
        self.logger = logging.getLogger("de_logger")
        self.max_retry = max_retry
        self.retry_num = 0

    def record_init_time(self, time_costs):
        """Accumulate one initialization cost sample (seconds)."""
        self._init_times += 1
        self._init_time_costs += time_costs

    def __iter__(self):
        return self

    def __next__(self):
        """Delegate to ``_next`` while timing the call; log statistics on StopIteration."""
        start_time = time.time()
        try:
            result = self._next()
        except StopIteration:
            # End of iteration: emit the collected statistics, then re-raise.
            self._log_statistics()
            raise
        end_time = time.time()
        self._next_calls += 1
        self._next_total_time += end_time - start_time
        return result

    def collect_compute_frame_info(self, length, time_costs):
        """Record ``length`` frames computed in ``time_costs`` seconds."""
        self._frame_compute_frames += length
        self._frame_compute_time += time_costs

    def collect_io_frame_info(self, length, time_costs):
        """Record ``length`` frames read/written in ``time_costs`` seconds."""
        self._frame_io_frames += length
        self._frame_io_time += time_costs

    def collect_wait_time_info(self, time_costs):
        """Record time spent waiting (e.g. on an upstream stage)."""
        self._wait_time += time_costs

    def collect_seq_info(self, length, time_costs):
        """Record ``length`` sequences processed in ``time_costs`` seconds."""
        self._seq_num += length
        self._seq_time += time_costs

    @abstractmethod
    def _next(self):
        """Produce the next item; raise StopIteration when exhausted."""
        raise NotImplementedError("Subclasses should implement this method.")

    def _log_statistics(self):
        """Log every non-zero accumulated statistic for this iterator."""
        class_name = self.__class__.__name__
        self.logger.info(
            f"{class_name}: Next method called {self._next_calls} times, total time:"
            f" {self._next_total_time:.6f} seconds"
        )
        if self._init_time_costs > 0:
            self.logger.info(
                f"{class_name}: Init time: {self._init_time_costs:.6f} seconds, init {self._init_times} times"
            )
        if self._frame_compute_time > 0:
            avg_compute_time = self._frame_compute_time / self._frame_compute_frames
            self.logger.info(
                f"{class_name}: compute frame num: {self._frame_compute_frames}, total time:"
                f" {self._frame_compute_time:.6f} seconds, average time: {avg_compute_time:.6f} seconds per frame"
            )
        if self._frame_io_frames > 0:
            avg_io_time = self._frame_io_time / self._frame_io_frames
            self.logger.info(
                f"{class_name}: io frame num: {self._frame_io_frames}, total time: {self._frame_io_time:.6f} seconds,"
                f" average time: {avg_io_time:.6f} seconds per frame"
            )
        if self._wait_time > 0:
            self.logger.info(f"{class_name}: wait time: {self._wait_time:.6f} seconds")
        if self._seq_time > 0:
            avg_seq_time = self._seq_time / self._seq_num
            self.logger.info(
                f"{class_name}: seq num: {self._seq_num:.6f}, total time: {self._seq_time:.6f} seconds, average time:"
                f" {avg_seq_time:.6f} seconds per sequence"
            )

View File

@@ -0,0 +1,119 @@
import os
import cv2
import imageio
import numpy as np
from nimbus.components.data.camera import Camera
class Observations:
    """
    Represents a single observation of a scene, which may include multiple camera trajectories and associated data.
    Each observation is identified by a unique name and index, and can contain multiple Camera items that capture
    different viewpoints or modalities of the same scene.
    Args:
        scene_name (str): The name of the scene associated with this observation.
        index (str): The index or ID of this observation within the scene.
        length (int): Optional total length of the observation. Calculated from camera trajectories if not provided.
        data (dict): Optional dictionary for storing additional arbitrary data, such as metadata or annotations.
    """
    def __init__(self, scene_name: str, index: str, length: int = None, data: dict = None):
        self.scene_name = scene_name
        # Unique identifier of this observation: "<scene_name>_<index>".
        self.obs_name = scene_name + "_" + index
        self.index = index
        # Camera items attached via append_cam().
        self.cam_items = []
        # Total frame count; lazily computed in __len__ when not supplied.
        self.length = length
        self.data = data
    def __getstate__(self):
        """Pickle support: the full instance dict is used as-is."""
        state = self.__dict__.copy()
        return state
    def __setstate__(self, state):
        """Pickle support: restore the instance dict directly."""
        self.__dict__.update(state)
    def append_cam(self, item: Camera):
        """Attach one Camera item to this observation."""
        self.cam_items.append(item)
    def __len__(self):
        """Return the total length: sum of len(cam) over all cameras, cached in self.length."""
        if self.length is not None:
            return self.length
        self.length = 0
        for cam in self.cam_items:
            self.length += len(cam)
        return self.length
    def get_length(self):
        """Convenience alias for len(self)."""
        return len(self)
    def flush_to_disk(self, path, video_fps=10):
        """Save visualization data for all cameras under `path/trajectory_<index>/`.

        A single camera is written into that directory directly; multiple
        cameras each get a `camera_<i>/` subdirectory.

        Args:
            path: Root output directory.
            video_fps (int): Frame rate for the generated mp4 previews.
        """
        path_to_save = os.path.join(path, "trajectory_" + self.index)
        print(f"obs {self.obs_name} try to save path in {path_to_save}")
        os.makedirs(path_to_save, exist_ok=True)
        # Single camera: save in root directory
        if len(self.cam_items) == 1:
            cam = self.cam_items[0]
            self._save_camera_data(path_to_save, cam, video_fps)
        # Multiple cameras: save in camera_0/, camera_1/, etc.
        else:
            for idx, cam in enumerate(self.cam_items):
                camera_dir = os.path.join(path_to_save, f"camera_{idx}")
                os.makedirs(camera_dir, exist_ok=True)
                self._save_camera_data(camera_dir, cam, video_fps)
    def _save_camera_data(self, save_dir, cam: Camera, video_fps):
        """Helper method to save camera visualization data (rgbs, depths) to a directory.

        Writes per-frame images (rgb/<i>.jpg, depth/<i>.png), mp4 previews,
        and optional UV-tracking overlays into `save_dir`.
        """
        # Save RGB and depth images if available
        if cam.rgbs is not None and len(cam.rgbs) > 0:
            rgb_images_path = os.path.join(save_dir, "rgb/")
            os.makedirs(rgb_images_path, exist_ok=True)
            fps_path = os.path.join(save_dir, "fps.mp4")
            for idx, rgb_item in enumerate(cam.rgbs):
                rgb_filename = os.path.join(rgb_images_path, f"{idx}.jpg")
                # cv2.imwrite expects BGR, so channels 0 and 2 are swapped here.
                # The BGR2RGB flag performs the same swap as RGB2BGR; assumes
                # rgb_item is RGB-ordered — TODO confirm with the producer.
                cv2.imwrite(rgb_filename, cv2.cvtColor(rgb_item, cv2.COLOR_BGR2RGB))
            # One preview video from the raw (unswapped) frames.
            imageio.mimwrite(fps_path, cam.rgbs, fps=video_fps)
        if cam.depths is not None and len(cam.depths) > 0:
            depth_images_path = os.path.join(save_dir, "depth/")
            os.makedirs(depth_images_path, exist_ok=True)
            depth_path = os.path.join(save_dir, "depth.mp4")
            # Create a copy for video (8-bit version)
            depth_video_frames = []
            for idx, depth_item in enumerate(cam.depths):
                depth_filename = os.path.join(depth_images_path, f"{idx}.png")
                cv2.imwrite(depth_filename, depth_item)
                # >> 8 keeps the high byte for the 8-bit preview; assumes
                # 16-bit integer depth frames — TODO confirm dtype.
                depth_video_frames.append((depth_item >> 8).astype(np.uint8))
            imageio.mimwrite(depth_path, depth_video_frames, fps=video_fps)
        # Save UV tracking visualizations if available
        if cam.uv_tracks is not None and cam.uv_mesh_names is not None and cam.rgbs is not None:
            num_frames = len(cam.rgbs)
            # Imported lazily: the extension package is an optional dependency.
            try:
                from nimbus_extension.components.render.brpc_utils.point_tracking import (
                    make_uv_overlays_and_video,
                )
            except ImportError as e:
                raise ImportError(
                    "UV tracking visualization requires nimbus_extension. "
                    "Please add `import nimbus_extension` before running the pipeline."
                ) from e
            make_uv_overlays_and_video(
                cam.rgbs,
                cam.uv_tracks,
                cam.uv_mesh_names,
                start_frame=0,
                end_frame=num_frames,
                fps=video_fps,
                path_to_save=save_dir,
            )

View File

@@ -0,0 +1,39 @@
import pickle
class Package:
    """
    A pipeline envelope: wraps an arbitrary payload together with task
    routing metadata and an optional stop signal, and supports explicit
    pickle-based (de)serialization of the payload in place.

    Args:
        data: The actual data contained in the package, which can be of any type.
        task_id (int): The ID of the task associated with this package.
        task_name (str): The name of the task associated with this package.
        stop_sig (bool): Whether this package signals the pipeline to stop.
    """

    def __init__(self, data, task_id: int = -1, task_name: str = None, stop_sig: bool = False):
        self.data = data
        self.task_id = task_id
        self.task_name = task_name
        self.stop_sig = stop_sig
        # Tracks whether self.data currently holds pickled bytes.
        self.is_ser = False

    def serialize(self):
        """Pickle the payload in place; errors if already serialized."""
        assert self.is_ser is False, "data is already serialized"
        payload = pickle.dumps(self.data)
        self.data = payload
        self.is_ser = True

    def deserialize(self):
        """Unpickle the payload in place; errors if not serialized."""
        assert self.is_ser is True, "data is already deserialized"
        payload = pickle.loads(self.data)
        self.data = payload
        self.is_ser = False

    def is_serialized(self):
        """Return True while the payload is held as pickled bytes."""
        return self.is_ser

    def get_data(self):
        """Return the payload (pickled bytes when currently serialized)."""
        return self.data

    def should_stop(self):
        """Return True when this package carries the pipeline stop signal."""
        return self.stop_sig is True

View File

@@ -0,0 +1,69 @@
class Scene:
    """
    Holds the state of one loaded simulation scene: its geometry and
    appearance assets, the owning workflow context, and bookkeeping about
    task execution progress.

    Args:
        name (str): The name of the scene or task.
        pcd: Point cloud data associated with the scene.
        scale (float): Scale factor for the scene geometry.
        materials: Material data for the scene.
        textures: Texture data for the scene.
        floor_heights: Floor height information for the scene.
        wf: The task workflow instance managing this scene.
        task_id (int): The index of the current task within the workflow.
        task_exec_num (int): The execution count for the current task, used for task repetition tracking.
        simulation_app: The Isaac Sim SimulationApp instance.
    """

    def __init__(
        self,
        name: str = None,
        pcd=None,
        scale: float = 1.0,
        materials=None,
        textures=None,
        floor_heights=None,
        wf=None,
        task_id: int = None,
        task_exec_num: int = 1,
        simulation_app=None,
    ):
        # Scene identity and geometry/appearance assets.
        self.name = name
        self.pcd = pcd
        self.scale = scale
        self.materials = materials
        self.textures = textures
        self.floor_heights = floor_heights
        # Workflow / runtime context.
        self.wf = wf
        self.simulation_app = simulation_app
        self.task_id = task_id
        self.task_exec_num = task_exec_num
        # Execution bookkeeping, filled in as the pipeline progresses.
        self.plan_info = None
        self.generate_success = False

    def __getstate__(self):
        # The point cloud is dropped when pickling and restored as None.
        state = dict(self.__dict__)
        state.pop("pcd")
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        self.pcd = None

    def add_plan_info(self, plan_info):
        """Attach the planning result for this scene."""
        self.plan_info = plan_info

    def flush_to_disk(self, path):
        """Persisting a Scene is currently a no-op."""

    def load_from_disk(self, path):
        """Loading a Scene from disk is currently a no-op."""

    def update_generate_status(self, success):
        """Record whether data generation for this scene succeeded."""
        self.generate_success = success

    def get_generate_status(self):
        """Return the last recorded generation status."""
        return self.generate_success

    def update_task_exec_num(self, num):
        """Set how many times the current task has been executed."""
        self.task_exec_num = num

View File

@@ -0,0 +1,145 @@
import json
import os
import numpy as np
import open3d as o3d
from nimbus.components.data.camera import C2W, Camera
class Sequence:
"""
Represents a camera trajectory sequence with associated metadata.
Args:
scene_name (str): The name of the scene (e.g., room identifier).
index (str): The index or ID of this sequence within the scene.
length (int): Optional explicit sequence length. Calculated from camera trajectories if not provided.
data (dict): Optional additional arbitrary data associated with the sequence.
"""
def __init__(self, scene_name: str, index: str, length: int = None, data: dict = None):
self.scene_name = scene_name
self.seq_name = scene_name + "_" + index
self.index = index
self.cam_items: list[Camera] = []
self.path_pcd = None
self.length = length
self.data = data
def __getstate__(self):
state = self.__dict__.copy()
state["path_pcd_color"] = np.asarray(state["path_pcd"].colors)
state["path_pcd"] = o3d.io.write_point_cloud_to_bytes(state["path_pcd"], "mem::xyz")
return state
def __setstate__(self, state):
self.__dict__.update(state)
self.path_pcd = o3d.io.read_point_cloud_from_bytes(state["path_pcd"], "mem::xyz")
self.path_pcd.colors = o3d.utility.Vector3dVector(state["path_pcd_color"])
def __len__(self):
if self.length is not None:
return self.length
self.length = 0
for cam in self.cam_items:
self.length += len(cam)
return self.length
def append_cam(self, item: Camera):
self.cam_items.append(item)
def update_pcd(self, path_pcd):
self.path_pcd = path_pcd
def get_length(self):
return len(self)
def flush_to_disk(self, path):
path_to_save = os.path.join(path, "trajectory_" + self.index)
print(f"seq {self.seq_name} try to save path in {path_to_save}")
os.makedirs(path_to_save, exist_ok=True)
if self.path_pcd is not None:
pcd_path = os.path.join(path_to_save, "path.ply")
o3d.io.write_point_cloud(pcd_path, self.path_pcd)
# Single camera: save in root directory
if len(self.cam_items) == 1:
cam = self.cam_items[0]
camera_trajectory_list = [t.matrix for t in cam.trajectory]
save_dict = {
"camera_intrinsic": cam.intrinsic if cam.intrinsic is not None else None,
"camera_extrinsic": cam.extrinsic if cam.extrinsic is not None else None,
"camera_trajectory": camera_trajectory_list,
}
traj_path = os.path.join(path_to_save, "data.json")
json_object = json.dumps(save_dict, indent=4)
with open(traj_path, "w", encoding="utf-8") as outfile:
outfile.write(json_object)
# Multiple cameras: save in camera_0/, camera_1/, etc.
else:
for idx, cam in enumerate(self.cam_items):
camera_dir = os.path.join(path_to_save, f"camera_{idx}")
os.makedirs(camera_dir, exist_ok=True)
camera_trajectory_list = [t.matrix for t in cam.trajectory]
save_dict = {
"camera_intrinsic": cam.intrinsic if cam.intrinsic is not None else None,
"camera_extrinsic": cam.extrinsic if cam.extrinsic is not None else None,
"camera_trajectory": camera_trajectory_list,
}
traj_path = os.path.join(camera_dir, "data.json")
json_object = json.dumps(save_dict, indent=4)
with open(traj_path, "w", encoding="utf-8") as outfile:
outfile.write(json_object)
def load_from_disk(self, path):
print(f"seq {self.seq_name} try to load path from {path}")
pcd_path = os.path.join(path, "path.ply")
if os.path.exists(pcd_path):
self.path_pcd = o3d.io.read_point_cloud(pcd_path)
# Clear existing camera items
self.cam_items = []
# Check if single camera format (data.json in root)
traj_path = os.path.join(path, "data.json")
if os.path.exists(traj_path):
with open(traj_path, "r", encoding="utf-8") as infile:
data = json.load(infile)
camera_trajectory_list = []
for trajectory in data["camera_trajectory"]:
camera_trajectory_list.append(C2W(matrix=trajectory))
cam = Camera(
trajectory=camera_trajectory_list,
intrinsic=data.get("camera_intrinsic"),
extrinsic=data.get("camera_extrinsic"),
)
self.cam_items.append(cam)
else:
# Multiple camera format (camera_0/, camera_1/, etc.)
idx = 0
while True:
camera_dir = os.path.join(path, f"camera_{idx}")
camera_json = os.path.join(camera_dir, "data.json")
if not os.path.exists(camera_json):
break
with open(camera_json, "r", encoding="utf-8") as infile:
data = json.load(infile)
camera_trajectory_list = []
for trajectory in data["camera_trajectory"]:
camera_trajectory_list.append(C2W(matrix=trajectory))
cam = Camera(
trajectory=camera_trajectory_list,
intrinsic=data.get("camera_intrinsic"),
extrinsic=data.get("camera_extrinsic"),
)
self.cam_items.append(cam)
idx += 1
assert len(self.cam_items) > 0, f"No camera data found in {path}"