Add rich annotations and update open-pi FSDP explanations

This commit is contained in:
Leon998
2026-03-18 13:59:52 +08:00
parent 814f3c3526
commit 4934c4794e
11 changed files with 349 additions and 32 deletions

View File

@@ -1,6 +1,7 @@
import numpy as np
import omni.replicator.core as rep
from core.cameras.base_camera import register_camera
from core.utils.camera_utils import get_src
from omni.isaac.core.prims import XFormPrim
from omni.isaac.core.utils.prims import get_prim_at_path
from omni.isaac.core.utils.transformations import (
@@ -40,9 +41,25 @@ class CustomCamera(Camera):
**kwargs,
)
self.initialize()
self.add_motion_vectors_to_frame()
self.add_semantic_segmentation_to_frame()
self.add_distance_to_image_plane_to_frame()
self.with_distance = cfg["params"].get("with_distance", True)
self.with_semantic = cfg["params"].get("with_semantic", False)
self.with_bbox2d = cfg["params"].get("with_bbox2d", False)
self.with_bbox3d = cfg["params"].get("with_bbox3d", False)
# Motion vectors are high-volume outputs; keep default off unless explicitly enabled in config.
self.with_motion_vector = cfg["params"].get("with_motion_vector", False)
self.with_depth = cfg["params"].get("depth", False)
if self.with_distance:
self.add_distance_to_image_plane_to_frame()
if self.with_semantic:
self.add_semantic_segmentation_to_frame()
if self.with_bbox2d:
self.add_bounding_box_2d_tight_to_frame()
self.add_bounding_box_2d_loose_to_frame()
if self.with_bbox3d:
self.add_bounding_box_3d_to_frame()
if self.with_motion_vector:
self.add_motion_vectors_to_frame()
# ===== From cfg =====
pixel_size = cfg["params"].get("pixel_size")
@@ -155,9 +172,27 @@ class CustomCamera(Camera):
obs = {
"color_image": color_image,
"depth_image": self.get_depth(),
"camera2env_pose": camera2env_pose,
"camera_params": self.is_camera_matrix.tolist(),
}
if self.with_depth:
obs["depth_image"] = get_src(self, "depth"),
seg_data = get_src(self, "seg")
if seg_data is not None:
obs["semantic_mask"] = seg_data["mask"]
obs["semantic_mask_id2labels"] = seg_data["id2labels"]
bbox2d_tight = get_src(self, "bbox2d_tight")
if bbox2d_tight is not None:
obs["bbox2d_tight"], obs["bbox2d_tight_id2labels"] = bbox2d_tight
bbox2d_loose = get_src(self, "bbox2d_loose")
if bbox2d_loose is not None:
obs["bbox2d_loose"], obs["bbox2d_loose_id2labels"] = bbox2d_loose
bbox3d = get_src(self, "bbox3d")
if bbox3d is not None:
obs["bbox3d"], obs["bbox3d_id2labels"] = bbox3d
motion_vectors = get_src(self, "motion_vectors")
if motion_vectors is not None:
obs["motion_vectors"] = motion_vectors
return obs

View File

@@ -27,6 +27,10 @@ class BaseLogger(ABC):
self.object_data_logger: Dict[str, List[Any]] = {}
self.color_image_logger: Dict[str, List[Any]] = {}
self.depth_image_logger: Dict[str, List[Any]] = {}
self.seg_image_logger: Dict[str, List[Any]] = {}
self.color_image_step_logger: Dict[str, List[Any]] = {}
self.depth_image_step_logger: Dict[str, List[Any]] = {}
self.seg_image_step_logger: Dict[str, List[Any]] = {}
def update_tpi_initial_info(self, tpi_initial_info):
    """Record the given TPI initial-info payload on this logger, replacing any previous value."""
    self.tpi_initial_info = tpi_initial_info
@@ -67,17 +71,50 @@ class BaseLogger(ABC):
self.scalar_data_logger[robot][key] = []
self.scalar_data_logger[robot][key].append(value)
def add_color_image(self, robot, key, value):
if robot not in self.color_image_logger:
self.color_image_logger[robot] = {}
if key not in self.color_image_logger[robot]:
self.color_image_logger[robot][key] = []
self.color_image_logger[robot][key].append(value)
def _add_image_data(self, data_logger, step_logger, robot, key, value, step_idx=None):
if robot not in data_logger:
data_logger[robot] = {}
if key not in data_logger[robot]:
data_logger[robot][key] = []
data_logger[robot][key].append(value)
# def add_depth_image(self, key, value):
# if key not in self.depth_image_logger:
# self.depth_image_logger[key] = []
# self.depth_image_logger[key].append(value)
if robot not in step_logger:
step_logger[robot] = {}
if key not in step_logger[robot]:
step_logger[robot][key] = []
if step_idx is None:
step_idx = len(data_logger[robot][key]) - 1
step_logger[robot][key].append(int(step_idx))
def add_color_image(self, robot, key, value, step_idx=None):
    """Log a color image for ``robot``/``key`` and remember which step it belongs to."""
    self._add_image_data(self.color_image_logger, self.color_image_step_logger, robot, key, value, step_idx=step_idx)
def add_depth_image(self, robot, key, value, step_idx=None):
    """Log a depth image for ``robot``/``key`` and remember which step it belongs to."""
    self._add_image_data(self.depth_image_logger, self.depth_image_step_logger, robot, key, value, step_idx=step_idx)
def add_seg_image(self, robot, key, value, step_idx=None):
    """Log a segmentation image for ``robot``/``key`` and remember which step it belongs to."""
    self._add_image_data(self.seg_image_logger, self.seg_image_step_logger, robot, key, value, step_idx=step_idx)
def clear(
self,
@@ -97,6 +134,10 @@ class BaseLogger(ABC):
self.scalar_data_logger = {}
self.color_image_logger = {}
self.depth_image_logger = {}
self.seg_image_logger = {}
self.color_image_step_logger = {}
self.depth_image_step_logger = {}
self.seg_image_step_logger = {}
@abstractmethod
def close(self):

View File

@@ -13,6 +13,16 @@ from tqdm import tqdm
DEFAULT_RGB_SCALE_FACTOR = 256000.0
def float_array_to_uint16_png(float_array):
    """Quantize a float array into uint16 PNG range.

    Values are scaled by 10000, rounded, and clipped to [0, 65535]; NaN and
    ±inf are first mapped to 0. (Presumably depth in meters stored at 0.1 mm
    resolution — confirm against the writer's depth units.)
    """
    sanitized = np.nan_to_num(float_array, nan=0.0, posinf=0.0, neginf=0.0)
    quantized = np.clip(np.round(sanitized * 10000.0), 0, 65535)
    return quantized.astype(np.uint16)
def seg_array_to_uint16_png(seg_array):
    """Sanitize a segmentation-id array for 16-bit PNG storage.

    NaN and ±inf become 0; values are clipped to [0, 65535] and cast to uint16
    (no scaling — ids are stored verbatim, unlike depth).
    """
    cleaned = np.nan_to_num(seg_array, nan=0.0, posinf=0.0, neginf=0.0)
    return np.clip(cleaned, 0, 65535).astype(np.uint16)
# pylint: disable=line-too-long,unused-argument
class LmdbLogger(BaseLogger):
@@ -63,6 +73,7 @@ class LmdbLogger(BaseLogger):
meta_info["tpi_initial_info"] = self.tpi_initial_info
meta_info["collect_info"] = self.collect_info
meta_info["version"] = self.version
meta_info["image_valid_step_ids"] = {}
# Lmdb
log_path_lmdb = save_dir / "lmdb"
@@ -139,13 +150,20 @@ class LmdbLogger(BaseLogger):
# Save color images
if save_img:
for key, value in self.color_image_logger[robot_name].items():
for key, value in self.color_image_logger.get(robot_name, {}).items():
root_img_path = save_dir / f"{key}"
root_img_path.mkdir(parents=True, exist_ok=True)
step_ids = self.color_image_step_logger.get(robot_name, {}).get(key, [])
if len(step_ids) != len(value):
step_ids = list(range(len(value)))
else:
step_ids = [int(x) for x in step_ids]
meta_info["image_valid_step_ids"][key] = step_ids
meta_info["keys"][key] = []
for i, image in enumerate(tqdm(value)):
step_id = str(i).zfill(4)
step_id = str(step_ids[i]).zfill(4)
txn.put(
f"{key}/{step_id}".encode("utf-8"),
pickle.dumps(cv2.imencode(".jpg", image.astype(np.uint8))[1]),
@@ -154,7 +172,49 @@ class LmdbLogger(BaseLogger):
imageio.mimsave(os.path.join(root_img_path, "demo.mp4"), value, fps=15)
meta_info["num_steps"] = len(value)
for key, value in self.depth_image_logger.get(robot_name, {}).items():
root_img_path = save_dir / f"{key}"
root_img_path.mkdir(parents=True, exist_ok=True)
step_ids = self.depth_image_step_logger.get(robot_name, {}).get(key, [])
if len(step_ids) != len(value):
step_ids = list(range(len(value)))
else:
step_ids = [int(x) for x in step_ids]
meta_info["image_valid_step_ids"][key] = step_ids
meta_info["keys"][key] = []
for i, image in enumerate(tqdm(value)):
step_id = str(step_ids[i]).zfill(4)
depth_image = float_array_to_uint16_png(np.asarray(image))
txn.put(
f"{key}/{step_id}".encode('utf-8'),
pickle.dumps(cv2.imencode('.png', depth_image)[1])
)
meta_info["keys"][key].append(f"{key}/{step_id}".encode('utf-8'))
for key, value in self.seg_image_logger.get(robot_name, {}).items():
root_img_path = save_dir / f"{key}"
root_img_path.mkdir(parents=True, exist_ok=True)
step_ids = self.seg_image_step_logger.get(robot_name, {}).get(key, [])
if len(step_ids) != len(value):
step_ids = list(range(len(value)))
else:
step_ids = [int(x) for x in step_ids]
meta_info["image_valid_step_ids"][key] = step_ids
meta_info["keys"][key] = []
for i, image in enumerate(tqdm(value)):
step_id = str(step_ids[i]).zfill(4)
seg_image = seg_array_to_uint16_png(np.asarray(image))
txn.put(
f"{key}/{step_id}".encode('utf-8'),
pickle.dumps(cv2.imencode('.png', seg_image)[1])
)
meta_info["keys"][key].append(f"{key}/{step_id}".encode('utf-8'))
meta_info["num_steps"] = self.log_num_steps
txn.commit()
lmdb_env.close()
pickle.dump(meta_info, open(os.path.join(save_dir, "meta_info.pkl"), "wb"))

View File

@@ -0,0 +1,128 @@
import numpy as np
from omni.isaac.core.utils.prims import get_prim_at_path
from omni.isaac.core.utils.transformations import get_relative_transform
from omni.isaac.sensor import Camera
def _get_annotator(camera: Camera, annotator_name: str):
custom_annotators = getattr(camera, "_custom_annotators", None)
if not isinstance(custom_annotators, dict):
return None
return custom_annotators.get(annotator_name)
def _get_frame(frame):
if isinstance(frame, np.ndarray) and frame.size > 0:
return frame[:, :, :3]
return None
def _get_depth(depth):
if isinstance(depth, np.ndarray) and depth.size > 0:
return depth
return None
def _get_rgb_image(camera: Camera):
    """Fetch the camera's color frame according to its ``output_mode``.

    "rgb" reads the camera's RGBA buffer directly; "diffuse_albedo" goes
    through the "DiffuseAlbedo" annotator. Any other mode raises
    NotImplementedError. Missing annotator or empty frame yields None.
    """
    output_mode = getattr(camera, "output_mode", "rgb")
    if output_mode == "rgb":
        return _get_frame(camera.get_rgba())
    if output_mode == "diffuse_albedo":
        albedo = _get_annotator(camera, "DiffuseAlbedo")
        return None if albedo is None else _get_frame(albedo.get_data())
    raise NotImplementedError(f"Unsupported output mode: {output_mode}")
def _get_depth_image(camera: Camera):
    """Read the distance-to-image-plane annotator; None if absent or empty."""
    annotator = _get_annotator(camera, "distance_to_image_plane")
    return None if annotator is None else _get_depth(annotator.get_data())
def _get_object_mask(camera: Camera):
    """Read the semantic-segmentation annotator.

    Returns ``{"mask": ndarray, "id2labels": dict}`` when the annotator yields
    a well-formed payload (a dict with "data"/"info", where info carries
    "idToLabels" and the mask is a non-empty ndarray); otherwise None.
    """
    annotator = _get_annotator(camera, "semantic_segmentation")
    if annotator is None:
        return None
    payload = annotator.get_data()
    if not isinstance(payload, dict):
        return None
    info = payload.get("info")
    if not isinstance(info, dict) or "idToLabels" not in info:
        return None
    mask = payload.get("data")
    if not isinstance(mask, np.ndarray) or mask.size == 0:
        return None
    return {"mask": mask, "id2labels": info["idToLabels"]}
def _get_bbox(camera: Camera, bbox_type: str):
    """Read a bounding-box annotator named ``bbox_type``.

    Returns a ``(data, id2labels)`` tuple when the annotator's payload is a
    dict containing both "data" and a valid "info" with "idToLabels";
    otherwise None.
    """
    annotator = _get_annotator(camera, bbox_type)
    if annotator is None:
        return None
    payload = annotator.get_data()
    if not isinstance(payload, dict):
        return None
    info = payload.get("info")
    if not isinstance(info, dict) or "idToLabels" not in info:
        return None
    if "data" not in payload:
        return None
    return payload["data"], info["idToLabels"]
def _get_motion_vectors(camera: Camera):
    """Read the motion-vectors annotator; None if absent or its data is empty/non-ndarray."""
    annotator = _get_annotator(camera, "motion_vectors")
    if annotator is None:
        return None
    data = annotator.get_data()
    return data if isinstance(data, np.ndarray) and data.size else None
def _get_camera2env_pose(camera: Camera):
    """Compute the transform of the camera prim relative to its root prim.

    Requires both ``prim_path`` and ``root_prim_path`` to be set (and truthy)
    on the camera; otherwise returns None.
    """
    prim_path = getattr(camera, "prim_path", None)
    root_path = getattr(camera, "root_prim_path", None)
    if prim_path and root_path:
        return get_relative_transform(get_prim_at_path(prim_path), get_prim_at_path(root_path))
    return None
def _get_camera_params(camera: Camera):
camera_matrix = getattr(camera, "is_camera_matrix", None)
if camera_matrix is None:
return None
if isinstance(camera_matrix, np.ndarray):
return camera_matrix.tolist()
return camera_matrix
def get_src(camera: Camera, data_type: str):
    """Unified accessor for camera outputs.

    Dispatches ``data_type`` to the matching private reader: "rgb", "depth",
    "seg", "bbox2d_tight", "bbox2d_loose", "bbox3d", "motion_vectors",
    "camera2env_pose", "camera_params". Unknown types raise
    NotImplementedError.
    """
    single_arg_readers = {
        "rgb": _get_rgb_image,
        "depth": _get_depth_image,
        "seg": _get_object_mask,
        "motion_vectors": _get_motion_vectors,
        "camera2env_pose": _get_camera2env_pose,
        "camera_params": _get_camera_params,
    }
    reader = single_arg_readers.get(data_type)
    if reader is not None:
        return reader(camera)
    bbox_annotator_names = {
        "bbox2d_tight": "bounding_box_2d_tight",
        "bbox2d_loose": "bounding_box_2d_loose",
        "bbox3d": "bounding_box_3d",
    }
    if data_type in bbox_annotator_names:
        return _get_bbox(camera, bbox_annotator_names[data_type])
    raise NotImplementedError(f"Unsupported source type: {data_type}")