rich annotations & update open-pi fsdp explanations
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import numpy as np
|
||||
import omni.replicator.core as rep
|
||||
from core.cameras.base_camera import register_camera
|
||||
from core.utils.camera_utils import get_src
|
||||
from omni.isaac.core.prims import XFormPrim
|
||||
from omni.isaac.core.utils.prims import get_prim_at_path
|
||||
from omni.isaac.core.utils.transformations import (
|
||||
@@ -40,9 +41,25 @@ class CustomCamera(Camera):
|
||||
**kwargs,
|
||||
)
|
||||
self.initialize()
|
||||
self.add_motion_vectors_to_frame()
|
||||
self.add_semantic_segmentation_to_frame()
|
||||
self.add_distance_to_image_plane_to_frame()
|
||||
self.with_distance = cfg["params"].get("with_distance", True)
|
||||
self.with_semantic = cfg["params"].get("with_semantic", False)
|
||||
self.with_bbox2d = cfg["params"].get("with_bbox2d", False)
|
||||
self.with_bbox3d = cfg["params"].get("with_bbox3d", False)
|
||||
# Motion vectors are high-volume outputs; keep default off unless explicitly enabled in config.
|
||||
self.with_motion_vector = cfg["params"].get("with_motion_vector", False)
|
||||
self.with_depth = cfg["params"].get("depth", False)
|
||||
|
||||
if self.with_distance:
|
||||
self.add_distance_to_image_plane_to_frame()
|
||||
if self.with_semantic:
|
||||
self.add_semantic_segmentation_to_frame()
|
||||
if self.with_bbox2d:
|
||||
self.add_bounding_box_2d_tight_to_frame()
|
||||
self.add_bounding_box_2d_loose_to_frame()
|
||||
if self.with_bbox3d:
|
||||
self.add_bounding_box_3d_to_frame()
|
||||
if self.with_motion_vector:
|
||||
self.add_motion_vectors_to_frame()
|
||||
|
||||
# ===== From cfg =====
|
||||
pixel_size = cfg["params"].get("pixel_size")
|
||||
@@ -155,9 +172,27 @@ class CustomCamera(Camera):
|
||||
|
||||
obs = {
|
||||
"color_image": color_image,
|
||||
"depth_image": self.get_depth(),
|
||||
"camera2env_pose": camera2env_pose,
|
||||
"camera_params": self.is_camera_matrix.tolist(),
|
||||
}
|
||||
if self.with_depth:
|
||||
obs["depth_image"] = get_src(self, "depth"),
|
||||
|
||||
seg_data = get_src(self, "seg")
|
||||
if seg_data is not None:
|
||||
obs["semantic_mask"] = seg_data["mask"]
|
||||
obs["semantic_mask_id2labels"] = seg_data["id2labels"]
|
||||
|
||||
bbox2d_tight = get_src(self, "bbox2d_tight")
|
||||
if bbox2d_tight is not None:
|
||||
obs["bbox2d_tight"], obs["bbox2d_tight_id2labels"] = bbox2d_tight
|
||||
bbox2d_loose = get_src(self, "bbox2d_loose")
|
||||
if bbox2d_loose is not None:
|
||||
obs["bbox2d_loose"], obs["bbox2d_loose_id2labels"] = bbox2d_loose
|
||||
bbox3d = get_src(self, "bbox3d")
|
||||
if bbox3d is not None:
|
||||
obs["bbox3d"], obs["bbox3d_id2labels"] = bbox3d
|
||||
motion_vectors = get_src(self, "motion_vectors")
|
||||
if motion_vectors is not None:
|
||||
obs["motion_vectors"] = motion_vectors
|
||||
return obs
|
||||
|
||||
@@ -27,6 +27,10 @@ class BaseLogger(ABC):
|
||||
self.object_data_logger: Dict[str, List[Any]] = {}
|
||||
self.color_image_logger: Dict[str, List[Any]] = {}
|
||||
self.depth_image_logger: Dict[str, List[Any]] = {}
|
||||
self.seg_image_logger: Dict[str, List[Any]] = {}
|
||||
self.color_image_step_logger: Dict[str, List[Any]] = {}
|
||||
self.depth_image_step_logger: Dict[str, List[Any]] = {}
|
||||
self.seg_image_step_logger: Dict[str, List[Any]] = {}
|
||||
|
||||
def update_tpi_initial_info(self, tpi_initial_info):
|
||||
self.tpi_initial_info = tpi_initial_info
|
||||
@@ -67,17 +71,50 @@ class BaseLogger(ABC):
|
||||
self.scalar_data_logger[robot][key] = []
|
||||
self.scalar_data_logger[robot][key].append(value)
|
||||
|
||||
def add_color_image(self, robot, key, value):
|
||||
if robot not in self.color_image_logger:
|
||||
self.color_image_logger[robot] = {}
|
||||
if key not in self.color_image_logger[robot]:
|
||||
self.color_image_logger[robot][key] = []
|
||||
self.color_image_logger[robot][key].append(value)
|
||||
def _add_image_data(self, data_logger, step_logger, robot, key, value, step_idx=None):
|
||||
if robot not in data_logger:
|
||||
data_logger[robot] = {}
|
||||
if key not in data_logger[robot]:
|
||||
data_logger[robot][key] = []
|
||||
data_logger[robot][key].append(value)
|
||||
|
||||
# def add_depth_image(self, key, value):
|
||||
# if key not in self.depth_image_logger:
|
||||
# self.depth_image_logger[key] = []
|
||||
# self.depth_image_logger[key].append(value)
|
||||
if robot not in step_logger:
|
||||
step_logger[robot] = {}
|
||||
if key not in step_logger[robot]:
|
||||
step_logger[robot][key] = []
|
||||
if step_idx is None:
|
||||
step_idx = len(data_logger[robot][key]) - 1
|
||||
step_logger[robot][key].append(int(step_idx))
|
||||
|
||||
def add_color_image(self, robot, key, value, step_idx=None):
    """Log one color frame for *robot*/*key*, tracking which step it belongs to."""
    self._add_image_data(self.color_image_logger, self.color_image_step_logger, robot, key, value, step_idx=step_idx)
|
||||
|
||||
def add_depth_image(self, robot, key, value, step_idx=None):
    """Log one depth frame for *robot*/*key*, tracking which step it belongs to."""
    self._add_image_data(self.depth_image_logger, self.depth_image_step_logger, robot, key, value, step_idx=step_idx)
|
||||
|
||||
def add_seg_image(self, robot, key, value, step_idx=None):
    """Log one segmentation frame for *robot*/*key*, tracking its step index."""
    self._add_image_data(self.seg_image_logger, self.seg_image_step_logger, robot, key, value, step_idx=step_idx)
|
||||
|
||||
def clear(
|
||||
self,
|
||||
@@ -97,6 +134,10 @@ class BaseLogger(ABC):
|
||||
self.scalar_data_logger = {}
|
||||
self.color_image_logger = {}
|
||||
self.depth_image_logger = {}
|
||||
self.seg_image_logger = {}
|
||||
self.color_image_step_logger = {}
|
||||
self.depth_image_step_logger = {}
|
||||
self.seg_image_step_logger = {}
|
||||
|
||||
@abstractmethod
|
||||
def close(self):
|
||||
|
||||
@@ -13,6 +13,16 @@ from tqdm import tqdm
|
||||
|
||||
DEFAULT_RGB_SCALE_FACTOR = 256000.0
|
||||
|
||||
def float_array_to_uint16_png(float_array, scale=10000.0):
    """Quantize a float array (e.g. metric depth) to uint16 for PNG storage.

    NaN and +/-inf are mapped to 0, values are multiplied by ``scale``
    (default 10000, i.e. 0.1 mm resolution if the input is in meters),
    rounded, and clipped to the uint16 range [0, 65535].

    Args:
        float_array: Array-like of floats to quantize.
        scale: Multiplicative quantization factor; kept as a parameter so
            other resolutions can reuse this helper (default preserves the
            original hard-coded behavior).

    Returns:
        ``np.ndarray`` of dtype ``np.uint16`` with the same shape as the input.
    """
    array = np.nan_to_num(float_array, nan=0.0, posinf=0.0, neginf=0.0)
    array = np.round(array * scale)
    array = np.clip(array, 0, 65535)
    return array.astype(np.uint16)
|
||||
|
||||
def seg_array_to_uint16_png(seg_array):
    """Sanitize a segmentation-id array into uint16 for PNG storage.

    NaN and +/-inf become 0; values are clipped to [0, 65535] before the
    dtype cast (which truncates any fractional part).
    """
    sanitized = np.nan_to_num(seg_array, nan=0.0, posinf=0.0, neginf=0.0)
    return np.clip(sanitized, 0, 65535).astype(np.uint16)
|
||||
|
||||
# pylint: disable=line-too-long,unused-argument
|
||||
class LmdbLogger(BaseLogger):
|
||||
@@ -63,6 +73,7 @@ class LmdbLogger(BaseLogger):
|
||||
meta_info["tpi_initial_info"] = self.tpi_initial_info
|
||||
meta_info["collect_info"] = self.collect_info
|
||||
meta_info["version"] = self.version
|
||||
meta_info["image_valid_step_ids"] = {}
|
||||
|
||||
# Lmdb
|
||||
log_path_lmdb = save_dir / "lmdb"
|
||||
@@ -139,13 +150,20 @@ class LmdbLogger(BaseLogger):
|
||||
|
||||
# Save color images
|
||||
if save_img:
|
||||
for key, value in self.color_image_logger[robot_name].items():
|
||||
for key, value in self.color_image_logger.get(robot_name, {}).items():
|
||||
root_img_path = save_dir / f"{key}"
|
||||
root_img_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
step_ids = self.color_image_step_logger.get(robot_name, {}).get(key, [])
|
||||
if len(step_ids) != len(value):
|
||||
step_ids = list(range(len(value)))
|
||||
else:
|
||||
step_ids = [int(x) for x in step_ids]
|
||||
meta_info["image_valid_step_ids"][key] = step_ids
|
||||
|
||||
meta_info["keys"][key] = []
|
||||
for i, image in enumerate(tqdm(value)):
|
||||
step_id = str(i).zfill(4)
|
||||
step_id = str(step_ids[i]).zfill(4)
|
||||
txn.put(
|
||||
f"{key}/{step_id}".encode("utf-8"),
|
||||
pickle.dumps(cv2.imencode(".jpg", image.astype(np.uint8))[1]),
|
||||
@@ -154,7 +172,49 @@ class LmdbLogger(BaseLogger):
|
||||
|
||||
imageio.mimsave(os.path.join(root_img_path, "demo.mp4"), value, fps=15)
|
||||
|
||||
meta_info["num_steps"] = len(value)
|
||||
for key, value in self.depth_image_logger.get(robot_name, {}).items():
|
||||
root_img_path = save_dir / f"{key}"
|
||||
root_img_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
step_ids = self.depth_image_step_logger.get(robot_name, {}).get(key, [])
|
||||
if len(step_ids) != len(value):
|
||||
step_ids = list(range(len(value)))
|
||||
else:
|
||||
step_ids = [int(x) for x in step_ids]
|
||||
meta_info["image_valid_step_ids"][key] = step_ids
|
||||
|
||||
meta_info["keys"][key] = []
|
||||
for i, image in enumerate(tqdm(value)):
|
||||
step_id = str(step_ids[i]).zfill(4)
|
||||
depth_image = float_array_to_uint16_png(np.asarray(image))
|
||||
txn.put(
|
||||
f"{key}/{step_id}".encode('utf-8'),
|
||||
pickle.dumps(cv2.imencode('.png', depth_image)[1])
|
||||
)
|
||||
meta_info["keys"][key].append(f"{key}/{step_id}".encode('utf-8'))
|
||||
|
||||
for key, value in self.seg_image_logger.get(robot_name, {}).items():
|
||||
root_img_path = save_dir / f"{key}"
|
||||
root_img_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
step_ids = self.seg_image_step_logger.get(robot_name, {}).get(key, [])
|
||||
if len(step_ids) != len(value):
|
||||
step_ids = list(range(len(value)))
|
||||
else:
|
||||
step_ids = [int(x) for x in step_ids]
|
||||
meta_info["image_valid_step_ids"][key] = step_ids
|
||||
|
||||
meta_info["keys"][key] = []
|
||||
for i, image in enumerate(tqdm(value)):
|
||||
step_id = str(step_ids[i]).zfill(4)
|
||||
seg_image = seg_array_to_uint16_png(np.asarray(image))
|
||||
txn.put(
|
||||
f"{key}/{step_id}".encode('utf-8'),
|
||||
pickle.dumps(cv2.imencode('.png', seg_image)[1])
|
||||
)
|
||||
meta_info["keys"][key].append(f"{key}/{step_id}".encode('utf-8'))
|
||||
|
||||
meta_info["num_steps"] = self.log_num_steps
|
||||
txn.commit()
|
||||
lmdb_env.close()
|
||||
pickle.dump(meta_info, open(os.path.join(save_dir, "meta_info.pkl"), "wb"))
|
||||
|
||||
128
workflows/simbox/core/utils/camera_utils.py
Normal file
128
workflows/simbox/core/utils/camera_utils.py
Normal file
@@ -0,0 +1,128 @@
|
||||
import numpy as np
|
||||
from omni.isaac.core.utils.prims import get_prim_at_path
|
||||
from omni.isaac.core.utils.transformations import get_relative_transform
|
||||
from omni.isaac.sensor import Camera
|
||||
|
||||
|
||||
def _get_annotator(camera: Camera, annotator_name: str):
    """Return the named entry from the camera's custom annotator registry, or None."""
    registry = getattr(camera, "_custom_annotators", None)
    if isinstance(registry, dict):
        return registry.get(annotator_name)
    return None
|
||||
|
||||
|
||||
def _get_frame(frame):
|
||||
if isinstance(frame, np.ndarray) and frame.size > 0:
|
||||
return frame[:, :, :3]
|
||||
return None
|
||||
|
||||
|
||||
def _get_depth(depth):
|
||||
if isinstance(depth, np.ndarray) and depth.size > 0:
|
||||
return depth
|
||||
return None
|
||||
|
||||
|
||||
def _get_rgb_image(camera: Camera):
    """Fetch the current color frame according to the camera's output mode.

    Cameras without an ``output_mode`` attribute default to plain RGB.
    """
    output_mode = getattr(camera, "output_mode", "rgb")
    if output_mode == "rgb":
        return _get_frame(camera.get_rgba())
    if output_mode == "diffuse_albedo":
        annotator = _get_annotator(camera, "DiffuseAlbedo")
        return None if annotator is None else _get_frame(annotator.get_data())
    raise NotImplementedError(f"Unsupported output mode: {output_mode}")
|
||||
|
||||
|
||||
def _get_depth_image(camera: Camera):
    """Distance-to-image-plane data from the depth annotator, or None if absent."""
    annotator = _get_annotator(camera, "distance_to_image_plane")
    return None if annotator is None else _get_depth(annotator.get_data())
|
||||
|
||||
|
||||
def _get_object_mask(camera: Camera):
    """Semantic-segmentation result as ``{"mask", "id2labels"}``, or None.

    Returns None whenever the annotator is missing or its payload does not
    carry a non-empty mask plus an ``idToLabels`` mapping.
    """
    annotator = _get_annotator(camera, "semantic_segmentation")
    if annotator is None:
        return None
    annotation_data = annotator.get_data()
    if not isinstance(annotation_data, dict):
        return None
    info = annotation_data.get("info")
    if not isinstance(info, dict) or "idToLabels" not in info:
        return None
    mask = annotation_data.get("data")
    if not isinstance(mask, np.ndarray) or mask.size == 0:
        return None
    return {"mask": mask, "id2labels": info["idToLabels"]}
|
||||
|
||||
|
||||
def _get_bbox(camera: Camera, bbox_type: str):
    """Bounding-box payload as ``(data, idToLabels)``, or None if unavailable."""
    annotator = _get_annotator(camera, bbox_type)
    if annotator is None:
        return None
    annotation_data = annotator.get_data()
    if not isinstance(annotation_data, dict):
        return None
    if "data" not in annotation_data or "info" not in annotation_data:
        return None
    info = annotation_data["info"]
    if not isinstance(info, dict) or "idToLabels" not in info:
        return None
    return annotation_data["data"], info["idToLabels"]
|
||||
|
||||
|
||||
def _get_motion_vectors(camera: Camera):
    """Non-empty motion-vector array from the annotator, or None."""
    annotator = _get_annotator(camera, "motion_vectors")
    if annotator is None:
        return None
    data = annotator.get_data()
    return data if isinstance(data, np.ndarray) and data.size > 0 else None
|
||||
|
||||
|
||||
def _get_camera2env_pose(camera: Camera):
    """Camera pose relative to its root prim, or None if either path is unset."""
    prim_path = getattr(camera, "prim_path", None)
    root_prim_path = getattr(camera, "root_prim_path", None)
    if prim_path and root_prim_path:
        return get_relative_transform(get_prim_at_path(prim_path), get_prim_at_path(root_prim_path))
    return None
|
||||
|
||||
|
||||
def _get_camera_params(camera: Camera):
    """Intrinsic matrix as a plain list (ndarray converted), or None if unset."""
    matrix = getattr(camera, "is_camera_matrix", None)
    if matrix is None:
        return None
    return matrix.tolist() if isinstance(matrix, np.ndarray) else matrix
|
||||
|
||||
|
||||
def get_src(camera: Camera, data_type: str):
    """Unified accessor for camera outputs keyed by a short data-type name.

    Supported keys: rgb, depth, seg, bbox2d_tight, bbox2d_loose, bbox3d,
    motion_vectors, camera2env_pose, camera_params. Raises
    NotImplementedError for anything else.
    """
    simple_sources = {
        "rgb": _get_rgb_image,
        "depth": _get_depth_image,
        "seg": _get_object_mask,
        "motion_vectors": _get_motion_vectors,
        "camera2env_pose": _get_camera2env_pose,
        "camera_params": _get_camera_params,
    }
    if data_type in simple_sources:
        return simple_sources[data_type](camera)
    bbox_sources = {
        "bbox2d_tight": "bounding_box_2d_tight",
        "bbox2d_loose": "bounding_box_2d_loose",
        "bbox3d": "bounding_box_3d",
    }
    if data_type in bbox_sources:
        return _get_bbox(camera, bbox_sources[data_type])
    raise NotImplementedError(f"Unsupported source type: {data_type}")
|
||||
@@ -168,6 +168,8 @@ class SimBoxDualWorkFlow(NimbusWorkFlow):
|
||||
collect_info=self.task_cfg["data"]["collect_info"],
|
||||
version=self.task_cfg["data"].get("version", "v1.0"),
|
||||
)
|
||||
# Motion vectors are large dense tensors; keep LMDB logging opt-in.
|
||||
self.log_motion_vectors = bool(self.task_cfg["data"].get("log_motion_vectors", False))
|
||||
|
||||
if self.random_seed is not None:
|
||||
seed = self.random_seed
|
||||
@@ -530,16 +532,71 @@ class SimBoxDualWorkFlow(NimbusWorkFlow):
|
||||
for key, value in self.task.cameras.items():
|
||||
for robot_name, _ in self.task.robots.items():
|
||||
if robot_name in key:
|
||||
rgb_img = value.get_observations()["color_image"]
|
||||
camera_obs = value.get_observations()
|
||||
rgb_img = camera_obs["color_image"]
|
||||
# Special processing if enabled
|
||||
camera2env_pose = value.get_observations()["camera2env_pose"]
|
||||
camera2env_pose = camera_obs["camera2env_pose"]
|
||||
save_camera_name = key.replace(f"{robot_name}_", "")
|
||||
self.logger.add_color_image(robot_name, "images.rgb." + save_camera_name, rgb_img)
|
||||
self.logger.add_scalar_data(robot_name, "camera2env_pose." + save_camera_name, camera2env_pose)
|
||||
self.logger.add_color_image(
|
||||
robot_name, "images.rgb." + save_camera_name, rgb_img, step_idx=step_idx
|
||||
)
|
||||
if "depth_image" in camera_obs:
|
||||
depth_image = camera_obs["depth_image"]
|
||||
depth_img = np.nan_to_num(depth_img, nan=0.0, posinf=0.0, neginf=0.0)
|
||||
self.logger.add_depth_image(
|
||||
robot_name, "images.depth." + save_camera_name, depth_img, step_idx=step_idx
|
||||
)
|
||||
if "semantic_mask" in camera_obs:
|
||||
self.logger.add_seg_image(
|
||||
robot_name, "images.seg." + save_camera_name, seg_mask, step_idx=step_idx
|
||||
)
|
||||
if "semantic_mask_id2labels" in camera_obs:
|
||||
self.logger.add_scalar_data(
|
||||
robot_name,
|
||||
"labels.seg." + save_camera_name,
|
||||
camera_obs["semantic_mask_id2labels"],
|
||||
)
|
||||
if "bbox2d_tight" in camera_obs:
|
||||
self.logger.add_scalar_data(
|
||||
robot_name, "labels.bbox2d_tight." + save_camera_name, camera_obs["bbox2d_tight"]
|
||||
)
|
||||
if "bbox2d_tight_id2labels" in camera_obs:
|
||||
self.logger.add_scalar_data(
|
||||
robot_name,
|
||||
"labels.bbox2d_tight_id2labels." + save_camera_name,
|
||||
camera_obs["bbox2d_tight_id2labels"],
|
||||
)
|
||||
if "bbox2d_loose" in camera_obs:
|
||||
self.logger.add_scalar_data(
|
||||
robot_name, "labels.bbox2d_loose." + save_camera_name, camera_obs["bbox2d_loose"]
|
||||
)
|
||||
if "bbox2d_loose_id2labels" in camera_obs:
|
||||
self.logger.add_scalar_data(
|
||||
robot_name,
|
||||
"labels.bbox2d_loose_id2labels." + save_camera_name,
|
||||
camera_obs["bbox2d_loose_id2labels"],
|
||||
)
|
||||
if "bbox3d" in camera_obs:
|
||||
self.logger.add_scalar_data(
|
||||
robot_name, "labels.bbox3d." + save_camera_name, camera_obs["bbox3d"]
|
||||
)
|
||||
if "bbox3d_id2labels" in camera_obs:
|
||||
self.logger.add_scalar_data(
|
||||
robot_name,
|
||||
"labels.bbox3d_id2labels." + save_camera_name,
|
||||
camera_obs["bbox3d_id2labels"],
|
||||
)
|
||||
if self.log_motion_vectors and "motion_vectors" in camera_obs:
|
||||
self.logger.add_scalar_data(
|
||||
robot_name, "labels.motion_vectors." + save_camera_name, camera_obs["motion_vectors"]
|
||||
)
|
||||
self.logger.add_scalar_data(
|
||||
robot_name, "camera2env_pose." + save_camera_name, camera2env_pose
|
||||
)
|
||||
if step_idx == 0:
|
||||
save_camera_name = key.replace(f"{robot_name}_", "")
|
||||
self.logger.add_json_data(
|
||||
robot_name, f"{save_camera_name}_camera_params", value.get_observations()["camera_params"]
|
||||
robot_name, f"{save_camera_name}_camera_params", camera_obs["camera_params"]
|
||||
)
|
||||
|
||||
# depth_img = get_src(value, "depth")
|
||||
|
||||
Reference in New Issue
Block a user