From cf8b043bc85ddb58841d70f27887e837e7e08fdd Mon Sep 17 00:00:00 2001
From: Leon998 <1158046368@qq.com>
Date: Wed, 18 Mar 2026 00:03:11 +0800
Subject: [PATCH] articulation tasks commit

---
 policy/lmdb2lerobotv21/convertv21_to_v30.py   | 606 ++++++++++++++++++
 .../simbox/core/objects/articulated_object.py |  10 +-
 workflows/simbox/core/utils/dr.py             |  10 +-
 workflows/simbox_dual_workflow.py             |  29 +-
 4 files changed, 628 insertions(+), 27 deletions(-)
 create mode 100644 policy/lmdb2lerobotv21/convertv21_to_v30.py

diff --git a/policy/lmdb2lerobotv21/convertv21_to_v30.py b/policy/lmdb2lerobotv21/convertv21_to_v30.py
new file mode 100644
index 0000000..e1c5ffa
--- /dev/null
+++ b/policy/lmdb2lerobotv21/convertv21_to_v30.py
@@ -0,0 +1,606 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Convert local LeRobot datasets from codebase version 2.1 to 3.0.
+
+For each selected dataset directory the script:
+
+- Writes an updated `info.json` (codebase version, file sizes, path templates).
+- Converts the legacy `tasks.jsonl` into the new tasks table.
+- Concatenates per-episode parquet files into larger data files.
+- Concatenates per-episode videos of each camera into larger video files.
+- Writes per-episode metadata and the aggregated stats.
+
+Usage:
+
+The dataset roots and task globs are hard-coded in the `__main__` block;
+`--start_ratio` / `--end_ratio` select which slice of the sorted dataset list to convert:
+```bash
+python convertv21_to_v30.py --start_ratio=0.0 --end_ratio=0.5
+```
+
+"""
+
+import argparse
+import logging
+import shutil
+import glob, os
+from pathlib import Path
+from typing import Any
+from pdb import set_trace
+
+import jsonlines
+import pandas as pd
+import pyarrow as pa
+import tqdm
+from datasets import Dataset, Features, Image
+from huggingface_hub import HfApi, snapshot_download
+from requests import HTTPError
+
+from lerobot.datasets.compute_stats import aggregate_stats
+from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
+from lerobot.datasets.utils import (
+    DEFAULT_CHUNK_SIZE,
+    DEFAULT_DATA_FILE_SIZE_IN_MB,
+    DEFAULT_DATA_PATH,
+    DEFAULT_VIDEO_FILE_SIZE_IN_MB,
+    DEFAULT_VIDEO_PATH,
+    LEGACY_EPISODES_PATH,
+    LEGACY_EPISODES_STATS_PATH,
+    LEGACY_TASKS_PATH,
+    cast_stats_to_numpy,
+    flatten_dict,
+    get_file_size_in_mb,
+    get_parquet_file_size_in_mb,
+    get_parquet_num_frames,
+    load_info,
+    update_chunk_file_indices,
+    write_episodes,
+    write_info,
+    write_stats,
+    write_tasks,
+)
+from lerobot.datasets.video_utils import concatenate_video_files, get_video_duration_in_s
+from lerobot.utils.constants import HF_LEROBOT_HOME
+from lerobot.utils.utils import init_logging
+
+V21 = "v2.1"
+V30 = "v3.0"
+
+"""
+-------------------------
+OLD
+data/chunk-000/episode_000000.parquet
+
+NEW
+data/chunk-000/file_000.parquet
+-------------------------
+OLD
+videos/chunk-000/CAMERA/episode_000000.mp4
+
+NEW
+videos/CAMERA/chunk-000/file_000.mp4
+-------------------------
+OLD
+episodes.jsonl
+{"episode_index": 1, "tasks": ["Put the blue block in the green bowl"], "length": 266}
+
+NEW
+meta/episodes/chunk-000/episodes_000.parquet
+episode_index | video_chunk_index | video_file_index | data_chunk_index | data_file_index | tasks | length
+-------------------------
+OLD
+tasks.jsonl
+{"task_index": 1, "task": "Put the blue block in the green bowl"}
+
+NEW
+meta/tasks/chunk-000/file_000.parquet
+task_index | task
+-------------------------
+OLD
+episodes_stats.jsonl
+
+NEW
+meta/episodes_stats/chunk-000/file_000.parquet
+episode_index | mean | std | min | max
+-------------------------
+UPDATE
+meta/info.json
+-------------------------
+"""
+
+
+def load_jsonlines(fpath: Path) -> list[Any]:
+    """Read every record of the JSON Lines file at `fpath` into a list."""
+    with jsonlines.open(fpath, "r") as reader:
+        return list(reader)
+
+
+def legacy_load_episodes(local_dir: Path) -> dict:
+    """Load the legacy episodes file and key its records by `episode_index`, in ascending order."""
+    episodes = load_jsonlines(local_dir / LEGACY_EPISODES_PATH)
+    return {item["episode_index"]: item for item in sorted(episodes, key=lambda x: x["episode_index"])}
+
+
+def legacy_load_episodes_stats(local_dir: Path) -> dict:
+    """Load the legacy per-episode stats, keyed by `episode_index`, passed through `cast_stats_to_numpy`."""
+    episodes_stats = load_jsonlines(local_dir / LEGACY_EPISODES_STATS_PATH)
+    return {
+        item["episode_index"]: cast_stats_to_numpy(item["stats"])
+        for item in sorted(episodes_stats, key=lambda x: x["episode_index"])
+    }
+
+
+def legacy_load_tasks(local_dir: Path) -> tuple[dict, dict]:
+    """Load the legacy tasks file and return `(task_index -> task, task -> task_index)`."""
+    tasks = load_jsonlines(local_dir / LEGACY_TASKS_PATH)
+    tasks = {item["task_index"]: item["task"] for item in sorted(tasks, key=lambda x: x["task_index"])}
+    task_to_task_index = {task: task_index for task_index, task in tasks.items()}
+    return tasks, task_to_task_index
+
+
+def validate_local_dataset_version(local_path: Path) -> None:
+    """Validate that the local dataset has the expected v2.1 version."""
+    info = load_info(local_path)
+    dataset_version = info.get("codebase_version", "unknown")
+    if dataset_version != V21:
+        raise ValueError(
+            f"Local dataset has codebase version '{dataset_version}', expected '{V21}'. "
+            f"This script is specifically for converting v2.1 datasets to v3.0."
+        )
+
+
+def convert_tasks(root, new_root):
+    """Convert the legacy tasks file of `root` into a task-indexed DataFrame written with `write_tasks`."""
+    logging.info(f"Converting tasks from {root} to {new_root}")
+    tasks, _ = legacy_load_tasks(root)
+    task_indices = tasks.keys()
+    task_strings = tasks.values()
+    df_tasks = pd.DataFrame({"task_index": task_indices}, index=task_strings)
+    write_tasks(df_tasks, new_root)
+
+
+def concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys):
+    """Concatenate the parquet files in `paths_to_cat` into one data file, typing `image_keys` as HF `Image`."""
+    import pyarrow.parquet as pq
+    import pyarrow as pa
+    from datasets import Features, Image
+
+    # 1. Read all tables
+    tables = [pq.read_table(f) for f in paths_to_cat]
+
+    # 2. Concatenate with type promotion
+    table = pa.concat_tables(tables, promote=True)
+
+    # 3. Build HF Features from arrow schema
+    features = Features.from_arrow_schema(table.schema)
+
+    # 4. Override image columns to be HF Image()
+    for key in image_keys:
+        features[key] = Image()
+
+    # 5. Convert back to arrow schema with updated metadata
+    arrow_schema = features.arrow_schema
+
+    # 6. Write parquet with correct schema
+    path = new_root / DEFAULT_DATA_PATH.format(chunk_index=chunk_idx, file_index=file_idx)
+    path.parent.mkdir(parents=True, exist_ok=True)
+
+    pq.write_table(table.cast(arrow_schema), path)
+
+
+def convert_data(root: Path, new_root: Path, data_file_size_in_mb: int):
+    """Concatenate per-episode parquet files into size-bounded data files.
+
+    Returns one metadata dict per episode holding the chunk/file index of the data file
+    the episode was written to and its `[dataset_from_index, dataset_to_index)` frame range.
+    """
+    data_dir = root / "data"
+    ep_paths = sorted(data_dir.glob("*/*.parquet"))
+
+    image_keys = get_image_keys(root)
+
+    ep_idx = 0
+    chunk_idx = 0
+    file_idx = 0
+    size_in_mb = 0
+    num_frames = 0
+    paths_to_cat = []
+    episodes_metadata = []
+
+    logging.info(f"Converting data files from {len(ep_paths)} episodes")
+
+    for ep_path in tqdm.tqdm(ep_paths, desc="convert data files"):
+        ep_size_in_mb = get_parquet_file_size_in_mb(ep_path)
+        ep_num_frames = get_parquet_num_frames(ep_path)
+
+        # Roll over to a new file BEFORE recording this episode's metadata, so the recorded
+        # chunk/file indices always point at the file the episode is actually written to.
+        if paths_to_cat and size_in_mb + ep_size_in_mb >= data_file_size_in_mb:
+            concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys)
+            chunk_idx, file_idx = update_chunk_file_indices(chunk_idx, file_idx, DEFAULT_CHUNK_SIZE)
+            size_in_mb = 0
+            paths_to_cat = []
+
+        ep_metadata = {
+            "episode_index": ep_idx,
+            "data/chunk_index": chunk_idx,
+            "data/file_index": file_idx,
+            "dataset_from_index": num_frames,
+            "dataset_to_index": num_frames + ep_num_frames,
+        }
+        episodes_metadata.append(ep_metadata)
+        paths_to_cat.append(ep_path)
+        size_in_mb += ep_size_in_mb
+        num_frames += ep_num_frames
+        ep_idx += 1
+
+    # Write remaining data if any
+    if paths_to_cat:
+        concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys)
+
+    return episodes_metadata
+
+
+def get_video_keys(root):
+    """Return the feature keys of the dataset info whose dtype is "video"."""
+    info = load_info(root)
+    features = info["features"]
+    video_keys = [key for key, ft in features.items() if ft["dtype"] == "video"]
+    return video_keys
+
+
+def get_image_keys(root):
+    """Return the feature keys of the dataset info whose dtype is "image"."""
+    info = load_info(root)
+    features = info["features"]
+    image_keys = [key for key, ft in features.items() if ft["dtype"] == "image"]
+    return image_keys
+
+
+def convert_videos(root: Path, new_root: Path, video_file_size_in_mb: int):
+    """Convert the videos of every camera and merge the per-camera episode metadata.
+
+    Returns None when the dataset has no video features, otherwise one dict per episode.
+    """
+    logging.info(f"Converting videos from {root} to {new_root}")
+
+    video_keys = get_video_keys(root)
+    if len(video_keys) == 0:
+        return None
+
+    video_keys = sorted(video_keys)
+
+    eps_metadata_per_cam = []
+    for camera in video_keys:
+        eps_metadata = convert_videos_of_camera(root, new_root, camera, video_file_size_in_mb)
+        eps_metadata_per_cam.append(eps_metadata)
+
+    num_eps_per_cam = [len(eps_cam_map) for eps_cam_map in eps_metadata_per_cam]
+    if len(set(num_eps_per_cam)) != 1:
+        raise ValueError(f"All cams dont have same number of episodes ({num_eps_per_cam}).")
+
+    episods_metadata = []
+    num_cameras = len(video_keys)
+    num_episodes = num_eps_per_cam[0]
+    for ep_idx in tqdm.tqdm(range(num_episodes), desc="convert videos"):
+        # Sanity check
+        ep_ids = [eps_metadata_per_cam[cam_idx][ep_idx]["episode_index"] for cam_idx in range(num_cameras)]
+        ep_ids += [ep_idx]
+        if len(set(ep_ids)) != 1:
+            raise ValueError(f"All episode indices need to match ({ep_ids}).")
+
+        ep_dict = {}
+        for cam_idx in range(num_cameras):
+            ep_dict.update(eps_metadata_per_cam[cam_idx][ep_idx])
+        episods_metadata.append(ep_dict)
+
+    return episods_metadata
+
+
+def convert_videos_of_camera(root: Path, new_root: Path, video_key: str, video_file_size_in_mb: int):
+    """Concatenate the per-episode videos of `video_key` into size-bounded files; return per-episode metadata."""
+    # Access old paths to mp4
+    videos_dir = root / "videos"
+    ep_paths = sorted(videos_dir.glob(f"*/{video_key}/*.mp4"))
+
+    ep_idx = 0
+    chunk_idx = 0
+    file_idx = 0
+    size_in_mb = 0
+    duration_in_s = 0.0
+    paths_to_cat = []
+    episodes_metadata = []
+
+    for ep_path in tqdm.tqdm(ep_paths, desc=f"convert videos of {video_key}"):
+        ep_size_in_mb = get_file_size_in_mb(ep_path)
+        ep_duration_in_s = get_video_duration_in_s(ep_path)
+
+        # Check if adding this episode would exceed the limit
+        if size_in_mb + ep_size_in_mb >= video_file_size_in_mb and len(paths_to_cat) > 0:
+            # Size limit would be exceeded, save current accumulation WITHOUT this episode
+            concatenate_video_files(
+                paths_to_cat,
+                new_root
+                / DEFAULT_VIDEO_PATH.format(video_key=video_key, chunk_index=chunk_idx, file_index=file_idx),
+            )
+
+            # Update episodes metadata for the file we just saved
+            for i, _ in enumerate(paths_to_cat):
+                past_ep_idx = ep_idx - len(paths_to_cat) + i
+                episodes_metadata[past_ep_idx][f"videos/{video_key}/chunk_index"] = chunk_idx
+                episodes_metadata[past_ep_idx][f"videos/{video_key}/file_index"] = file_idx
+
+            # Move to next file and start fresh with current episode
+            chunk_idx, file_idx = update_chunk_file_indices(chunk_idx, file_idx, DEFAULT_CHUNK_SIZE)
+            size_in_mb = 0
+            duration_in_s = 0.0
+            paths_to_cat = []
+
+        # Add current episode metadata
+        ep_metadata = {
+            "episode_index": ep_idx,
+            f"videos/{video_key}/chunk_index": chunk_idx,  # Will be updated when file is saved
+            f"videos/{video_key}/file_index": file_idx,  # Will be updated when file is saved
+            f"videos/{video_key}/from_timestamp": duration_in_s,
+            f"videos/{video_key}/to_timestamp": duration_in_s + ep_duration_in_s,
+        }
+        episodes_metadata.append(ep_metadata)
+
+        # Add current episode to accumulation
+        paths_to_cat.append(ep_path)
+        size_in_mb += ep_size_in_mb
+        duration_in_s += ep_duration_in_s
+        ep_idx += 1
+
+    # Write remaining videos if any
+    if paths_to_cat:
+        concatenate_video_files(
+            paths_to_cat,
+            new_root
+            / DEFAULT_VIDEO_PATH.format(video_key=video_key, chunk_index=chunk_idx, file_index=file_idx),
+        )
+
+        # Update episodes metadata for the final file
+        for i, _ in enumerate(paths_to_cat):
+            past_ep_idx = ep_idx - len(paths_to_cat) + i
+            episodes_metadata[past_ep_idx][f"videos/{video_key}/chunk_index"] = chunk_idx
+            episodes_metadata[past_ep_idx][f"videos/{video_key}/file_index"] = file_idx
+
+    return episodes_metadata
+
+
+def generate_episode_metadata_dict(
+    episodes_legacy_metadata, episodes_metadata, episodes_stats, episodes_videos=None
+):
+    """Yield one merged metadata record per episode (data, video, legacy fields and flattened stats)."""
+    num_episodes = len(episodes_metadata)
+    episodes_legacy_metadata_vals = list(episodes_legacy_metadata.values())
+    episodes_stats_vals = list(episodes_stats.values())
+    episodes_stats_keys = list(episodes_stats.keys())
+
+    for i in range(num_episodes):
+        ep_legacy_metadata = episodes_legacy_metadata_vals[i]
+        ep_metadata = episodes_metadata[i]
+        ep_stats = episodes_stats_vals[i]
+
+        ep_ids_set = {
+            ep_legacy_metadata["episode_index"],
+            ep_metadata["episode_index"],
+            episodes_stats_keys[i],
+        }
+
+        if episodes_videos is None:
+            ep_video = {}
+        else:
+            ep_video = episodes_videos[i]
+            ep_ids_set.add(ep_video["episode_index"])
+
+        if len(ep_ids_set) != 1:
+            raise ValueError(f"Episode indices do not match across metadata sources ({ep_ids_set}).")
+
+        ep_dict = {**ep_metadata, **ep_video, **ep_legacy_metadata, **flatten_dict({"stats": ep_stats})}
+        ep_dict["meta/episodes/chunk_index"] = 0
+        ep_dict["meta/episodes/file_index"] = 0
+        yield ep_dict
+
+
+def convert_episodes_metadata(root, new_root, episodes_metadata, episodes_video_metadata=None):
+    """Write the per-episode metadata table and the aggregated dataset stats to `new_root`."""
+    logging.info(f"Converting episodes metadata from {root} to {new_root}")
+
+    episodes_legacy_metadata = legacy_load_episodes(root)
+    episodes_stats = legacy_load_episodes_stats(root)
+
+    # Stats are indexed positionally by the generator, so their count must match as well.
+    num_eps_set = {len(episodes_legacy_metadata), len(episodes_metadata), len(episodes_stats)}
+    if episodes_video_metadata is not None:
+        num_eps_set.add(len(episodes_video_metadata))
+
+    if len(num_eps_set) != 1:
+        raise ValueError(f"Number of episodes is not the same ({num_eps_set}).")
+
+    ds_episodes = Dataset.from_generator(
+        lambda: generate_episode_metadata_dict(
+            episodes_legacy_metadata, episodes_metadata, episodes_stats, episodes_video_metadata
+        )
+    )
+    write_episodes(ds_episodes, new_root)
+
+    stats = aggregate_stats(list(episodes_stats.values()))
+    write_stats(stats, new_root)
+
+
+def convert_info(root, new_root, data_file_size_in_mb, video_file_size_in_mb):
+    """Write the v3.0 info: new version, file-size settings, path templates and per-feature fps."""
+    info = load_info(root)
+    info["codebase_version"] = V30
+    # `pop` with a default tolerates info files that lack these legacy keys.
+    info.pop("total_chunks", None)
+    info.pop("total_videos", None)
+    info["data_files_size_in_mb"] = data_file_size_in_mb
+    info["video_files_size_in_mb"] = video_file_size_in_mb
+    info["data_path"] = DEFAULT_DATA_PATH
+    info["video_path"] = DEFAULT_VIDEO_PATH if info.get("video_path") is not None else None
+    info["fps"] = int(info["fps"])
+    logging.info(f"Converting info from {root} to {new_root}")
+    for key in info["features"]:
+        if info["features"][key]["dtype"] == "video":
+            # already has fps in video_info
+            continue
+        info["features"][key]["fps"] = info["fps"]
+    write_info(info, new_root)
+
+
+def convert_dataset(
+    load_path: str | Path | None = None,
+    save_path: str | Path | None = None,
+    branch: str | None = None,
+    data_file_size_in_mb: int | None = None,
+    video_file_size_in_mb: int | None = None,
+    push_to_hub: bool = True,
+    force_conversion: bool = False,
+    start_ratio: float = 0.0,
+    end_ratio: float = 1.0,
+):
+    """Convert the local v2.1 dataset at `load_path` into a v3.0 dataset at `save_path`.
+
+    An existing `save_path` is skipped unless `force_conversion` is set, in which case it is
+    removed and converted again. A failed conversion is logged and its partial output removed,
+    so that a batch run can continue with the next dataset.
+
+    `branch`, `push_to_hub`, `start_ratio` and `end_ratio` are accepted only so that the parsed
+    CLI namespace can be forwarded with `**vars(args)`; the conversion does not use them.
+
+    Raises:
+        ValueError: if `load_path` or `save_path` is missing, or the dataset is not v2.1.
+        FileNotFoundError: if `load_path` does not exist.
+    """
+    if load_path is None or save_path is None:
+        raise ValueError("Both `load_path` and `save_path` are required.")
+    if data_file_size_in_mb is None:
+        data_file_size_in_mb = DEFAULT_DATA_FILE_SIZE_IN_MB
+    if video_file_size_in_mb is None:
+        video_file_size_in_mb = DEFAULT_VIDEO_FILE_SIZE_IN_MB
+
+    root = Path(load_path)
+    if not root.exists():
+        raise FileNotFoundError(f"Local dataset not found at {root}")
+    validate_local_dataset_version(root)
+    print(f"Using local dataset at {root}")
+
+    new_root = Path(save_path)
+    if new_root.is_dir():
+        if not force_conversion:
+            # Output already exists: leave it untouched.
+            logging.info(f"Skipping {root}: {new_root} already exists")
+            return
+        shutil.rmtree(new_root)
+
+    try:
+        convert_info(root, new_root, data_file_size_in_mb, video_file_size_in_mb)
+        convert_tasks(root, new_root)
+        episodes_metadata = convert_data(root, new_root, data_file_size_in_mb)
+        episodes_videos_metadata = convert_videos(root, new_root, video_file_size_in_mb)
+        convert_episodes_metadata(root, new_root, episodes_metadata, episodes_videos_metadata)
+    except Exception:
+        # Best-effort batch semantics: report the failure instead of swallowing it silently,
+        # then drop the partial output so a later run does not mistake it for a finished dataset.
+        logging.exception(f"Conversion of {root} failed; removing partial output {new_root}")
+        shutil.rmtree(new_root, ignore_errors=True)
+    except BaseException:
+        # Interrupted (e.g. Ctrl-C): clean up as well, but let the interruption propagate.
+        shutil.rmtree(new_root, ignore_errors=True)
+        raise
+
+
+if __name__ == "__main__":
+    init_logging()
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--start_ratio",
+        type=float,
+        default=0.0,
+        help="Start of the slice of the sorted dataset list to convert, as a ratio in [0, 1].",
+    )
+    parser.add_argument(
+        "--end_ratio",
+        type=float,
+        default=1.0,
+        help="End (exclusive) of the slice of the sorted dataset list to convert, as a ratio in [0, 1].",
+    )
+    parser.add_argument(
+        "--branch",
+        type=str,
+        default=None,
+        help="Repo branch to push your dataset. Defaults to the main branch.",
+    )
+    parser.add_argument(
+        "--data-file-size-in-mb",
+        type=int,
+        default=None,
+        help="File size in MB. Defaults to 100 for data and 500 for videos.",
+    )
+    parser.add_argument(
+        "--video-file-size-in-mb",
+        type=int,
+        default=None,
+        help="File size in MB. Defaults to 100 for data and 500 for videos.",
+    )
+    parser.add_argument(
+        "--push-to-hub",
+        type=lambda value: value.lower() == "true",
+        default=True,
+        help="Push the converted dataset to the hub.",
+    )
+    parser.add_argument(
+        "--force-conversion",
+        action="store_true",
+        help="Force conversion even if the dataset already has a v3.0 version.",
+    )
+
+    args = parser.parse_args()
+
+    load_root_path = "/mnt/shared-storage-user/internvla/InternData-A1-realese/v2.0-stable/InternData-A1/sim"
+    save_root_path = "/mnt/shared-storage-user/internvla/InternData-A1-realese/v2.0-stable/InternData-A1/sim_lerobotv30"
+    # load_paths = (
+    #     glob.glob(os.path.join(load_root_path, "articulation_tasks", "*", "*")) + \
+    #     glob.glob(os.path.join(load_root_path, "basic_tasks", "*", "*")) + \
+    #     glob.glob(os.path.join(load_root_path, "long_horizon_tasks", "*", "*"))
+    # )
+    # load_paths += (glob.glob(os.path.join(load_root_path, "pick_and_place_tasks", "*", "*", "*")))
+
+    load_paths = (glob.glob(os.path.join(load_root_path, "long_horizon_tasks", "lift2", "*collaborate_assemble_a_beef_sandwich_part3*")))
+    load_paths += (glob.glob(os.path.join(load_root_path, "long_horizon_tasks", "split_aloha", "*collaborate_assemble_a_beef_sandwich*")))
+
+    load_paths.sort()
+    num_eps = len(load_paths)
+    # Half-open slice [start_eps, end_eps): adjacent ratio ranges (e.g. 0-0.5 and 0.5-1) must not
+    # share a dataset, otherwise two workers would convert the same output directory.
+    start_eps = int(num_eps * args.start_ratio)
+    end_eps = int(num_eps * args.end_ratio)
+
+    print("start_eps :", start_eps, "end_eps :", end_eps)
+
+    for load_path in tqdm.tqdm(load_paths[start_eps:end_eps]):
+        # Mirror the dataset location under the output root.
+        save_path = load_path.replace(load_root_path, save_root_path, 1)
+        task_type, robot_id, repo_id = Path(load_path).parts[-3:]
+        print(f"Converting {task_type} {robot_id} {repo_id} task to lerobot v30")
+        args.load_path = load_path
+        args.save_path = save_path
+        convert_dataset(**vars(args))
diff --git a/workflows/simbox/core/objects/articulated_object.py b/workflows/simbox/core/objects/articulated_object.py
index 98fc035..cf010c5 100644
--- a/workflows/simbox/core/objects/articulated_object.py
+++ b/workflows/simbox/core/objects/articulated_object.py
@@ -19,15 +19,16 @@ class ArticulatedObject(Articulation):
     def __init__(self, asset_root, root_prim_path, cfg, *args, **kwargs):
         self.asset_root = asset_root
         self.object_name = cfg["name"]
-        self.asset_relative_path = cfg["path"]
-        self.object_dir = os.path.join(asset_root, cfg["path"])
+        self.usd_path = os.path.join(asset_root, cfg["path"])
         self._root_prim_path = root_prim_path
-        object_info_path = os.path.join(asset_root, cfg["obj_info_path"])
+        info_name = cfg["info_name"]
+        object_info_path = self.usd_path.replace("instance.usd", f"Kps/{info_name}/info.json")
         with open(object_info_path, "r", encoding="utf-8") as f:
             object_info = json.load(f)
         self.category = cfg["category"]
         self.cfg = cfg
         self.get_articulated_info(object_info)
+        self.cfg["scale"] = self.object_scale[:3]
         super().__init__(prim_path=self.object_prim_path, name=cfg["name"], *args, **kwargs)
 
     def update_articulated_info(self, obj_info_path):
@@ -81,7 +82,6 @@ class ArticulatedObject(Articulation):
         for key, item in self.object_keypoints.items():
             self.object_keypoints[key] = np.append(item, [1.0], axis=0)
         self.object_scale = np.array(object_info["object_scale"])
-        self.object_usd = os.path.join(self.object_dir, "instance.usd")
         for key, item in object_info.items():
             if key in prim_path_list:
                 setattr(self, key, self._root_prim_path + object_info[key])
@@ -96,7 +96,7 @@ class ArticulatedObject(Articulation):
         self.object_joint_number = 0
         # Contact plane normal
         self.contact_plane_normal = None
-        add_reference_to_stage(usd_path=self.object_usd, prim_path=self.object_prim_path)
+        add_reference_to_stage(usd_path=self.usd_path, prim_path=self.object_prim_path)
 
     def get_joint_position(self, stage):
         joint_parent_prim = stage.GetPrimAtPath(self.object_joint_path.rsplit("/", 1)[0])
diff --git a/workflows/simbox/core/utils/dr.py b/workflows/simbox/core/utils/dr.py
index 550fd76..611e85b 100644
--- a/workflows/simbox/core/utils/dr.py
+++ b/workflows/simbox/core/utils/dr.py
@@ -108,17 +108,11 @@ def update_articulated_objs(cfg):
     for obj_cfg in cfg["objects"]:
         apply_randomization = obj_cfg.get("apply_randomization", False)
         if apply_randomization and obj_cfg["target_class"] == "ArticulatedObject":
-            dirs = os.path.join(cfg["asset_root"], os.path.dirname(obj_cfg["path"]))
+            dirs = os.path.join(cfg["asset_root"], os.path.dirname(os.path.dirname(obj_cfg["path"])))
             paths = glob.glob(os.path.join(dirs, "*"))
             paths.sort()
             path = random.choice(paths)
 
-            # left hearth 0.5: [1, 2, 5, 6, 13, ] ;
-            # left hearth 0.785 [3, 4, 7, 8, 9, 11, 12, 14, 15, 16, 17]
-            # left hearth no planning [0, 10, 18, 19]
-
-            # right hearth 0.5: [0, 1, 4, 10, 11]
-            # right hearth 0.785: [2, 3, 5, 6, 7, 8, 9, ]
             info_name = obj_cfg["info_name"]
             info_path = f"{path}/Kps/{info_name}/info.json"
             with open(info_path, "r", encoding="utf-8") as file:
@@ -126,7 +120,7 @@ def update_articulated_objs(cfg):
             scale = info["object_scale"][:3]
 
             asset_root = cfg["asset_root"]
-            obj_cfg["path"] = path.replace(f"{asset_root}/", "", 1)
+            obj_cfg["path"] = path.replace(f"{asset_root}/", "", 1) + "/instance.usd"
             obj_cfg["category"] = path.split("/")[-2]
             obj_cfg["obj_info_path"] = info_path.replace(f"{asset_root}/", "", 1)
             obj_cfg["scale"] = scale
diff --git a/workflows/simbox_dual_workflow.py b/workflows/simbox_dual_workflow.py
index 830de5e..9ec1a7a 100644
--- a/workflows/simbox_dual_workflow.py
+++ b/workflows/simbox_dual_workflow.py
@@ -87,21 +87,22 @@ class SimBoxDualWorkFlow(NimbusWorkFlow):
 
         for obj_cfg in self.task_cfg["objects"]:
             if obj_cfg["target_class"] == "ArticulatedObject":
-                asset_root = self.task_cfg["asset_root"]
-                art_paths = glob.glob(os.path.join(asset_root, obj_cfg["art_cat"], "*"))
-                art_paths.sort()
-                path = random.choice(art_paths)
-                info_name = obj_cfg["info_name"]
-                info_path = f"{path}/Kps/{info_name}/info.json"
-                with open(info_path, "r", encoding="utf-8") as f:
-                    info = json.load(f)
-                scale = info["object_scale"][:3]
+                if obj_cfg.get("apply_randomization", False):
+                    asset_root = self.task_cfg["asset_root"]
+                    art_paths = glob.glob(os.path.join(asset_root, obj_cfg["art_cat"], "*"))
+                    art_paths.sort()
+                    path = random.choice(art_paths)
+                    info_name = obj_cfg["info_name"]
+                    info_path = f"{path}/Kps/{info_name}/info.json"
+                    with open(info_path, "r", encoding="utf-8") as f:
+                        info = json.load(f)
+                    scale = info["object_scale"][:3]
 
-                obj_cfg["path"] = path.replace(f"{asset_root}/", "", 1)
-                obj_cfg["category"] = path.split("/")[-2]
-                obj_cfg["obj_info_path"] = info_path.replace(f"{asset_root}/", "", 1)
-                obj_cfg["scale"] = scale
-                self.task_cfg["data"]["collect_info"] = obj_cfg["category"]
+                    obj_cfg["path"] = path.replace(f"{asset_root}/", "", 1) + "/instance.usd"
+                    obj_cfg["category"] = path.split("/")[-2]
+                    obj_cfg["obj_info_path"] = info_path.replace(f"{asset_root}/", "", 1)
+                    obj_cfg["scale"] = scale
+                    self.task_cfg["data"]["collect_info"] = obj_cfg["category"]
 
         self.task_cfg.pop("arena_file", None)
         self.task_cfg.pop("camera_file", None)