policy / README / lmdb2lerobot

@@ -1,6 +1,6 @@
 <div align="center">

-# InternDataEngine: A simulation-based data generation engine designed for robotic learning.
+# InternDataEngine: Pioneering High-Fidelity Synthetic Data Generator for Robotic Manipulation

 </div>

@@ -29,7 +29,7 @@ InternDataEngine is a synthetic data generation engine for embodied AI that powe

 ## 🔥 Latest News

-- **[2026/03]** We release the InternDataEngine codebase, which includes the core modules: InternData-A1, Nimbus, and InternData-M1.
+- **[2026/03]** We release the InternDataEngine codebase v1.0, which includes the core modules: InternData-A1 and Nimbus.

 ## 🚀 Quickstart
799 policy/lmdb2lerobotv21/lmdb2lerobot_franka_a1.py Normal file
@@ -0,0 +1,799 @@
import argparse
import json
import logging
import os
import gc
import shutil
import torchvision
import cv2
import h5py
import lmdb
import numpy as np
import pickle
import torch
import pinocchio as pin
import time
import ray
import pdb
import imageio  # requires the imageio-ffmpeg backend

from PIL import Image
from tqdm import tqdm
from lerobot.common.datasets.compute_stats import auto_downsample_height_width, get_feature_stats, sample_indices
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.datasets.utils import check_timestamps_sync, get_episode_data_index, validate_episode_buffer
from ray.runtime_env import RuntimeEnv
from scipy.spatial.transform import Rotation as R
from copy import deepcopy
from concurrent.futures import ALL_COMPLETED, ProcessPoolExecutor, ThreadPoolExecutor, as_completed, wait
from pathlib import Path
from typing import Callable, Dict, Iterator, List, Optional, Tuple

"""
|
||||
Store both camera image and robot state as a combined observation.
|
||||
Args:
|
||||
observation: images(camera), states (robot state)
|
||||
actions: joint, gripper, ee_pose
|
||||
"""
|
||||
FEATURES = {
    "images.rgb.head": {
        "dtype": "video",
        "shape": (360, 640, 3),
        "names": ["height", "width", "channel"],
    },
    "images.rgb.hand": {
        "dtype": "video",
        "shape": (480, 640, 3),
        "names": ["height", "width", "channel"],
    },
    "head_camera_intrinsics": {
        "dtype": "float32",
        "shape": (4,),
        "names": ["fx", "fy", "cx", "cy"],
    },
    "hand_camera_intrinsics": {
        "dtype": "float32",
        "shape": (4,),
        "names": ["fx", "fy", "cx", "cy"],
    },
    "head_camera_to_robot_extrinsics": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "hand_camera_to_robot_extrinsics": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.joint.position": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["joint_0", "joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6"],
    },
    "states.gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["gripper_0"],
    },
    "states.gripper.pose": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["x", "y", "z", "roll", "pitch", "yaw"],
    },
    "states.ee_to_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.tcp_to_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.robot_to_env_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.joint.position": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["joint_0", "joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6"],
    },
    "actions.gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["gripper_0"],
    },
    "actions.gripper.openness": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["gripper_0"],
    },
    "actions.gripper.pose": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["x", "y", "z", "roll", "pitch", "yaw"],
    },
    "actions.ee_to_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.tcp_to_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "master_actions.joint.position": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["joint_0", "joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6"],
    },
    "master_actions.gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["gripper_0"],
    },
    "master_actions.gripper.openness": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["gripper_0"],
    },
    "master_actions.gripper.pose": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["x", "y", "z", "roll", "pitch", "yaw"],
    },
}

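# Note: all 7-D pose features above share the convention
# [position.x, position.y, position.z, qw, qx, qy, qz] (scalar-first quaternion),
# and "video" features are written as encoded MP4 files rather than per-frame arrays.
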
class FrankaDataset(LeRobotDataset):
    def __init__(
        self,
        repo_id: str,
        root: str | Path | None = None,
        episodes: list[int] | None = None,
        image_transforms: Callable | None = None,
        delta_timestamps: dict[str, list[float]] | None = None,
        tolerance_s: float = 1e-4,
        download_videos: bool = True,
        local_files_only: bool = False,
        video_backend: str | None = None,
    ):
        super().__init__(
            repo_id=repo_id,
            root=root,
            episodes=episodes,
            image_transforms=image_transforms,
            delta_timestamps=delta_timestamps,
            tolerance_s=tolerance_s,
            download_videos=download_videos,
            local_files_only=local_files_only,
            video_backend=video_backend,
        )

    def save_episode(self, episode_data: dict | None = None, videos: dict | None = None) -> None:
        episode_buffer = episode_data if episode_data else self.episode_buffer

        validate_episode_buffer(episode_buffer, self.meta.total_episodes, self.features)
        episode_length = episode_buffer.pop("size")
        tasks = episode_buffer.pop("task")
        episode_tasks = list(set(tasks))
        episode_index = episode_buffer["episode_index"]

        episode_buffer["index"] = np.arange(self.meta.total_frames, self.meta.total_frames + episode_length)
        episode_buffer["episode_index"] = np.full((episode_length,), episode_index)

        for task in episode_tasks:
            task_index = self.meta.get_task_index(task)
            if task_index is None:
                self.meta.add_task(task)

        episode_buffer["task_index"] = np.array([self.meta.get_task_index(task) for task in tasks])
        for key, ft in self.features.items():
            if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["video"]:
                continue
            episode_buffer[key] = np.stack(episode_buffer[key]).squeeze()
        # Videos are already encoded by the loader; register their target paths and copy them into place.
        for key in self.meta.video_keys:
            video_path = self.root / self.meta.get_video_file_path(episode_index, key)
            episode_buffer[key] = str(video_path)  # PosixPath -> str
            video_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copyfile(videos[key], video_path)
        ep_stats = compute_episode_stats(episode_buffer, self.features)
        self._save_episode_table(episode_buffer, episode_index)
        self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats)
        ep_data_index = get_episode_data_index(self.meta.episodes, [episode_index])
        ep_data_index_np = {k: t.numpy() for k, t in ep_data_index.items()}
        check_timestamps_sync(
            episode_buffer["timestamp"],
            episode_buffer["episode_index"],
            ep_data_index_np,
            self.fps,
            self.tolerance_s,
        )
        if not episode_data:
            self.episode_buffer = self.create_episode_buffer()

    def add_frame(self, frame: dict) -> None:
        for name in frame:
            if isinstance(frame[name], torch.Tensor):
                frame[name] = frame[name].numpy()
        if self.episode_buffer is None:
            self.episode_buffer = self.create_episode_buffer()
        frame_index = self.episode_buffer["size"]
        timestamp = frame.pop("timestamp") if "timestamp" in frame else frame_index / self.fps
        self.episode_buffer["frame_index"].append(frame_index)
        self.episode_buffer["timestamp"].append(timestamp)

        for key in frame:
            if key == "task":
                self.episode_buffer["task"].append(frame["task"])
                continue
            if key not in self.features:
                raise ValueError(f"An element of the frame is not in the features. '{key}' not in '{self.features.keys()}'.")
            self.episode_buffer[key].append(frame[key])
        self.episode_buffer["size"] += 1


# def crop_resize_no_padding(image, target_size=(480, 640)):
#     """
#     Crop and scale to target size (no padding)
#     :param image: input image (NumPy array)
#     :param target_size: target size (height, width)
#     :return: processed image
#     """
#     h, w = image.shape[:2]
#     target_h, target_w = target_size
#     target_ratio = target_w / target_h  # Target aspect ratio (e.g. 640/480=1.333)
#
#     # Compare the original aspect ratio to pick the cropping direction
#     if w / h > target_ratio:  # Original image is wider -> crop width
#         crop_w = int(h * target_ratio)  # Crop width derived from the target aspect ratio
#         crop_h = h
#         start_x = (w - crop_w) // 2  # Horizontal center starting point
#         start_y = 0
#     else:  # Original image is taller -> crop height
#         crop_h = int(w / target_ratio)  # Crop height derived from the target aspect ratio
#         crop_w = w
#         start_x = 0
#         start_y = (h - crop_h) // 2  # Vertical center starting point
#
#     # Perform centered cropping (guard against out-of-bounds)
#     start_x, start_y = max(0, start_x), max(0, start_y)
#     end_x, end_y = min(w, start_x + crop_w), min(h, start_y + crop_h)
#     cropped = image[start_y:end_y, start_x:end_x]
#
#     # Resize to target size (bilinear interpolation)
#     resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
#     return resized

def tf2xyzwxyz(posetf):
    """Convert a 4x4 homogeneous transform into a 7-D [x, y, z, qw, qx, qy, qz] pose."""
    translation = posetf[:3, 3]
    orientation = R.from_matrix(posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
    xyzwxyz = np.concatenate([translation, orientation]).astype("float32")
    return xyzwxyz

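# For example (identity transform -> zero translation, identity quaternion):
#   tf2xyzwxyz(np.eye(4)) -> array([0., 0., 0., 1., 0., 0., 0.], dtype=float32)
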
def load_lmdb_data(episode_path: Path, save_path: Path, fps_factor: int, target_fps: int) -> Optional[Dict]:
    def load_image(txn, key):
        raw = txn.get(key)
        data = pickle.loads(raw)
        image = cv2.imdecode(data, cv2.IMREAD_COLOR)
        # Convert to RGB if necessary
        # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # image = crop_resize_no_padding(image, target_size=(480, 640))
        return image

    armbase_to_robot_pose = np.eye(4)
    rot_x = np.eye(4)  # 180-degree rotation about the x-axis (flips y and z)
    rot_x[1][1] = -1.0
    rot_x[2][2] = -1.0
    tcp2ee_pose = np.eye(4)
    tcp2ee_pose[2, 3] = 0.095  # TCP offset along the flange z-axis (Panda hand)

    model = pin.buildModelFromUrdf("../assets/franka_description/franka_panda.urdf")
    data = model.createData()

    try:
        env = lmdb.open(
            str(episode_path / "lmdb"),
            readonly=True,
            lock=False,
            max_readers=128,
            readahead=False
        )
        meta_info = pickle.load(open(episode_path / "meta_info.pkl", "rb"))
        with env.begin(write=False) as txn:
            keys = [k for k, _ in txn.cursor()]
            qpos_keys = ['states.joint.position', 'states.gripper.position', 'states.gripper.pose']
            master_action_keys = ['master_actions.joint.position', 'master_actions.gripper.position', 'master_actions.gripper.openness', 'master_actions.gripper.pose']
            action_keys = ['actions.joint.position', 'actions.gripper.position', 'actions.gripper.openness', 'actions.gripper.pose']
            image_keys = ['images.rgb.head', 'images.rgb.hand']
            compute_qpos_keys = ['states.joint.position']
            additional_action_keys = ["actions.ee_to_armbase_pose", "actions.ee_to_robot_pose", "actions.tcp_to_armbase_pose", "actions.tcp_to_robot_pose"]
            robot2env_keys = ['robot2env_pose']
            intrinsics_keys = ['json_data']
            camera2env_keys = ["camera2env_pose.head", "camera2env_pose.hand"]
            num_steps = meta_info["num_steps"]
            total_steps = []
            for image_key in image_keys:
                keys_image_per_step = meta_info['keys'][image_key]
                total_steps.append(len(keys_image_per_step))

            state_action_dict = {}
            ### qpos
            for key in qpos_keys:
                state_action_dict[key] = pickle.loads(txn.get(key.encode()))
                state_action_dict[key] = np.stack(state_action_dict[key])
                total_steps.append(len(state_action_dict[key]))
            state_keys = list(state_action_dict.keys())
            # ### next qpos as action
            # for k in state_keys:
            #     state_action_dict[k.replace("states", "actions")] = np.concatenate([state_action_dict[k][1:, :], state_action_dict[k][-1, :][None, :]], axis=0)
            ### master action
            for key in master_action_keys:
                state_action_dict[key] = pickle.loads(txn.get(key.encode()))
                if np.isscalar(state_action_dict[key]):
                    state_action_dict[key] = np.array([state_action_dict[key]]).astype("float32")
                state_action_dict[key] = np.stack(state_action_dict[key])
                # Promote 1-D sequences (e.g. gripper openness) to column vectors.
                if len(state_action_dict[key].shape) == 1:
                    state_action_dict[key] = state_action_dict[key][:, np.newaxis]
                total_steps.append(len(state_action_dict[key]))
            ### action
            for key in action_keys:
                state_action_dict[key] = pickle.loads(txn.get(key.encode()))
                if np.isscalar(state_action_dict[key]):
                    state_action_dict[key] = np.array([state_action_dict[key]]).astype("float32")
                state_action_dict[key] = np.stack(state_action_dict[key])
                if len(state_action_dict[key].shape) == 1:
                    state_action_dict[key] = state_action_dict[key][:, np.newaxis]
                total_steps.append(len(state_action_dict[key]))
            ### ee & tcp pose proprio
            for compute_qpos_key in compute_qpos_keys:
                compute_qpos = pickle.loads(txn.get(compute_qpos_key.encode()))
                ee_to_armbase_poses = []
                ee_to_robot_poses = []
                tcp_to_armbase_poses = []
                tcp_to_robot_poses = []
                for each_compute_qpos in compute_qpos:
                    q = np.zeros(model.nq)  # joint angles
                    ndim = each_compute_qpos.shape[0]
                    q[:ndim] = each_compute_qpos
                    pin.forwardKinematics(model, data, q)
                    pin.updateFramePlacements(model, data)

                    fid_a = model.getFrameId("base_link")
                    fid_b = model.getFrameId("panda_hand")

                    T_a = data.oMf[fid_a]  # world -> a
                    T_b = data.oMf[fid_b]  # world -> b
                    T_a_b = T_a.inverse() * T_b

                    ee2a_translation = T_a_b.homogeneous[:3, 3]
                    ee2a_orientation = R.from_matrix(T_a_b.homogeneous[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z

                    ee_to_armbase_pose = np.concatenate([ee2a_translation, ee2a_orientation]).astype("float32")
                    ee_to_armbase_poses.append(ee_to_armbase_pose)

                    tcp_to_arm_base_posetf = T_a_b.homogeneous @ tcp2ee_pose
                    tcp_to_arm_base_translation = tcp_to_arm_base_posetf[:3, 3]
                    tcp_to_arm_base_orientation = R.from_matrix(tcp_to_arm_base_posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
                    tcp_to_armbase_pose = np.concatenate([tcp_to_arm_base_translation, tcp_to_arm_base_orientation]).astype("float32")
                    tcp_to_armbase_poses.append(tcp_to_armbase_pose)
                    ee_to_robot_posetf = armbase_to_robot_pose @ T_a_b.homogeneous
                    ee2r_translation = ee_to_robot_posetf[:3, 3]
                    ee2r_orientation = R.from_matrix(ee_to_robot_posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z

                    ee_to_robot_pose = np.concatenate([ee2r_translation, ee2r_orientation]).astype("float32")
                    ee_to_robot_poses.append(ee_to_robot_pose)

                    tcp_to_robot_posetf = ee_to_robot_posetf @ tcp2ee_pose
                    tcp_to_robot_translation = tcp_to_robot_posetf[:3, 3]
                    tcp_to_robot_orientation = R.from_matrix(tcp_to_robot_posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
                    tcp_to_robot_pose = np.concatenate([tcp_to_robot_translation, tcp_to_robot_orientation]).astype("float32")
                    tcp_to_robot_poses.append(tcp_to_robot_pose)

                ee2a_key = "states.ee_to_armbase_pose"
                ee2r_key = "states.ee_to_robot_pose"
                tcp2a_key = "states.tcp_to_armbase_pose"
                tcp2r_key = "states.tcp_to_robot_pose"

                state_action_dict[ee2a_key] = np.stack(ee_to_armbase_poses)
                state_action_dict[ee2r_key] = np.stack(ee_to_robot_poses)
                state_action_dict[tcp2a_key] = np.stack(tcp_to_armbase_poses)
                state_action_dict[tcp2r_key] = np.stack(tcp_to_robot_poses)
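            # Kinematic chain used above, for reference:
            #   T_ee_in_armbase  = T_armbase^-1 @ T_ee                    (frames from pinocchio FK)
            #   T_tcp_in_armbase = T_ee_in_armbase @ tcp2ee_pose          (fixed flange-to-TCP offset)
            #   T_*_in_robot     = armbase_to_robot_pose @ T_*_in_armbase (identity transform here)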
            ### ee & tcp pose action
            # Actions are next-step states: shift each pose sequence by one and repeat the last frame.
            for additional_action_key in additional_action_keys:
                additional_state_key = additional_action_key.replace("actions", "states")
                additional_state = state_action_dict[additional_state_key]
                additional_action = np.concatenate([additional_state[1:, :], additional_state[-1, :][None, :]], axis=0)
                state_action_dict[additional_action_key] = additional_action
            ### camera intrinsics
            for intrinsics_key in intrinsics_keys:
                intrinsics_params = pickle.loads(txn.get(intrinsics_key.encode()))
                hand_camera_params = intrinsics_params["hand_camera_params"]
                hand_camera_params = np.array([hand_camera_params[0][0], hand_camera_params[1][1], hand_camera_params[0][2], hand_camera_params[1][2]]).astype("float32")  # fx, fy, cx, cy
                head_camera_params = intrinsics_params["head_camera_params"]
                head_camera_params = np.array([head_camera_params[0][0], head_camera_params[1][1], head_camera_params[0][2], head_camera_params[1][2]]).astype("float32")
                # Halve the head intrinsics when they appear to belong to the full-resolution (1280-wide) stream.
                if head_camera_params[2] >= 500:
                    head_camera_params /= 2
                state_action_dict["head_camera_intrinsics"] = np.stack([head_camera_params for _ in range(num_steps)])
                state_action_dict["hand_camera_intrinsics"] = np.stack([hand_camera_params for _ in range(num_steps)])
            ### robot2env pose
            for robot2env_key in robot2env_keys:
                robot2env_pose_tfs = pickle.loads(txn.get(robot2env_key.encode()))
                robot2env_pose_7ds = []
                for robot2env_pose_tf in robot2env_pose_tfs:
                    robot2env_pose_7ds.append(tf2xyzwxyz(robot2env_pose_tf))
                state_action_dict[robot2env_key] = np.stack(robot2env_pose_7ds)
            ### camera2env pose
            for camera2env_key in camera2env_keys:
                camera2env_pose_tfs = pickle.loads(txn.get(camera2env_key.encode()))
                camera2robot_poses = []
                for frame_idx in range(len(camera2env_pose_tfs)):
                    camera2env_posetf = camera2env_pose_tfs[frame_idx]
                    robot2env_pose_tf = robot2env_pose_tfs[frame_idx]
                    camera2robot_pose_tf = np.linalg.inv(robot2env_pose_tf) @ camera2env_posetf @ rot_x
                    camera2robot_poses.append(tf2xyzwxyz(camera2robot_pose_tf))

                if camera2env_key == "camera2env_pose.head":
                    state_action_dict["head_camera_to_robot_extrinsics"] = np.stack(camera2robot_poses)
                elif camera2env_key == "camera2env_pose.hand":
                    state_action_dict["hand_camera_to_robot_extrinsics"] = np.stack(camera2robot_poses)

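            # Note: rot_x above flips the camera's y/z axes; this looks like a conversion
            # between camera-frame conventions (e.g. OpenGL-style to OpenCV-style), though
            # the exact source convention is an assumption on our part.
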
            unique_steps = list(set(total_steps))
            print("episode_path:", episode_path)
            print("total_steps: ", total_steps)
            assert len(unique_steps) == 1 and unique_steps[0] > 0, f"no data found or qpos / image steps mismatch in {episode_path}"
            assert unique_steps[0] > 100, f"Episode length of {episode_path} is {unique_steps[0]}, which does not meet requirements"
            assert np.max(np.abs(state_action_dict["states.joint.position"])) < 2 * np.pi
            selected_steps = [step for step in range(unique_steps[0]) if step % fps_factor == 0]
            frames = []
            image_observations = {}
            for image_key in image_keys:
                image_observations[image_key] = []
            start_time = time.time()
            for step_index, step in enumerate(selected_steps):
                step_str = f"{step:04d}"
                data_dict = {}
                for key, value in state_action_dict.items():
                    if "robot2env_pose" in key:
                        data_dict["states.robot_to_env_pose"] = value[step]
                    else:
                        data_dict[key] = value[step]
                data_dict["task"] = meta_info['language_instruction']
                frames.append(data_dict)
                for image_key in image_keys:
                    image_key_step_encode = f"{image_key}/{step_str}".encode()
                    if image_key_step_encode not in keys:
                        raise ValueError(f"Image key {image_key_step_encode} not found in LMDB keys.")
                    image_observations[image_key].append(load_image(txn, image_key_step_encode))
            elapsed_time = time.time() - start_time
            print(f"loaded image_observations of {episode_path} in {elapsed_time:.2f}s")
        env.close()
        if not frames:
            return None
        os.makedirs(save_path / episode_path.name, exist_ok=True)
        video_paths = {}
        for image_key in image_keys:
            h_ori, w_ori = image_observations[image_key][0].shape[:2]
            # Downscale full-resolution (1280-wide) streams by half.
            if w_ori == 1280:
                w_tgt = w_ori // 2
                h_tgt = h_ori // 2
            else:
                w_tgt = w_ori
                h_tgt = h_ori
            imageio.mimsave(
                save_path / episode_path.name / f'{image_key.replace(".", "_")}.mp4',
                image_observations[image_key],
                fps=target_fps,
                codec="libsvtav1",
                # codec="libx264",
                ffmpeg_params=[
                    "-crf", "28",  # quality (0-63, default 30)
                    "-preset", "8",  # speed preset (0-13; higher is faster but compresses less)
                    # "-g", "240",  # keyframe interval (recommended >= 8x fps)
                    "-pix_fmt", "yuv420p",  # pixel format for broad player compatibility
                    "-movflags", "+faststart",  # move metadata to the file head for network playback
                    # "-threads", "8",  # number of encoder threads
                    "-vf", f"scale={w_tgt}:{h_tgt}",
                    "-y",  # overwrite existing output files
                ]
            )
            video_paths[image_key] = save_path / episode_path.name / f'{image_key.replace(".", "_")}.mp4'
            print(f"wrote downsampled video for {image_key} of {episode_path}")

        return {
            "frames": frames,
            "videos": video_paths,
        }

    except Exception as e:
        logging.error(f"Failed to load or process LMDB data: {e}")
        return None


def get_all_tasks(src_path: Path, output_path: Path) -> Iterator[Tuple[Path, Path]]:
    output_path.mkdir(exist_ok=True)
    yield (src_path, output_path)

def compute_episode_stats(episode_data: Dict[str, List[str] | np.ndarray], features: Dict) -> Dict:
    ep_stats = {}
    for key, data in episode_data.items():
        if features[key]["dtype"] == "string":
            continue
        elif features[key]["dtype"] in ["image", "video"]:
            ep_ft_array = sample_images(data)
            axes_to_reduce = (0, 2, 3)  # keep channel dim
            keepdims = True
        else:
            ep_ft_array = data  # data is already a np.ndarray
            axes_to_reduce = 0  # compute stats over the first axis
            keepdims = data.ndim == 1  # keep as np.array

        ep_stats[key] = get_feature_stats(ep_ft_array, axis=axes_to_reduce, keepdims=keepdims)
        if features[key]["dtype"] in ["image", "video"]:
            ep_stats[key] = {
                k: v if k == "count" else np.squeeze(v / 255.0, axis=0) for k, v in ep_stats[key].items()
            }
    return ep_stats

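# Image/video stats above are divided by 255 so min/max/mean/std are reported in
# [0, 1]; presumably this matches how pixel values are normalized downstream.
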
def sample_images(input):
    if isinstance(input, str):
        # A video file path: decode frames, then subsample for stats.
        video_path = input
        reader = torchvision.io.VideoReader(video_path, stream="video")
        frames = [frame["data"] for frame in reader]
        frames_array = torch.stack(frames).numpy()  # Shape: [T, C, H, W]
        sampled_indices = sample_indices(len(frames_array))
        images = None
        for i, idx in enumerate(sampled_indices):
            img = frames_array[idx]
            img = auto_downsample_height_width(img)
            if images is None:
                images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
            images[i] = img
    elif isinstance(input, np.ndarray):
        # A raw [T, H, W] array: add a channel dim, then subsample for stats.
        frames_array = input[:, None, :, :]  # Shape: [T, C, H, W]
        sampled_indices = sample_indices(len(frames_array))
        images = None
        for i, idx in enumerate(sampled_indices):
            img = frames_array[idx]
            img = auto_downsample_height_width(img)
            if images is None:
                images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
            images[i] = img
    return images


def load_local_dataset(episode_path: str, save_path: str | Path, origin_fps=30, target_fps=30):
    fps_factor = origin_fps // target_fps
    episode_path = Path(episode_path)
    if not episode_path.exists():
        logging.warning(f"{episode_path} does not exist")
        return None, None

    if not (episode_path / "lmdb/data.mdb").exists():
        logging.warning(f"LMDB data not found for episode {episode_path}")
        return None, None

    raw_dataset = load_lmdb_data(episode_path, save_path, fps_factor, target_fps)
    if raw_dataset is None:
        return None, None
    frames = raw_dataset["frames"]  # states, actions, task
    videos = raw_dataset["videos"]  # image paths
    ## check that the downsampled videos were actually written
    for camera_name, video_path in videos.items():
        if not os.path.exists(video_path):
            logging.error(f"Video file {video_path} does not exist.")
            print(f"Camera {camera_name} Video file {video_path} does not exist.")
            return None, None
    return frames, videos


def save_as_lerobot_dataset(task: tuple[Path, Path], repo_id, num_threads, debug, origin_fps=30, target_fps=30, num_demos=None, robot_type="piper", delete_downsampled_videos=True):
    src_path, save_path = task
    print(f"**Processing collected** {src_path}")
    print(f"**saving to** {save_path}")
    if save_path.exists():
        print(f"Output directory {save_path} already exists. Deleting it.")
        logging.warning(f"Output directory {save_path} already exists. Deleting it.")
        shutil.rmtree(save_path)

    dataset = FrankaDataset.create(
        repo_id=f"{repo_id}",
        root=save_path,
        fps=target_fps,
        robot_type=robot_type,
        features=FEATURES,
    )
    all_episode_paths = sorted([f.as_posix() for f in src_path.glob("*") if f.is_dir()])
    if num_demos is not None:
        all_episode_paths = all_episode_paths[:num_demos]
    if debug:
        # Debug mode: process a single episode serially.
        for i in range(1):
            frames, videos = load_local_dataset(episode_path=all_episode_paths[i], save_path=save_path, origin_fps=origin_fps, target_fps=target_fps)
            if frames is None or videos is None:
                print(f"Skipping episode {all_episode_paths[i]} due to missing data.")
                continue
            for frame_data in frames:
                dataset.add_frame(frame_data)
            dataset.save_episode(videos=videos)
            if delete_downsampled_videos:
                for _, video_path in videos.items():
                    parent_dir = os.path.dirname(video_path)
                    try:
                        shutil.rmtree(parent_dir)
                        print(f"Successfully deleted: {video_path}")
                    except Exception:
                        pass  # the directory may not exist or may already be deleted

    else:
        counter_episodes_uncomplete = 0
        # Load episodes in batches of num_threads, appending each to the dataset as it completes.
        for batch_index in range(len(all_episode_paths) // num_threads + 1):
            batch_episode_paths = all_episode_paths[batch_index * num_threads:(batch_index + 1) * num_threads]
            if len(batch_episode_paths) == 0:
                continue
            with ThreadPoolExecutor(max_workers=num_threads) as executor:
                # Map each future back to its episode path so failures can be reported accurately.
                futures = {}
                for episode_path in batch_episode_paths:
                    print("starting to process episode: ", episode_path)
                    future = executor.submit(load_local_dataset, episode_path=episode_path, save_path=save_path, origin_fps=origin_fps, target_fps=target_fps)
                    futures[future] = episode_path
                for raw_dataset in as_completed(futures):
                    frames, videos = raw_dataset.result()
                    if frames is None or videos is None:
                        counter_episodes_uncomplete += 1
                        print(f"Skipping episode {futures[raw_dataset]} due to missing data.")
                        continue
                    for frame_data in frames:
                        dataset.add_frame(frame_data)
                    dataset.save_episode(videos=videos)
                    gc.collect()
                    print(f"finished processing {videos}")
                    if delete_downsampled_videos:
                        for _, video_path in videos.items():
                            # Remove the per-episode directory holding the intermediate downsampled videos.
                            parent_dir = os.path.dirname(video_path)
                            try:
                                shutil.rmtree(parent_dir)
                                print(f"Successfully deleted: {parent_dir}")
                            except Exception:
                                pass
        print("counter_episodes_uncomplete:", counter_episodes_uncomplete)

def main(src_path, save_path, repo_id, num_threads=4, debug=False, origin_fps=30, target_fps=30, num_demos=None):
    logging.info("Scanning for episodes...")
    tasks = get_all_tasks(src_path, save_path)
    if debug:
        task = next(tasks)
        save_as_lerobot_dataset(task, repo_id, num_threads=num_threads, debug=debug, origin_fps=origin_fps, target_fps=target_fps, num_demos=num_demos)
    else:
        for task in tasks:
            save_as_lerobot_dataset(task, repo_id, num_threads=num_threads, debug=debug, origin_fps=origin_fps, target_fps=target_fps, num_demos=num_demos)

if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Convert collected data from Piper to Lerobot format.")
|
||||
parser.add_argument(
|
||||
"--src_path",
|
||||
type=str,
|
||||
# required=False,
|
||||
default="/fs-computility/efm/shared/datasets/myData-A1/real/raw_data/agilex_split_aloha/",
|
||||
help="Path to the input file containing collected data in Piper format.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--save_path",
|
||||
type=str,
|
||||
# required=False,
|
||||
default="/fs-computility/efm/shared/datasets/myData-A1/real/lerobot_v2_1/agilex_split_aloha/",
|
||||
help="Path to the output file where the converted Lerobot format will be saved.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--debug",
|
||||
action="store_true",
|
||||
help="Run in debug mode with limited episodes",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num-threads",
|
||||
type=int,
|
||||
default=64,
|
||||
help="Number of threads per process",
|
||||
)
|
||||
# parser.add_argument(
|
||||
# "--task_name",
|
||||
# type=str,
|
||||
# required=True,
|
||||
# default="Pick_up_the_marker_and_put_it_into_the_pen_holder",
|
||||
# help="Name of the task to be processed. Default is 'Pick_up_the_marker_and_put_it_into_the_pen_holder'.",
|
||||
# )
|
||||
parser.add_argument(
|
||||
"--repo_id",
|
||||
type=str,
|
||||
# required=True,
|
||||
default="SplitAloha_20250714",
|
||||
help="identifier for the dataset repository.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--origin_fps",
|
||||
type=int,
|
||||
default=30,
|
||||
help="Frames per second for the obervation video. Default is 30.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--target_fps",
|
||||
type=int,
|
||||
default=30,
|
||||
help="Frames per second for the downsample video. Default is 30.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num_demos",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Demos need to transfer"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
assert int(args.origin_fps) % int(args.target_fps) == 0, "origin_fps must be an integer multiple of target_fps"
|
||||
start_time = time.time()
|
||||
main(
|
||||
src_path=Path(args.src_path),
|
||||
save_path=Path(args.save_path),
|
||||
repo_id=args.repo_id,
|
||||
num_threads=args.num_threads,
|
||||
debug=args.debug,
|
||||
origin_fps=args.origin_fps,
|
||||
target_fps=args.target_fps,
|
||||
num_demos=args.num_demos,
|
||||
)
|
||||
end_time = time.time()
|
||||
elapsed_time = end_time - start_time
|
||||
print(f"Total time taken: {elapsed_time:.2f} seconds")
|
||||
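
# Example invocation (hypothetical paths, shown as a minimal sketch):
#   python lmdb2lerobot_franka_a1.py \
#       --src_path /data/raw/franka_episodes \
#       --save_path /data/lerobot/franka_a1 \
#       --repo_id Franka_A1_demo \
#       --origin_fps 30 --target_fps 15 --num-threads 16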
800 policy/lmdb2lerobotv21/lmdb2lerobot_frankarobotiq_a1.py Normal file
@@ -0,0 +1,800 @@
import argparse
import json
import logging
import os
import gc
import shutil
import torchvision
import cv2
import h5py
import lmdb
import numpy as np
import pickle
import torch
import pinocchio as pin
import time
import ray
import pdb
import imageio  # requires the imageio-ffmpeg backend

from PIL import Image
from tqdm import tqdm
from lerobot.common.datasets.compute_stats import auto_downsample_height_width, get_feature_stats, sample_indices
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.datasets.utils import check_timestamps_sync, get_episode_data_index, validate_episode_buffer
from ray.runtime_env import RuntimeEnv
from scipy.spatial.transform import Rotation as R
from copy import deepcopy
from concurrent.futures import ALL_COMPLETED, ProcessPoolExecutor, ThreadPoolExecutor, as_completed, wait
from pathlib import Path
from typing import Callable, Dict, Iterator, List, Optional, Tuple

"""
|
||||
Store both camera image and robot state as a combined observation.
|
||||
Args:
|
||||
observation: images(camera), states (robot state)
|
||||
actions: joint, gripper, ee_pose
|
||||
"""
|
||||
FEATURES = {
    "images.rgb.head": {
        "dtype": "video",
        "shape": (480, 640, 3),
        "names": ["height", "width", "channel"],
    },
    "images.rgb.hand": {
        "dtype": "video",
        "shape": (480, 640, 3),
        "names": ["height", "width", "channel"],
    },
    "head_camera_intrinsics": {
        "dtype": "float32",
        "shape": (4,),
        "names": ["fx", "fy", "cx", "cy"],
    },
    "hand_camera_intrinsics": {
        "dtype": "float32",
        "shape": (4,),
        "names": ["fx", "fy", "cx", "cy"],
    },
    "head_camera_to_robot_extrinsics": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "hand_camera_to_robot_extrinsics": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.joint.position": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["joint_0", "joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6"],
    },
    "states.gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["gripper_0"],
    },
    "states.gripper.pose": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["x", "y", "z", "roll", "pitch", "yaw"],
    },
    "states.ee_to_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.tcp_to_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.robot_to_env_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.joint.position": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["joint_0", "joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6"],
    },
    "actions.gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["gripper_0"],
    },
    "actions.gripper.openness": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["gripper_0"],
    },
    "actions.gripper.pose": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["x", "y", "z", "roll", "pitch", "yaw"],
    },
    "actions.ee_to_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.tcp_to_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "master_actions.joint.position": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["joint_0", "joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6"],
    },
    "master_actions.gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["gripper_0"],
    },
    "master_actions.gripper.openness": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["gripper_0"],
    },
    "master_actions.gripper.pose": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["x", "y", "z", "roll", "pitch", "yaw"],
    },
}

class FrankaDataset(LeRobotDataset):
    def __init__(
        self,
        repo_id: str,
        root: str | Path | None = None,
        episodes: list[int] | None = None,
        image_transforms: Callable | None = None,
        delta_timestamps: dict[str, list[float]] | None = None,
        tolerance_s: float = 1e-4,
        download_videos: bool = True,
        local_files_only: bool = False,
        video_backend: str | None = None,
    ):
        super().__init__(
            repo_id=repo_id,
            root=root,
            episodes=episodes,
            image_transforms=image_transforms,
            delta_timestamps=delta_timestamps,
            tolerance_s=tolerance_s,
            download_videos=download_videos,
            local_files_only=local_files_only,
            video_backend=video_backend,
        )

    def save_episode(self, episode_data: dict | None = None, videos: dict | None = None) -> None:
        episode_buffer = episode_data if episode_data else self.episode_buffer

        validate_episode_buffer(episode_buffer, self.meta.total_episodes, self.features)
        episode_length = episode_buffer.pop("size")
        tasks = episode_buffer.pop("task")
        episode_tasks = list(set(tasks))
        episode_index = episode_buffer["episode_index"]

        episode_buffer["index"] = np.arange(self.meta.total_frames, self.meta.total_frames + episode_length)
        episode_buffer["episode_index"] = np.full((episode_length,), episode_index)

        for task in episode_tasks:
            task_index = self.meta.get_task_index(task)
            if task_index is None:
                self.meta.add_task(task)

        episode_buffer["task_index"] = np.array([self.meta.get_task_index(task) for task in tasks])
        for key, ft in self.features.items():
            if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["video"]:
                continue
            episode_buffer[key] = np.stack(episode_buffer[key]).squeeze()
        # Videos are already encoded by the loader; register their target paths and copy them into place.
        for key in self.meta.video_keys:
            video_path = self.root / self.meta.get_video_file_path(episode_index, key)
            episode_buffer[key] = str(video_path)  # PosixPath -> str
            video_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copyfile(videos[key], video_path)
        ep_stats = compute_episode_stats(episode_buffer, self.features)
        self._save_episode_table(episode_buffer, episode_index)
        self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats)
        ep_data_index = get_episode_data_index(self.meta.episodes, [episode_index])
        ep_data_index_np = {k: t.numpy() for k, t in ep_data_index.items()}
        check_timestamps_sync(
            episode_buffer["timestamp"],
            episode_buffer["episode_index"],
            ep_data_index_np,
            self.fps,
            self.tolerance_s,
        )
        if not episode_data:
            self.episode_buffer = self.create_episode_buffer()


    def add_frame(self, frame: dict) -> None:
        for name in frame:
            if isinstance(frame[name], torch.Tensor):
                frame[name] = frame[name].numpy()
        if self.episode_buffer is None:
            self.episode_buffer = self.create_episode_buffer()
        frame_index = self.episode_buffer["size"]
        timestamp = frame.pop("timestamp") if "timestamp" in frame else frame_index / self.fps
        self.episode_buffer["frame_index"].append(frame_index)
        self.episode_buffer["timestamp"].append(timestamp)

        for key in frame:
            if key == "task":
                self.episode_buffer["task"].append(frame["task"])
                continue
            if key not in self.features:
                raise ValueError(f"An element of the frame is not in the features. '{key}' not in '{self.features.keys()}'.")
            self.episode_buffer[key].append(frame[key])
        self.episode_buffer["size"] += 1


# def crop_resize_no_padding(image, target_size=(480, 640)):
#     """
#     Crop and scale to target size (no padding)
#     :param image: input image (NumPy array)
#     :param target_size: target size (height, width)
#     :return: processed image
#     """
#     h, w = image.shape[:2]
#     target_h, target_w = target_size
#     target_ratio = target_w / target_h  # Target aspect ratio (e.g. 640/480=1.333)
#
#     # Compare the original aspect ratio to pick the cropping direction
#     if w / h > target_ratio:  # Original image is wider -> crop width
#         crop_w = int(h * target_ratio)  # Crop width derived from the target aspect ratio
#         crop_h = h
#         start_x = (w - crop_w) // 2  # Horizontal center starting point
#         start_y = 0
#     else:  # Original image is taller -> crop height
#         crop_h = int(w / target_ratio)  # Crop height derived from the target aspect ratio
#         crop_w = w
#         start_x = 0
#         start_y = (h - crop_h) // 2  # Vertical center starting point
#
#     # Perform centered cropping (guard against out-of-bounds)
#     start_x, start_y = max(0, start_x), max(0, start_y)
#     end_x, end_y = min(w, start_x + crop_w), min(h, start_y + crop_h)
#     cropped = image[start_y:end_y, start_x:end_x]
#
#     # Resize to target size (bilinear interpolation)
#     resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
#     return resized

def tf2xyzwxyz(posetf):
    """Convert a 4x4 homogeneous transform into a 7-D [x, y, z, qw, qx, qy, qz] pose."""
    translation = posetf[:3, 3]
    orientation = R.from_matrix(posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
    xyzwxyz = np.concatenate([translation, orientation]).astype("float32")
    return xyzwxyz

def load_lmdb_data(episode_path: Path, save_path: Path, fps_factor: int, target_fps: int) -> Optional[Dict]:
    def load_image(txn, key):
        raw = txn.get(key)
        data = pickle.loads(raw)
        image = cv2.imdecode(data, cv2.IMREAD_COLOR)
        # Convert to RGB if necessary
        # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # image = crop_resize_no_padding(image, target_size=(480, 640))
        return image

    armbase_to_robot_pose = np.eye(4)
    rot_x = np.eye(4)  # 180-degree rotation about the x-axis (flips y and z)
    rot_x[1][1] = -1.0
    rot_x[2][2] = -1.0
    tcp2ee_pose = np.eye(4)
    tcp2ee_pose[2, 3] = 0.145
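    # TCP offset along the flange z-axis for the Robotiq gripper; note the Panda-hand
    # variant of this converter (lmdb2lerobot_franka_a1.py) uses 0.095 m instead.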

    model = pin.buildModelFromUrdf("../assets/franka_robotiq/frankarobotiq.urdf")
    data = model.createData()

    try:
        env = lmdb.open(
            str(episode_path / "lmdb"),
            readonly=True,
            lock=False,
            max_readers=128,
            readahead=False
        )
        meta_info = pickle.load(open(episode_path / "meta_info.pkl", "rb"))
        with env.begin(write=False) as txn:
            keys = [k for k, _ in txn.cursor()]
            qpos_keys = ['states.joint.position', 'states.gripper.position', 'states.gripper.pose']
            master_action_keys = ['master_actions.joint.position', 'master_actions.gripper.position', 'master_actions.gripper.openness', 'master_actions.gripper.pose']
            action_keys = ['actions.joint.position', 'actions.gripper.position', 'actions.gripper.openness', 'actions.gripper.pose']
            image_keys = ['images.rgb.head', 'images.rgb.hand']
            compute_qpos_keys = ['states.joint.position']
            additional_action_keys = ["actions.ee_to_armbase_pose", "actions.ee_to_robot_pose", "actions.tcp_to_armbase_pose", "actions.tcp_to_robot_pose"]
            robot2env_keys = ['robot2env_pose']
            intrinsics_keys = ['json_data']
            camera2env_keys = ["camera2env_pose.head", "camera2env_pose.hand"]
            total_steps = []
            for image_key in image_keys:
                keys_image_per_step = meta_info['keys'][image_key]
                total_steps.append(len(keys_image_per_step))

            state_action_dict = {}
            ### qpos
            for key in qpos_keys:
                state_action_dict[key] = pickle.loads(txn.get(key.encode()))
                state_action_dict[key] = np.stack(state_action_dict[key])
                total_steps.append(len(state_action_dict[key]))
            state_keys = list(state_action_dict.keys())
            # ### next qpos as action
            # for k in state_keys:
            #     state_action_dict[k.replace("states", "actions")] = np.concatenate([state_action_dict[k][1:, :], state_action_dict[k][-1, :][None, :]], axis=0)
            ### master action
            for key in master_action_keys:
                state_action_dict[key] = pickle.loads(txn.get(key.encode()))
                if np.isscalar(state_action_dict[key]):
                    state_action_dict[key] = np.array([state_action_dict[key]]).astype("float32")
                state_action_dict[key] = np.stack(state_action_dict[key])
                # Promote 1-D sequences (e.g. gripper openness) to column vectors.
                if len(state_action_dict[key].shape) == 1:
                    state_action_dict[key] = state_action_dict[key][:, np.newaxis]
                total_steps.append(len(state_action_dict[key]))
            ### action
            for key in action_keys:
                state_action_dict[key] = pickle.loads(txn.get(key.encode()))
                if np.isscalar(state_action_dict[key]):
                    state_action_dict[key] = np.array([state_action_dict[key]]).astype("float32")
                state_action_dict[key] = np.stack(state_action_dict[key])
                if len(state_action_dict[key].shape) == 1:
                    state_action_dict[key] = state_action_dict[key][:, np.newaxis]
                total_steps.append(len(state_action_dict[key]))
            ### ee & tcp pose proprio
            for compute_qpos_key in compute_qpos_keys:
                compute_qpos = pickle.loads(txn.get(compute_qpos_key.encode()))
                ee_to_armbase_poses = []
                ee_to_robot_poses = []
                tcp_to_armbase_poses = []
                tcp_to_robot_poses = []
                for each_compute_qpos in compute_qpos:
                    q = np.zeros(model.nq)  # joint angles
                    ndim = each_compute_qpos.shape[0]
                    q[:ndim] = each_compute_qpos
                    pin.forwardKinematics(model, data, q)
                    pin.updateFramePlacements(model, data)

                    fid_a = model.getFrameId("base_link")
                    fid_b = model.getFrameId("panda_link8")

                    T_a = data.oMf[fid_a]  # world -> a
                    T_b = data.oMf[fid_b]  # world -> b
                    T_a_b = T_a.inverse() * T_b

                    ee2a_translation = T_a_b.homogeneous[:3, 3]
                    ee2a_orientation = R.from_matrix(T_a_b.homogeneous[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z

                    ee_to_armbase_pose = np.concatenate([ee2a_translation, ee2a_orientation]).astype("float32")
                    ee_to_armbase_poses.append(ee_to_armbase_pose)

                    tcp_to_arm_base_posetf = T_a_b.homogeneous @ tcp2ee_pose
                    tcp_to_arm_base_translation = tcp_to_arm_base_posetf[:3, 3]
                    tcp_to_arm_base_orientation = R.from_matrix(tcp_to_arm_base_posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
                    tcp_to_armbase_pose = np.concatenate([tcp_to_arm_base_translation, tcp_to_arm_base_orientation]).astype("float32")
                    tcp_to_armbase_poses.append(tcp_to_armbase_pose)
                    ee_to_robot_posetf = armbase_to_robot_pose @ T_a_b.homogeneous
                    ee2r_translation = ee_to_robot_posetf[:3, 3]
                    ee2r_orientation = R.from_matrix(ee_to_robot_posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z

                    ee_to_robot_pose = np.concatenate([ee2r_translation, ee2r_orientation]).astype("float32")
                    ee_to_robot_poses.append(ee_to_robot_pose)

                    tcp_to_robot_posetf = ee_to_robot_posetf @ tcp2ee_pose
                    tcp_to_robot_translation = tcp_to_robot_posetf[:3, 3]
                    tcp_to_robot_orientation = R.from_matrix(tcp_to_robot_posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
                    tcp_to_robot_pose = np.concatenate([tcp_to_robot_translation, tcp_to_robot_orientation]).astype("float32")
                    tcp_to_robot_poses.append(tcp_to_robot_pose)

                ee2a_key = "states.ee_to_armbase_pose"
                ee2r_key = "states.ee_to_robot_pose"
                tcp2a_key = "states.tcp_to_armbase_pose"
                tcp2r_key = "states.tcp_to_robot_pose"

                state_action_dict[ee2a_key] = np.stack(ee_to_armbase_poses)
                state_action_dict[ee2r_key] = np.stack(ee_to_robot_poses)
                state_action_dict[tcp2a_key] = np.stack(tcp_to_armbase_poses)
                state_action_dict[tcp2r_key] = np.stack(tcp_to_robot_poses)
            ### ee & tcp pose action
            # Actions are next-step states: shift each pose sequence by one and repeat the last frame.
            for additional_action_key in additional_action_keys:
                additional_state_key = additional_action_key.replace("actions", "states")
                additional_state = state_action_dict[additional_state_key]
                additional_action = np.concatenate([additional_state[1:, :], additional_state[-1, :][None, :]], axis=0)
                state_action_dict[additional_action_key] = additional_action
            ### camera intrinsics
            num_steps = state_action_dict[ee2a_key].shape[0]
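            # Unlike the Panda-hand variant, num_steps here is derived from the computed
            # FK pose array rather than from meta_info["num_steps"].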
            for intrinsics_key in intrinsics_keys:
                intrinsics_params = pickle.loads(txn.get(intrinsics_key.encode()))
                hand_camera_params = intrinsics_params["hand_camera_params"]
                hand_camera_params = np.array([hand_camera_params[0][0], hand_camera_params[1][1], hand_camera_params[0][2], hand_camera_params[1][2]]).astype("float32")  # fx, fy, cx, cy
                head_camera_params = intrinsics_params["head_camera_params"]
                head_camera_params = np.array([head_camera_params[0][0], head_camera_params[1][1], head_camera_params[0][2], head_camera_params[1][2]]).astype("float32")
                # Halve the head intrinsics when they appear to belong to the full-resolution (1280-wide) stream.
                if head_camera_params[2] >= 500:
                    head_camera_params /= 2
                state_action_dict["head_camera_intrinsics"] = np.stack([head_camera_params for _ in range(num_steps)])
                state_action_dict["hand_camera_intrinsics"] = np.stack([hand_camera_params for _ in range(num_steps)])
            ### robot2env pose
            for robot2env_key in robot2env_keys:
                robot2env_pose_tfs = pickle.loads(txn.get(robot2env_key.encode()))
                robot2env_pose_7ds = []
                for robot2env_pose_tf in robot2env_pose_tfs:
                    robot2env_pose_7ds.append(tf2xyzwxyz(robot2env_pose_tf))
                state_action_dict[robot2env_key] = np.stack(robot2env_pose_7ds)
            ### camera2env pose
            for camera2env_key in camera2env_keys:
                camera2env_pose_tfs = pickle.loads(txn.get(camera2env_key.encode()))
                camera2robot_poses = []
                for frame_idx in range(len(camera2env_pose_tfs)):
                    camera2env_posetf = camera2env_pose_tfs[frame_idx]
                    robot2env_pose_tf = robot2env_pose_tfs[frame_idx]
                    camera2robot_pose_tf = np.linalg.inv(robot2env_pose_tf) @ camera2env_posetf @ rot_x
                    camera2robot_poses.append(tf2xyzwxyz(camera2robot_pose_tf))

                if camera2env_key == "camera2env_pose.head":
                    state_action_dict["head_camera_to_robot_extrinsics"] = np.stack(camera2robot_poses)
                elif camera2env_key == "camera2env_pose.hand":
                    state_action_dict["hand_camera_to_robot_extrinsics"] = np.stack(camera2robot_poses)

unique_steps = list(set(total_steps))
|
||||
# import pdb; pdb.set_trace()
|
||||
print("episode_path:", episode_path)
|
||||
print("total_steps: ", total_steps)
|
||||
assert len(unique_steps) == 1 and unique_steps[0]>0, f"no data found or qpos / image steps mismatch in {episode_path}"
|
||||
assert unique_steps[0]>100, f"Episode length of {episode_path} is {unique_steps[0]}, which does not meet requirements"
|
||||
assert np.max(np.abs(state_action_dict["states.joint.position"])) < 2*np.pi
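            # Temporal downsampling: keep every fps_factor-th frame (e.g. a factor of 2 turns 60 fps into 30 fps).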
            selected_steps = [step for step in range(unique_steps[0]) if step % fps_factor == 0]
            frames = []
            image_observations = {}
            for image_key in image_keys:
                image_observations[image_key] = []
            start_time = time.time()
            for step_index, step in enumerate(selected_steps):
                step_str = f"{step:04d}"
                data_dict = {}
                for key, value in state_action_dict.items():
                    # if "forlan2robot_pose" in key:
                    #     data_dict["states.ee_to_armbase_pose"] = value[step]
                    if "robot2env_pose" in key:
                        data_dict["states.robot_to_env_pose"] = value[step]
                    else:
                        data_dict[key] = value[step]
                data_dict["task"] = meta_info['language_instruction']
                frames.append(data_dict)
                # import pdb; pdb.set_trace()
                for image_key in image_keys:
                    image_key_step_encode = f"{image_key}/{step_str}".encode()
                    if image_key_step_encode not in keys:
                        raise ValueError(f"Image key {image_key_step_encode} not found in LMDB keys.")
                    image_observations[image_key].append(load_image(txn, image_key_step_encode))
            end_time = time.time()
            elapsed_time = end_time - start_time
            print(f"loaded image observations of {episode_path} in {elapsed_time:.2f}s")
        env.close()
        if not frames:
            return None
        os.makedirs(sava_path, exist_ok=True)
        os.makedirs(sava_path / episode_path.name, exist_ok=True)
        video_paths = {}
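        # Re-encode each camera stream as an AV1 (libsvtav1) MP4; 1280-wide frames are halved via the scale filter.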
        for image_key in image_keys:
            h_ori, w_ori = image_observations[image_key][0].shape[:2]
            if w_ori == 1280:
                w_tgt = w_ori // 2
                h_tgt = h_ori // 2
            else:
                w_tgt = w_ori
                h_tgt = h_ori
            imageio.mimsave(
                sava_path / episode_path.name / f'{image_key.replace(".", "_")}.mp4',
                image_observations[image_key],
                fps=target_fps,
                codec="libsvtav1",
                # codec="libx264",
                ffmpeg_params=[
                    "-crf", "28",  # quality (0-63, default 30)
                    "-preset", "8",  # speed preset (0-13; higher is faster but compresses less)
                    # "-g", "240",  # keyframe interval (recommended >= 8x fps)
                    "-pix_fmt", "yuv420p",  # pixel format for broad player compatibility
                    "-movflags", "+faststart",  # move metadata to the file head for streaming playback
                    # "-threads", "8",  # number of encoder threads
                    "-vf", f"scale={w_tgt}:{h_tgt}",
                    "-y",  # overwrite existing output files
                ]
            )
            video_paths[image_key] = sava_path / episode_path.name / f'{image_key.replace(".", "_")}.mp4'
        print(f"finished writing videos for {episode_path}")

        return {
            "frames": frames,
            "videos": video_paths,
        }

    except Exception as e:
        logging.error(f"Failed to load or process LMDB data: {e}")
        return None


def get_all_tasks(src_path: Path, output_path: Path) -> Tuple[Path, Path]:
    output_path.mkdir(exist_ok=True)
    yield (src_path, output_path)


def compute_episode_stats(episode_data: Dict[str, List[str] | np.ndarray], features: Dict) -> Dict:
    ep_stats = {}
    for key, data in episode_data.items():
        if features[key]["dtype"] == "string":
            continue
        elif features[key]["dtype"] in ["image", "video"]:
            ep_ft_array = sample_images(data)
            axes_to_reduce = (0, 2, 3)  # keep channel dim
            keepdims = True
        else:
            ep_ft_array = data  # data is already a np.ndarray
            axes_to_reduce = 0  # compute stats over the first axis
            keepdims = data.ndim == 1  # keep as np.array

        ep_stats[key] = get_feature_stats(ep_ft_array, axis=axes_to_reduce, keepdims=keepdims)
        if features[key]["dtype"] in ["image", "video"]:
            ep_stats[key] = {
                k: v if k == "count" else np.squeeze(v / 255.0, axis=0) for k, v in ep_stats[key].items()
            }
    return ep_stats


def sample_images(input):
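    # Accepts either a video file path (frames decoded via torchvision) or a raw
    # [T, H, W] uint8 array; either way a stats subsample is returned as [N, C, H, W].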
    if isinstance(input, str):
        video_path = input
        reader = torchvision.io.VideoReader(video_path, stream="video")
        frames = [frame["data"] for frame in reader]
        frames_array = torch.stack(frames).numpy()  # Shape: [T, C, H, W]
        sampled_indices = sample_indices(len(frames_array))
        images = None
        for i, idx in enumerate(sampled_indices):
            img = frames_array[idx]
            img = auto_downsample_height_width(img)
            if images is None:
                images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
            images[i] = img
    elif isinstance(input, np.ndarray):
        frames_array = input[:, None, :, :]  # Shape: [T, C, H, W]
        sampled_indices = sample_indices(len(frames_array))
        images = None
        for i, idx in enumerate(sampled_indices):
            img = frames_array[idx]
            img = auto_downsample_height_width(img)
            if images is None:
                images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
            images[i] = img
    return images


def load_local_dataset(episode_path: str, save_path: str, origin_fps=30, target_fps=30):
    fps_factor = origin_fps // target_fps
    # print(f"fps downsample factor: {fps_factor}")
    # logging.info(f"fps downsample factor: {fps_factor}")
    # for format_str in [f"{episode_id:07d}", f"{episode_id:06d}", str(episode_id)]:
    #     episode_path = Path(src_path) / format_str
    #     save_path = Path(save_path) / format_str
    #     if episode_path.exists():
    #         break
    # else:
    #     logging.warning(f"Episode directory not found for ID {episode_id}")
    #     return None, None
    episode_path = Path(episode_path)
    if not episode_path.exists():
        logging.warning(f"{episode_path} does not exist")
        return None, None

    if not (episode_path / "lmdb/data.mdb").exists():
        logging.warning(f"LMDB data not found for episode {episode_path}")
        return None, None

    raw_dataset = load_lmdb_data(episode_path, save_path, fps_factor, target_fps)
    if raw_dataset is None:
        return None, None
    frames = raw_dataset["frames"]  # states, actions, task
    videos = raw_dataset["videos"]  # image paths
    ## check the frames
    for camera_name, video_path in videos.items():
        if not os.path.exists(video_path):
            logging.error(f"Video file {video_path} does not exist.")
            print(f"Camera {camera_name} video file {video_path} does not exist.")
            return None, None
    return frames, videos


def save_as_lerobot_dataset(task: tuple[Path, Path], repo_id, num_threads, debug, origin_fps=30, target_fps=30, num_demos=None, robot_type="Franka", delete_downsampled_videos=True):
    src_path, save_path = task
    print(f"**Processing collected** {src_path}")
    print(f"**Saving to** {save_path}")
    if save_path.exists():
        print(f"Output directory {save_path} already exists. Deleting it.")
        logging.warning(f"Output directory {save_path} already exists. Deleting it.")
        shutil.rmtree(save_path)
        # print(f"Output directory {save_path} already exists.")
        # return

    dataset = FrankaDataset.create(
        repo_id=f"{repo_id}",
        root=save_path,
        fps=target_fps,
        robot_type=robot_type,
        features=FEATURES,
    )
    all_episode_paths = sorted([f.as_posix() for f in src_path.glob("*") if f.is_dir()])
    if num_demos is not None:
        all_episode_paths = all_episode_paths[:num_demos]
    # all_subdir_eids = [int(Path(path).name) for path in all_subdir]
    if debug:
        for i in range(1):
            frames, videos = load_local_dataset(episode_path=all_episode_paths[i], save_path=save_path, origin_fps=origin_fps, target_fps=target_fps)
            if frames is None or videos is None:
                print(f"Skipping episode {all_episode_paths[i]} due to missing data.")
                continue
            for frame_data in frames:
                dataset.add_frame(frame_data)
            dataset.save_episode(videos=videos)
            if delete_downsampled_videos:
                for _, video_path in videos.items():
                    parent_dir = os.path.dirname(video_path)
                    try:
                        shutil.rmtree(parent_dir)
                        # os.remove(video_path)
                        # print(f"Successfully deleted: {parent_dir}")
                        print(f"Successfully deleted: {video_path}")
                    except Exception as e:
                        pass  # the directory may not exist or may already have been deleted

    else:
        counter_episodes_incomplete = 0
        for batch_index in range(len(all_episode_paths) // num_threads + 1):
            batch_episode_paths = all_episode_paths[batch_index * num_threads:(batch_index + 1) * num_threads]
            if len(batch_episode_paths) == 0:
                continue
            with ThreadPoolExecutor(max_workers=num_threads) as executor:
                futures = {}
                for episode_path in batch_episode_paths:
                    print("starting to process episode: ", episode_path)
                    future = executor.submit(load_local_dataset, episode_path=episode_path, save_path=save_path, origin_fps=origin_fps, target_fps=target_fps)
                    futures[future] = episode_path
                for future in as_completed(futures):
                    episode_path = futures[future]
                    frames, videos = future.result()
                    if frames is None or videos is None:
                        counter_episodes_incomplete += 1
                        print(f"Skipping episode {episode_path} due to missing data.")
                        continue
                    for frame_data in frames:
                        dataset.add_frame(frame_data)
                    dataset.save_episode(videos=videos)
                    gc.collect()
                    print(f"finished processing {videos}")
                    if delete_downsampled_videos:
                        for _, video_path in videos.items():
                            # Get the parent directory of the video
                            parent_dir = os.path.dirname(video_path)
                            try:
                                shutil.rmtree(parent_dir)
                                print(f"Successfully deleted: {parent_dir}")
                            except Exception as e:
                                pass
        print("counter_episodes_incomplete:", counter_episodes_incomplete)


def main(src_path, save_path, repo_id, num_threads=4, debug=False, origin_fps=30, target_fps=30, num_demos=None):
    logging.info("Scanning for episodes...")
    tasks = get_all_tasks(src_path, save_path)
    if debug:
        task = next(tasks)
        save_as_lerobot_dataset(task, repo_id, num_threads=num_threads, debug=debug, origin_fps=origin_fps, target_fps=target_fps, num_demos=num_demos)
    else:
        for task in tasks:
            save_as_lerobot_dataset(task, repo_id, num_threads=num_threads, debug=debug, origin_fps=origin_fps, target_fps=target_fps, num_demos=num_demos)


if __name__ == "__main__":
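    # Example invocation (paths below are illustrative):
    #   python lmdb2lerobot_franka_a1.py --src_path /data/raw --save_path /data/lerobot --repo_id MyRepo --num-threads 16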
    parser = argparse.ArgumentParser(description="Convert collected data from Piper to LeRobot format.")
    parser.add_argument(
        "--src_path",
        type=str,
        # required=False,
        default="/fs-computility/efm/shared/datasets/myData-A1/real/raw_data/agilex_split_aloha/",
        help="Path to the input directory containing collected data in Piper format.",
    )
    parser.add_argument(
        "--save_path",
        type=str,
        # required=False,
        default="/fs-computility/efm/shared/datasets/myData-A1/real/lerobot_v2_1/agilex_split_aloha/",
        help="Path to the output directory where the converted LeRobot dataset will be saved.",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Run in debug mode with limited episodes",
    )
    parser.add_argument(
        "--num-threads",
        type=int,
        default=64,
        help="Number of threads per process",
    )
    # parser.add_argument(
    #     "--task_name",
    #     type=str,
    #     required=True,
    #     default="Pick_up_the_marker_and_put_it_into_the_pen_holder",
    #     help="Name of the task to be processed. Default is 'Pick_up_the_marker_and_put_it_into_the_pen_holder'.",
    # )
    parser.add_argument(
        "--repo_id",
        type=str,
        # required=True,
        default="SplitAloha_20250714",
        help="Identifier for the dataset repository.",
    )
    parser.add_argument(
        "--origin_fps",
        type=int,
        default=30,
        help="Frames per second of the observation video. Default is 30.",
    )
    parser.add_argument(
        "--target_fps",
        type=int,
        default=30,
        help="Frames per second of the downsampled video. Default is 30.",
    )
    parser.add_argument(
        "--num_demos",
        type=int,
        default=None,
        help="Number of demos to convert (all if unset).",
    )
    args = parser.parse_args()
    assert int(args.origin_fps) % int(args.target_fps) == 0, "origin_fps must be an integer multiple of target_fps"
    start_time = time.time()
    main(
        src_path=Path(args.src_path),
        save_path=Path(args.save_path),
        repo_id=args.repo_id,
        num_threads=args.num_threads,
        debug=args.debug,
        origin_fps=args.origin_fps,
        target_fps=args.target_fps,
        num_demos=args.num_demos,
    )
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Total time taken: {elapsed_time:.2f} seconds")
904
policy/lmdb2lerobotv21/lmdb2lerobot_genie1_a1.py
Normal file
@@ -0,0 +1,904 @@
import argparse
import json
import logging
import os
import gc
import shutil
import torchvision
import cv2
import h5py
import lmdb
import numpy as np
import pickle
import torch
import pinocchio as pin
import time
import ray
import pdb
import imageio  # imageio-ffmpeg

from PIL import Image
from tqdm import tqdm
from lerobot.common.datasets.compute_stats import auto_downsample_height_width, get_feature_stats, sample_indices
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.datasets.utils import check_timestamps_sync, get_episode_data_index, validate_episode_buffer
from ray.runtime_env import RuntimeEnv
from scipy.spatial.transform import Rotation as R
from copy import deepcopy
from concurrent.futures import ALL_COMPLETED, ProcessPoolExecutor, ThreadPoolExecutor, as_completed, wait
from pathlib import Path
from typing import Callable, Dict, List, Optional, Tuple
from pdb import set_trace

"""
Store both camera images and robot states as a combined observation.
Args:
    observation: images (camera), states (robot state)
    actions: joint, gripper, ee_pose
"""
FEATURES = {
    "images.rgb.head": {
        "dtype": "video",
        "shape": (360, 640, 3),
        "names": ["height", "width", "channel"],
    },
    "images.rgb.hand_left": {
        "dtype": "video",
        "shape": (480, 848, 3),
        "names": ["height", "width", "channel"],
    },
    "images.rgb.hand_right": {
        "dtype": "video",
        "shape": (480, 848, 3),
        "names": ["height", "width", "channel"],
    },
    "head_camera_intrinsics": {
        "dtype": "float32",
        "shape": (4,),
        "names": ["fx", "fy", "cx", "cy"],
    },
    "hand_left_camera_intrinsics": {
        "dtype": "float32",
        "shape": (4,),
        "names": ["fx", "fy", "cx", "cy"],
    },
    "hand_right_camera_intrinsics": {
        "dtype": "float32",
        "shape": (4,),
        "names": ["fx", "fy", "cx", "cy"],
    },
    "head_camera_to_robot_extrinsics": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "hand_left_camera_to_robot_extrinsics": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "hand_right_camera_to_robot_extrinsics": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.left_joint.position": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["left_joint_0", "left_joint_1", "left_joint_2", "left_joint_3", "left_joint_4", "left_joint_5", "left_joint_6"],
    },
    "states.left_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["left_gripper_0"],
    },
    "states.left_ee_to_left_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.left_ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.left_tcp_to_left_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.left_tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.right_joint.position": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["right_joint_0", "right_joint_1", "right_joint_2", "right_joint_3", "right_joint_4", "right_joint_5", "right_joint_6"],
    },
    "states.right_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["right_gripper_0"],
    },
    "states.right_ee_to_right_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.right_ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.right_tcp_to_right_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.right_tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.robot_to_env_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.left_joint.position": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["left_joint_0", "left_joint_1", "left_joint_2", "left_joint_3", "left_joint_4", "left_joint_5", "left_joint_6"],
    },
    "actions.left_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["left_gripper_0"],
    },
    "actions.left_ee_to_left_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.left_ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.left_tcp_to_left_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.left_tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.right_joint.position": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["right_joint_0", "right_joint_1", "right_joint_2", "right_joint_3", "right_joint_4", "right_joint_5", "right_joint_6"],
    },
    "actions.right_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["right_gripper_0"],
    },
    "actions.right_ee_to_right_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.right_ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.right_tcp_to_right_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.right_tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "master_actions.left_joint.position": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["left_joint_0", "left_joint_1", "left_joint_2", "left_joint_3", "left_joint_4", "left_joint_5", "left_joint_6"],
    },
    "master_actions.left_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["left_gripper_0"],
    },
    "master_actions.left_gripper.openness": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["left_gripper_0"],
    },
    "master_actions.right_joint.position": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["right_joint_0", "right_joint_1", "right_joint_2", "right_joint_3", "right_joint_4", "right_joint_5", "right_joint_6"],
    },
    "master_actions.right_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["right_gripper_0"],
    },
    "master_actions.right_gripper.openness": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["right_gripper_0"],
    },
}
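
# Note: every *_pose feature above is 7-D: translation [x, y, z] followed by a
# scalar-first quaternion [qw, qx, qy, qz], matching tf2xyzwxyz() below.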


class Genie1Dataset(LeRobotDataset):
    def __init__(
        self,
        repo_id: str,
        root: str | Path | None = None,
        episodes: list[int] | None = None,
        image_transforms: Callable | None = None,
        delta_timestamps: dict[list[float]] | None = None,
        tolerance_s: float = 1e-4,
        download_videos: bool = True,
        local_files_only: bool = False,
        video_backend: str | None = None,
    ):
        super().__init__(
            repo_id=repo_id,
            root=root,
            episodes=episodes,
            image_transforms=image_transforms,
            delta_timestamps=delta_timestamps,
            tolerance_s=tolerance_s,
            download_videos=download_videos,
            local_files_only=local_files_only,
            video_backend=video_backend,
        )
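
    # save_episode is overridden below so that the per-camera MP4s pre-encoded by
    # load_lmdb_data (passed in via `videos`) are copied into place instead of being
    # re-encoded from buffered image frames.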
    def save_episode(self, episode_data: dict | None = None, videos: dict | None = None) -> None:
        if not episode_data:
            episode_buffer = self.episode_buffer

        validate_episode_buffer(episode_buffer, self.meta.total_episodes, self.features)
        episode_length = episode_buffer.pop("size")
        tasks = episode_buffer.pop("task")
        episode_tasks = list(set(tasks))
        episode_index = episode_buffer["episode_index"]

        episode_buffer["index"] = np.arange(self.meta.total_frames, self.meta.total_frames + episode_length)
        episode_buffer["episode_index"] = np.full((episode_length,), episode_index)

        for task in episode_tasks:
            task_index = self.meta.get_task_index(task)
            if task_index is None:
                self.meta.add_task(task)

        episode_buffer["task_index"] = np.array([self.meta.get_task_index(task) for task in tasks])
        for key, ft in self.features.items():
            if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["video"]:
                continue
            episode_buffer[key] = np.stack(episode_buffer[key]).squeeze()
        for key in self.meta.video_keys:
            video_path = self.root / self.meta.get_video_file_path(episode_index, key)
            episode_buffer[key] = str(video_path)  # PosixPath -> str
            video_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copyfile(videos[key], video_path)
        ep_stats = compute_episode_stats(episode_buffer, self.features)
        self._save_episode_table(episode_buffer, episode_index)
        self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats)
        ep_data_index = get_episode_data_index(self.meta.episodes, [episode_index])
        ep_data_index_np = {k: t.numpy() for k, t in ep_data_index.items()}
        check_timestamps_sync(
            episode_buffer["timestamp"],
            episode_buffer["episode_index"],
            ep_data_index_np,
            self.fps,
            self.tolerance_s,
        )
        if not episode_data:
            self.episode_buffer = self.create_episode_buffer()

    def add_frame(self, frame: dict) -> None:
        for name in frame:
            if isinstance(frame[name], torch.Tensor):
                frame[name] = frame[name].numpy()
        features = {key: value for key, value in self.features.items() if key in self.hf_features}
        if self.episode_buffer is None:
            self.episode_buffer = self.create_episode_buffer()
        frame_index = self.episode_buffer["size"]
        timestamp = frame.pop("timestamp") if "timestamp" in frame else frame_index / self.fps
        self.episode_buffer["frame_index"].append(frame_index)
        self.episode_buffer["timestamp"].append(timestamp)

        for key in frame:
            if key == "task":
                self.episode_buffer["task"].append(frame["task"])
                continue
            if key not in self.features:
                raise ValueError(f"An element of the frame is not in the features. '{key}' not in '{self.features.keys()}'.")
            self.episode_buffer[key].append(frame[key])
        self.episode_buffer["size"] += 1


# def crop_resize_no_padding(image, target_size=(480, 640)):
#     """
#     Crop and scale to the target size (no padding)
#     :param image: input image (NumPy array)
#     :param target_size: target size (height, width)
#     :return: processed image
#     """
#     h, w = image.shape[:2]
#     target_h, target_w = target_size
#     target_ratio = target_w / target_h  # target aspect ratio (e.g. 640/480 = 1.333)
#
#     # Determine the original aspect ratio and the cropping direction
#     if w / h > target_ratio:  # original image is wider -> crop width
#         crop_w = int(h * target_ratio)  # crop width derived from the target aspect ratio
#         crop_h = h
#         start_x = (w - crop_w) // 2  # horizontal center starting point
#         start_y = 0
#     else:  # original image is taller -> crop height
#         crop_h = int(w / target_ratio)  # crop height derived from the target aspect ratio
#         crop_w = w
#         start_x = 0
#         start_y = (h - crop_h) // 2  # vertical center starting point
#
#     # Perform centered cropping (guarding against out-of-bounds indices)
#     start_x, start_y = max(0, start_x), max(0, start_y)
#     end_x, end_y = min(w, start_x + crop_w), min(h, start_y + crop_h)
#     cropped = image[start_y:end_y, start_x:end_x]
#
#     # Resize to the target size (bilinear interpolation)
#     resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
#     return resized

def tf2xyzwxyz(posetf):
    translation = posetf[:3, 3]
    orientation = R.from_matrix(posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
    xyzwxyz = np.concatenate([translation, orientation]).astype("float32")

    return xyzwxyz
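
# Example: the identity transform maps to zero translation plus the identity quaternion:
#   tf2xyzwxyz(np.eye(4)) -> array([0., 0., 0., 1., 0., 0., 0.], dtype=float32)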

def load_lmdb_data(episode_path: Path, sava_path: Path, fps_factor: int, target_fps: int) -> Optional[Dict]:
    def load_image(txn, key):
        raw = txn.get(key)
        data = pickle.loads(raw)
        image = cv2.imdecode(data, cv2.IMREAD_COLOR)
        # Convert to RGB if necessary
        # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # image = crop_resize_no_padding(image, target_size=(480, 640))
        return image
    # set_trace()
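    # Hard-coded arm-base->robot calibration transforms, selected by episode-name prefix:
    # episodes named "00..." come from the simbox pipeline, "2025..." from the data engine.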
    if episode_path.name.startswith("00"):
        # for simbox
        left_armbase_to_robot_pose = np.array([[ 8.77583782e-01, -2.06711634e-07,  4.79423306e-01,  2.77224123e-01],
                                               [ 3.04746970e-07,  1.00000000e+00, -1.26671697e-07, -5.41924611e-08],
                                               [-4.79423306e-01,  2.57267827e-07,  8.77583782e-01,  1.29616246e+00],
                                               [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  1.00000000e+00]])
    elif episode_path.name.startswith("2025"):
        # for dataengine
        left_armbase_to_robot_pose = np.array([[ 8.08028872e-01, -3.27251376e-08,  5.89142888e-01,  3.10688674e-01],
                                               [ 2.22104410e-07,  1.00000000e+00, -2.49076482e-07, -8.51402262e-08],
                                               [-5.89142888e-01,  3.32112222e-07,  8.08028872e-01,  1.21495271e+00],
                                               [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  1.00000000e+00]])
    else:
        raise ValueError(f"Unrecognized episode name prefix for {episode_path.name}; no armbase calibration available.")
    right_armbase_to_robot_pose = left_armbase_to_robot_pose

    rot_x = np.eye(4)
    rot_x[1][1] = -1.0
    rot_x[2][2] = -1.0

    tcp2ee_pose = np.eye(4)
    tcp2ee_pose[2, 3] = 0.22
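    # The tool center point sits 0.22 m along the end-effector z-axis (presumably the gripper length on this rig).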

    # model = pin.buildModelFromUrdf("../assets/R5a/R5a.urdf")
    # data = model.createData()

    try:
        env = lmdb.open(
            str(episode_path / "lmdb"),
            readonly=True,
            lock=False,
            max_readers=128,
            readahead=False
        )
        meta_info = pickle.load(open(episode_path / "meta_info.pkl", "rb"))
        with env.begin(write=False) as txn:
            # set_trace()
            keys = [k for k, _ in txn.cursor()]
            qpos_keys = ['states.left_gripper.position', 'states.left_joint.position', 'states.right_gripper.position', 'states.right_joint.position']
            master_action_keys = ['master_actions.left_gripper.openness', 'master_actions.left_gripper.position', 'master_actions.left_joint.position', 'master_actions.right_gripper.openness', 'master_actions.right_gripper.position', 'master_actions.right_joint.position']
            image_keys = ['images.rgb.head', 'images.rgb.hand_left', 'images.rgb.hand_right']
            compute_qpos_keys = ['states.left_joint.position', 'states.right_joint.position']
            additional_action_keys = ["actions.left_ee_to_left_armbase_pose", "actions.left_ee_to_robot_pose", "actions.left_tcp_to_left_armbase_pose", "actions.left_tcp_to_robot_pose", "actions.right_ee_to_right_armbase_pose", "actions.right_ee_to_robot_pose", "actions.right_tcp_to_right_armbase_pose", "actions.right_tcp_to_robot_pose"]
            robot2env_keys = ['robot2env_pose']
            intrinsics_keys = ['json_data']
            camera2env_keys = ["camera2env_pose.head", "camera2env_pose.hand_left", "camera2env_pose.hand_right"]
            num_steps = meta_info["num_steps"]
            total_steps = []
            for image_key in image_keys:
                keys_image_per_step = meta_info['keys'][image_key]
                total_steps.append(len(keys_image_per_step))

            state_action_dict = {}
            ### qpos
            # set_trace()
            for key in qpos_keys:
                state_action_dict[key] = pickle.loads(txn.get(key.encode()))
                state_action_dict[key] = np.stack(state_action_dict[key])
                total_steps.append(len(state_action_dict[key]))
            state_keys = list(state_action_dict.keys())
            ### next qpos as action
            for k in state_keys:
                state_action_dict[k.replace("states", "actions")] = np.concatenate([state_action_dict[k][1:, :], state_action_dict[k][-1, :][None, :]], axis=0)
            ### master action
            for key in master_action_keys:
                state_action_dict[key] = pickle.loads(txn.get(key.encode()))
                if np.isscalar(state_action_dict[key]):
                    state_action_dict[key] = np.array([state_action_dict[key]]).astype("float32")
                state_action_dict[key] = np.stack(state_action_dict[key])
                total_steps.append(len(state_action_dict[key]))
            ### ee & tcp pose proprio
            for compute_qpos_key in compute_qpos_keys:
                compute_qpos = pickle.loads(txn.get(compute_qpos_key.encode()))
                ee_to_armbase_poses = []
                ee_to_robot_poses = []
                tcp_to_armbase_poses = []
                tcp_to_robot_poses = []
                if "left" in compute_qpos_key:
                    model = pin.buildModelFromUrdf("../assets/G1_120s/left_arm_parallel.urdf")
                elif "right" in compute_qpos_key:
                    model = pin.buildModelFromUrdf("../assets/G1_120s/right_arm_parallel.urdf")
                data = model.createData()
                for each_compute_qpos in compute_qpos:
                    q = np.zeros(model.nq)  # joint angles
                    ndim = each_compute_qpos.shape[0]
                    q[:ndim] = each_compute_qpos
                    pin.forwardKinematics(model, data, q)
                    pin.updateFramePlacements(model, data)
                    fid_a = model.getFrameId("arm_base_link")
                    if "left" in compute_qpos_key:
                        fid_b = model.getFrameId("arm_l_end_link")
                    elif "right" in compute_qpos_key:
                        fid_b = model.getFrameId("arm_r_end_link")

                    T_a = data.oMf[fid_a]  # world -> a
                    T_b = data.oMf[fid_b]  # world -> b
                    T_a_b = T_a.inverse() * T_b

                    ee2a_translation = T_a_b.homogeneous[:3, 3]
                    ee2a_orientation = R.from_matrix(T_a_b.homogeneous[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z

                    ee_to_armbase_pose = np.concatenate([ee2a_translation, ee2a_orientation]).astype("float32")
                    ee_to_armbase_poses.append(ee_to_armbase_pose)

                    tcp_to_arm_base_posetf = T_a_b.homogeneous @ tcp2ee_pose
                    tcp_to_arm_base_translation = tcp_to_arm_base_posetf[:3, 3]
                    tcp_to_arm_base_orientation = R.from_matrix(tcp_to_arm_base_posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
                    tcp_to_armbase_pose = np.concatenate([tcp_to_arm_base_translation, tcp_to_arm_base_orientation]).astype("float32")
                    tcp_to_armbase_poses.append(tcp_to_armbase_pose)

                    if "left" in compute_qpos_key:
                        ee_to_robot_posetf = left_armbase_to_robot_pose @ T_a_b.homogeneous
                    elif "right" in compute_qpos_key:
                        ee_to_robot_posetf = right_armbase_to_robot_pose @ T_a_b.homogeneous

                    ee2r_translation = ee_to_robot_posetf[:3, 3]
                    ee2r_orientation = R.from_matrix(ee_to_robot_posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z

                    ee_to_robot_pose = np.concatenate([ee2r_translation, ee2r_orientation]).astype("float32")
                    ee_to_robot_poses.append(ee_to_robot_pose)

                    tcp_to_robot_posetf = ee_to_robot_posetf @ tcp2ee_pose
                    tcp_to_robot_translation = tcp_to_robot_posetf[:3, 3]
                    tcp_to_robot_orientation = R.from_matrix(tcp_to_robot_posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
                    tcp_to_robot_pose = np.concatenate([tcp_to_robot_translation, tcp_to_robot_orientation]).astype("float32")
                    tcp_to_robot_poses.append(tcp_to_robot_pose)

                if "left" in compute_qpos_key:
                    ee2a_key = "states.left_ee_to_left_armbase_pose"
                    ee2r_key = "states.left_ee_to_robot_pose"
                    tcp2a_key = "states.left_tcp_to_left_armbase_pose"
                    tcp2r_key = "states.left_tcp_to_robot_pose"
                elif "right" in compute_qpos_key:
                    ee2a_key = "states.right_ee_to_right_armbase_pose"
                    ee2r_key = "states.right_ee_to_robot_pose"
                    tcp2a_key = "states.right_tcp_to_right_armbase_pose"
                    tcp2r_key = "states.right_tcp_to_robot_pose"

                state_action_dict[ee2a_key] = np.stack(ee_to_armbase_poses)
                state_action_dict[ee2r_key] = np.stack(ee_to_robot_poses)
                state_action_dict[tcp2a_key] = np.stack(tcp_to_armbase_poses)
                state_action_dict[tcp2r_key] = np.stack(tcp_to_robot_poses)
            ### ee & tcp pose action
            for additional_action_key in additional_action_keys:
                additional_state_key = additional_action_key.replace("actions", "states")
                additional_state = state_action_dict[additional_state_key]
                additional_action = np.concatenate([additional_state[1:, :], additional_state[-1, :][None, :]], axis=0)
                state_action_dict[additional_action_key] = additional_action
            ### intrinsics pose
            for intrinsics_key in intrinsics_keys:
                intrinsics_params = pickle.loads(txn.get(intrinsics_key.encode()))
                hand_left_camera_params = intrinsics_params["hand_left_camera_params"]
                hand_left_camera_params = np.array([hand_left_camera_params[0][0], hand_left_camera_params[1][1], hand_left_camera_params[0][2], hand_left_camera_params[1][2]]).astype("float32")
                hand_right_camera_params = intrinsics_params["hand_right_camera_params"]
                hand_right_camera_params = np.array([hand_right_camera_params[0][0], hand_right_camera_params[1][1], hand_right_camera_params[0][2], hand_right_camera_params[1][2]]).astype("float32")
                head_camera_params = intrinsics_params["head_camera_params"]
                head_camera_params = np.array([head_camera_params[0][0], head_camera_params[1][1], head_camera_params[0][2], head_camera_params[1][2]]).astype("float32")
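                # Assumed heuristic: a principal point cx of 500+ px indicates full-resolution
                # (1280-wide) intrinsics; halve them so they match the 640-wide head videos.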
                if head_camera_params[2] >= 500:
                    head_camera_params /= 2
                state_action_dict["head_camera_intrinsics"] = np.stack([head_camera_params for _ in range(num_steps)])
                state_action_dict["hand_left_camera_intrinsics"] = np.stack([hand_left_camera_params for _ in range(num_steps)])
                state_action_dict["hand_right_camera_intrinsics"] = np.stack([hand_right_camera_params for _ in range(num_steps)])
                # print(state_action_dict["hand_left_camera_intrinsics"].shape, state_action_dict["hand_right_camera_intrinsics"].shape)
            ### robot2env pose
            for robot2env_key in robot2env_keys:
                robot2env_pose_tfs = pickle.loads(txn.get(robot2env_key.encode()))
                robot2env_pose_7ds = []
                for robot2env_pose_tf in robot2env_pose_tfs:
                    translation = robot2env_pose_tf[:3, 3]
                    orientation = R.from_matrix(robot2env_pose_tf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
                    robot2env_pose_7d = np.concatenate([translation, orientation]).astype("float32")
                    robot2env_pose_7ds.append(robot2env_pose_7d)
                state_action_dict[robot2env_key] = np.stack(robot2env_pose_7ds)
            ### camera2env pose
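            # Re-express every camera pose in the robot frame (T_robot2env^-1 @ T_cam2env @ rot_x,
            # with rot_x flipping the camera y/z axes) and store it under the matching extrinsics key.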
            for camera2env_key in camera2env_keys:
                camera2env_pose_tfs = pickle.loads(txn.get(camera2env_key.encode()))
                camera2robot_poses = []
                for frame_idx in range(len(camera2env_pose_tfs)):
                    camera2env_posetf = camera2env_pose_tfs[frame_idx]
                    robot2env_pose_tf = robot2env_pose_tfs[frame_idx]
                    camera2robot_pose_tf = np.linalg.inv(robot2env_pose_tf) @ camera2env_posetf @ rot_x
                    camera2robot_poses.append(tf2xyzwxyz(camera2robot_pose_tf))

                if camera2env_key == "camera2env_pose.head":
                    state_action_dict["head_camera_to_robot_extrinsics"] = np.stack(camera2robot_poses)
                elif camera2env_key == "camera2env_pose.hand_left":
                    state_action_dict["hand_left_camera_to_robot_extrinsics"] = np.stack(camera2robot_poses)
                elif camera2env_key == "camera2env_pose.hand_right":
                    state_action_dict["hand_right_camera_to_robot_extrinsics"] = np.stack(camera2robot_poses)

            unique_steps = list(set(total_steps))
            # import pdb; pdb.set_trace()
            print("episode_path:", episode_path)
            print("total_steps: ", total_steps)
            assert len(unique_steps) == 1 and unique_steps[0] > 0, f"no data found or qpos / image step counts mismatch in {episode_path}"
            assert np.max(np.abs(state_action_dict["states.left_joint.position"])) < 2 * np.pi
            assert np.max(np.abs(state_action_dict["states.right_joint.position"])) < 2 * np.pi
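            # Keep every fps_factor-th frame to downsample from origin_fps to target_fps.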
            selected_steps = [step for step in range(unique_steps[0]) if step % fps_factor == 0]
            frames = []
            image_observations = {}
            for image_key in image_keys:
                image_observations[image_key] = []
            start_time = time.time()
            for step_index, step in enumerate(selected_steps):
                step_str = f"{step:04d}"
                data_dict = {}
                for key, value in state_action_dict.items():
                    # if "forlan2robot_pose" in key:
                    #     if key == "fl_forlan2robot_pose":
                    #         data_dict["states.left_ee_to_left_armbase_pose"] = value[step]
                    #     elif key == "fr_forlan2robot_pose":
                    #         data_dict["states.right_ee_to_right_armbase_pose"] = value[step]
                    if "robot2env_pose" in key:
                        data_dict["states.robot_to_env_pose"] = value[step]
                    else:
                        data_dict[key] = value[step]
                data_dict["task"] = meta_info['language_instruction']
                frames.append(data_dict)
                # import pdb; pdb.set_trace()
                for image_key in image_keys:
                    image_key_step_encode = f"{image_key}/{step_str}".encode()
                    if image_key_step_encode not in keys:
                        raise ValueError(f"Image key {image_key_step_encode} not found in LMDB keys.")
                    image_observations[image_key].append(load_image(txn, image_key_step_encode))
            end_time = time.time()
            elapsed_time = end_time - start_time
            print(f"loaded image observations of {episode_path} in {elapsed_time:.2f}s")
        env.close()
        if not frames:
            return None
        # set_trace()
        os.makedirs(sava_path, exist_ok=True)
        os.makedirs(sava_path / episode_path.name, exist_ok=True)
        video_paths = {}
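        # Write each camera stream to an AV1 (libsvtav1) MP4, downscaling 1280-wide frames to 640.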
        for image_key in image_keys:
            h_ori, w_ori = image_observations[image_key][0].shape[:2]
            if w_ori == 1280:
                w_tgt = w_ori // 2
                h_tgt = h_ori // 2
            else:
                w_tgt = w_ori
                h_tgt = h_ori
            imageio.mimsave(
                sava_path / episode_path.name / f'{image_key.replace(".", "_")}.mp4',
                image_observations[image_key],
                fps=target_fps,
                codec="libsvtav1",
                # codec="libx264",
                ffmpeg_params=[
                    "-crf", "28",  # quality (0-63, default 30)
                    "-preset", "8",  # speed preset (0-13; higher is faster but compresses less)
                    # "-g", "240",  # keyframe interval (recommended >= 8x fps)
                    "-pix_fmt", "yuv420p",  # pixel format for broad player compatibility
                    "-movflags", "+faststart",  # move metadata to the file head for streaming playback
                    # "-threads", "8",  # number of encoder threads
                    "-vf", f"scale={w_tgt}:{h_tgt}",
                    "-y",  # overwrite existing output files
                ]
            )
            video_paths[image_key] = sava_path / episode_path.name / f'{image_key.replace(".", "_")}.mp4'
            # imageio.mimsave(sava_path/episode_path.name/'hand_left.mp4', image_observations["images.rgb.hand_left"], fps=target_fps)
            # imageio.mimsave(sava_path/episode_path.name/'hand_right.mp4', image_observations["images.rgb.hand_right"], fps=target_fps)
        print(f"finished writing videos for {episode_path}")

        return {
            "frames": frames,
            "videos": video_paths,
        }

    except Exception as e:
        logging.error(f"Failed to load or process LMDB data: {e}")
        return None


def get_all_tasks(src_path: Path, output_path: Path) -> Tuple[Path, Path]:
    output_path.mkdir(exist_ok=True)
    yield (src_path, output_path)


def compute_episode_stats(episode_data: Dict[str, List[str] | np.ndarray], features: Dict) -> Dict:
    ep_stats = {}
    for key, data in episode_data.items():
        if features[key]["dtype"] == "string":
            continue
        elif features[key]["dtype"] in ["image", "video"]:
            ep_ft_array = sample_images(data)
            axes_to_reduce = (0, 2, 3)  # keep channel dim
            keepdims = True
        else:
            ep_ft_array = data  # data is already a np.ndarray
            axes_to_reduce = 0  # compute stats over the first axis
            keepdims = data.ndim == 1  # keep as np.array

        ep_stats[key] = get_feature_stats(ep_ft_array, axis=axes_to_reduce, keepdims=keepdims)
        if features[key]["dtype"] in ["image", "video"]:
            ep_stats[key] = {
                k: v if k == "count" else np.squeeze(v / 255.0, axis=0) for k, v in ep_stats[key].items()
            }
    return ep_stats


def sample_images(input):
    if isinstance(input, str):
        video_path = input
        reader = torchvision.io.VideoReader(video_path, stream="video")
        frames = [frame["data"] for frame in reader]
        frames_array = torch.stack(frames).numpy()  # Shape: [T, C, H, W]
        sampled_indices = sample_indices(len(frames_array))
        images = None
        for i, idx in enumerate(sampled_indices):
            img = frames_array[idx]
            img = auto_downsample_height_width(img)
            if images is None:
                images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
            images[i] = img
    elif isinstance(input, np.ndarray):
        frames_array = input[:, None, :, :]  # Shape: [T, C, H, W]
        sampled_indices = sample_indices(len(frames_array))
        images = None
        for i, idx in enumerate(sampled_indices):
            img = frames_array[idx]
            img = auto_downsample_height_width(img)
            if images is None:
                images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
            images[i] = img
    return images


def load_local_dataset(episode_path: str, save_path: str, origin_fps=30, target_fps=30):
    fps_factor = origin_fps // target_fps
    # print(f"fps downsample factor: {fps_factor}")
    # logging.info(f"fps downsample factor: {fps_factor}")
    # for format_str in [f"{episode_id:07d}", f"{episode_id:06d}", str(episode_id)]:
    #     episode_path = Path(src_path) / format_str
    #     save_path = Path(save_path) / format_str
    #     if episode_path.exists():
    #         break
    # else:
    #     logging.warning(f"Episode directory not found for ID {episode_id}")
    #     return None, None
    episode_path = Path(episode_path)
    if not episode_path.exists():
        logging.warning(f"{episode_path} does not exist")
        return None, None

    if not (episode_path / "lmdb/data.mdb").exists():
        logging.warning(f"LMDB data not found for episode {episode_path}")
        return None, None

    raw_dataset = load_lmdb_data(episode_path, save_path, fps_factor, target_fps)
    if raw_dataset is None:
        return None, None
    frames = raw_dataset["frames"]  # states, actions, task
    videos = raw_dataset["videos"]  # image paths
    ## check the frames
    for camera_name, video_path in videos.items():
        if not os.path.exists(video_path):
            logging.error(f"Video file {video_path} does not exist.")
            print(f"Camera {camera_name} video file {video_path} does not exist.")
            return None, None
    return frames, videos


def save_as_lerobot_dataset(task: tuple[Path, Path], repo_id, num_threads, debug, origin_fps=30, target_fps=30, num_demos=None, robot_type="piper", delete_downsampled_videos=True):
    src_path, save_path = task
    print(f"**Processing collected** {src_path}")
    print(f"**Saving to** {save_path}")
    if save_path.exists():
        print(f"Output directory {save_path} already exists. Deleting it.")
        logging.warning(f"Output directory {save_path} already exists. Deleting it.")
        shutil.rmtree(save_path)
        # print(f"Output directory {save_path} already exists.")
        # return

    dataset = Genie1Dataset.create(
        repo_id=f"{repo_id}",
        root=save_path,
        fps=target_fps,
        robot_type=robot_type,
        features=FEATURES,
    )
    # [f for f in src_path.glob("*")]
    all_episode_paths = sorted([f.as_posix() for f in src_path.glob("*") if f.is_dir()])
    if num_demos is not None:
        all_episode_paths = all_episode_paths[:num_demos]
    # all_subdir_eids = [int(Path(path).name) for path in all_subdir]
    if debug:
        for i in range(1):
            frames, videos = load_local_dataset(episode_path=all_episode_paths[i], save_path=save_path, origin_fps=origin_fps, target_fps=target_fps)
            if frames is None or videos is None:
                print(f"Skipping episode {all_episode_paths[i]} due to missing data.")
                continue
            for frame_data in frames:
                dataset.add_frame(frame_data)
            dataset.save_episode(videos=videos)
            if delete_downsampled_videos:
                for _, video_path in videos.items():
                    parent_dir = os.path.dirname(video_path)
                    try:
                        shutil.rmtree(parent_dir)
                        # os.remove(video_path)
                        # print(f"Successfully deleted: {parent_dir}")
                        print(f"Successfully deleted: {video_path}")
                    except Exception as e:
                        pass  # the directory may not exist or may already have been deleted

    else:
        counter_episodes_incomplete = 0
        for batch_index in range(len(all_episode_paths) // num_threads + 1):
            batch_episode_paths = all_episode_paths[batch_index * num_threads:(batch_index + 1) * num_threads]
            if len(batch_episode_paths) == 0:
                continue
            with ThreadPoolExecutor(max_workers=num_threads) as executor:
                futures = {}
                for episode_path in batch_episode_paths:
                    print("starting to process episode: ", episode_path)
                    future = executor.submit(load_local_dataset, episode_path=episode_path, save_path=save_path, origin_fps=origin_fps, target_fps=target_fps)
                    futures[future] = episode_path
                for future in as_completed(futures):
                    episode_path = futures[future]
                    frames, videos = future.result()
                    if frames is None or videos is None:
                        counter_episodes_incomplete += 1
                        print(f"Skipping episode {episode_path} due to missing data.")
                        continue
                    for frame_data in frames:
                        dataset.add_frame(frame_data)
                    dataset.save_episode(videos=videos)
                    gc.collect()
                    print(f"finished processing {videos}")
                    if delete_downsampled_videos:
                        for _, video_path in videos.items():
                            # Get the parent directory of the video
                            parent_dir = os.path.dirname(video_path)
                            try:
                                shutil.rmtree(parent_dir)
                                print(f"Successfully deleted: {parent_dir}")
                            except Exception as e:
                                pass
        print("counter_episodes_incomplete:", counter_episodes_incomplete)


def main(src_path, save_path, repo_id, num_threads=4, debug=False, origin_fps=30, target_fps=30, num_demos=None):
    logging.info("Scanning for episodes...")
    tasks = get_all_tasks(src_path, save_path)
    # set_trace()
    if debug:
        task = next(tasks)
        save_as_lerobot_dataset(task, repo_id, num_threads=num_threads, debug=debug, origin_fps=origin_fps, target_fps=target_fps, num_demos=num_demos)
    else:
        for task in tasks:
            save_as_lerobot_dataset(task, repo_id, num_threads=num_threads, debug=debug, origin_fps=origin_fps, target_fps=target_fps, num_demos=num_demos)


if __name__ == "__main__":
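    # Example invocation (paths below are illustrative):
    #   python lmdb2lerobot_genie1_a1.py --src_path /data/raw --save_path /data/lerobot --repo_id MyRepo --num-threads 16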
    parser = argparse.ArgumentParser(description="Convert collected data from Piper to LeRobot format.")
    parser.add_argument(
        "--src_path",
        type=str,
        # required=False,
        default="/fs-computility/efm/shared/datasets/myData-A1/real/raw_data/agilex_split_aloha/",
        help="Path to the input directory containing collected data in Piper format.",
    )
    parser.add_argument(
        "--save_path",
        type=str,
        # required=False,
        default="/fs-computility/efm/shared/datasets/myData-A1/real/lerobot_v2_1/agilex_split_aloha/",
        help="Path to the output directory where the converted LeRobot dataset will be saved.",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Run in debug mode with limited episodes",
    )
    parser.add_argument(
        "--num-threads",
        type=int,
        default=64,
        help="Number of threads per process",
    )
    # parser.add_argument(
    #     "--task_name",
    #     type=str,
    #     required=True,
    #     default="Pick_up_the_marker_and_put_it_into_the_pen_holder",
    #     help="Name of the task to be processed. Default is 'Pick_up_the_marker_and_put_it_into_the_pen_holder'.",
    # )
    parser.add_argument(
        "--repo_id",
        type=str,
        # required=True,
        default="SplitAloha_20250714",
        help="Identifier for the dataset repository.",
    )
    parser.add_argument(
        "--origin_fps",
        type=int,
        default=30,
        help="Frames per second of the observation video. Default is 30.",
    )
    parser.add_argument(
        "--target_fps",
        type=int,
        default=30,
        help="Frames per second of the downsampled video. Default is 30.",
    )
    parser.add_argument(
        "--num_demos",
        type=int,
        default=None,
        help="Number of demos to convert (all if unset).",
    )
    args = parser.parse_args()
    assert int(args.origin_fps) % int(args.target_fps) == 0, "origin_fps must be an integer multiple of target_fps"
    start_time = time.time()
    main(
        src_path=Path(args.src_path),
        save_path=Path(args.save_path),
        repo_id=args.repo_id,
        num_threads=args.num_threads,
        debug=args.debug,
        origin_fps=args.origin_fps,
        target_fps=args.target_fps,
        num_demos=args.num_demos,
    )
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Total time taken: {elapsed_time:.2f} seconds")
886
policy/lmdb2lerobotv21/lmdb2lerobot_lift2_a1.py
Normal file
@@ -0,0 +1,886 @@
import argparse
import json
import logging
import os
import gc
import shutil
import torchvision
import cv2
import h5py
import lmdb
import numpy as np
import pickle
import torch
import pinocchio as pin
import time
import ray
import pdb
import imageio  # imageio-ffmpeg

from PIL import Image
from tqdm import tqdm
from lerobot.common.datasets.compute_stats import auto_downsample_height_width, get_feature_stats, sample_indices
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.datasets.utils import check_timestamps_sync, get_episode_data_index, validate_episode_buffer
from ray.runtime_env import RuntimeEnv
from scipy.spatial.transform import Rotation as R
from copy import deepcopy
from concurrent.futures import ALL_COMPLETED, ProcessPoolExecutor, ThreadPoolExecutor, as_completed, wait
from pathlib import Path
from typing import Callable, Dict, List, Optional, Tuple

"""
Store both camera images and robot states as a combined observation.
Args:
    observation: images (camera), states (robot state)
    actions: joint, gripper, ee_pose
"""
|
||||
FEATURES = {
    "images.rgb.head": {
        "dtype": "video",
        "shape": (360, 640, 3),
        "names": ["height", "width", "channel"],
    },
    "images.rgb.hand_left": {
        "dtype": "video",
        "shape": (480, 640, 3),
        "names": ["height", "width", "channel"],
    },
    "images.rgb.hand_right": {
        "dtype": "video",
        "shape": (480, 640, 3),
        "names": ["height", "width", "channel"],
    },
    "head_camera_intrinsics": {
        "dtype": "float32",
        "shape": (4,),
        "names": ["fx", "fy", "cx", "cy"],
    },
    "hand_left_camera_intrinsics": {
        "dtype": "float32",
        "shape": (4,),
        "names": ["fx", "fy", "cx", "cy"],
    },
    "hand_right_camera_intrinsics": {
        "dtype": "float32",
        "shape": (4,),
        "names": ["fx", "fy", "cx", "cy"],
    },
    "head_camera_to_robot_extrinsics": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "hand_left_camera_to_robot_extrinsics": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "hand_right_camera_to_robot_extrinsics": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.left_joint.position": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["left_joint_0", "left_joint_1", "left_joint_2", "left_joint_3", "left_joint_4", "left_joint_5"],
    },
    "states.left_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["left_gripper_0"],
    },
    "states.left_ee_to_left_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.left_ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.left_tcp_to_left_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.left_tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.right_joint.position": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["right_joint_0", "right_joint_1", "right_joint_2", "right_joint_3", "right_joint_4", "right_joint_5"],
    },
    "states.right_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["right_gripper_0"],
    },
    "states.right_ee_to_right_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.right_ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.right_tcp_to_right_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.right_tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.robot_to_env_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.left_joint.position": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["left_joint_0", "left_joint_1", "left_joint_2", "left_joint_3", "left_joint_4", "left_joint_5"],
    },
    "actions.left_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["left_gripper_0"],
    },
    "actions.left_ee_to_left_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.left_ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.left_tcp_to_left_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.left_tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.right_joint.position": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["right_joint_0", "right_joint_1", "right_joint_2", "right_joint_3", "right_joint_4", "right_joint_5"],
    },
    "actions.right_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["right_gripper_0"],
    },
    "actions.right_ee_to_right_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.right_ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.right_tcp_to_right_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.right_tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "master_actions.left_joint.position": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["left_joint_0", "left_joint_1", "left_joint_2", "left_joint_3", "left_joint_4", "left_joint_5"],
    },
    "master_actions.left_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["left_gripper_0"],
    },
    "master_actions.left_gripper.openness": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["left_gripper_0"],
    },
    "master_actions.right_joint.position": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["right_joint_0", "right_joint_1", "right_joint_2", "right_joint_3", "right_joint_4", "right_joint_5"],
    },
    "master_actions.right_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["right_gripper_0"],
    },
    "master_actions.right_gripper.openness": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["right_gripper_0"],
    },
}
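
# Illustrative sketch (added commentary, not part of the original script): a
# minimal frame matching the FEATURES schema above, filled with placeholder
# zeros. Key names mirror FEATURES; real frames are assembled in load_lmdb_data.
def _example_frame() -> dict:
    frame = {
        key: np.zeros(ft["shape"], dtype=np.uint8 if ft["dtype"] == "video" else np.float32)
        for key, ft in FEATURES.items()
    }
    frame["task"] = "example language instruction"
    return frame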

class ARXLift2Dataset(LeRobotDataset):
    def __init__(
        self,
        repo_id: str,
        root: str | Path | None = None,
        episodes: list[int] | None = None,
        image_transforms: Callable | None = None,
        delta_timestamps: dict[str, list[float]] | None = None,
        tolerance_s: float = 1e-4,
        download_videos: bool = True,
        local_files_only: bool = False,
        video_backend: str | None = None,
    ):
        super().__init__(
            repo_id=repo_id,
            root=root,
            episodes=episodes,
            image_transforms=image_transforms,
            delta_timestamps=delta_timestamps,
            tolerance_s=tolerance_s,
            download_videos=download_videos,
            local_files_only=local_files_only,
            video_backend=video_backend,
        )

    def save_episode(self, episode_data: dict | None = None, videos: dict | None = None) -> None:
        episode_buffer = episode_data if episode_data else self.episode_buffer

        validate_episode_buffer(episode_buffer, self.meta.total_episodes, self.features)
        episode_length = episode_buffer.pop("size")
        tasks = episode_buffer.pop("task")
        episode_tasks = list(set(tasks))
        episode_index = episode_buffer["episode_index"]

        episode_buffer["index"] = np.arange(self.meta.total_frames, self.meta.total_frames + episode_length)
        episode_buffer["episode_index"] = np.full((episode_length,), episode_index)

        for task in episode_tasks:
            task_index = self.meta.get_task_index(task)
            if task_index is None:
                self.meta.add_task(task)

        episode_buffer["task_index"] = np.array([self.meta.get_task_index(task) for task in tasks])
        for key, ft in self.features.items():
            if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["video"]:
                continue
            episode_buffer[key] = np.stack(episode_buffer[key]).squeeze()
        for key in self.meta.video_keys:
            video_path = self.root / self.meta.get_video_file_path(episode_index, key)
            episode_buffer[key] = str(video_path)  # PosixPath -> str
            video_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copyfile(videos[key], video_path)
        ep_stats = compute_episode_stats(episode_buffer, self.features)
        self._save_episode_table(episode_buffer, episode_index)
        self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats)
        ep_data_index = get_episode_data_index(self.meta.episodes, [episode_index])
        ep_data_index_np = {k: t.numpy() for k, t in ep_data_index.items()}
        check_timestamps_sync(
            episode_buffer["timestamp"],
            episode_buffer["episode_index"],
            ep_data_index_np,
            self.fps,
            self.tolerance_s,
        )
        if not episode_data:
            self.episode_buffer = self.create_episode_buffer()
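
        # Added commentary (not in the original): this override differs from the
        # stock LeRobotDataset.save_episode in that the videos arrive pre-encoded
        # from load_lmdb_data and are simply copied into place via shutil.copyfile
        # above, rather than being encoded from buffered image frames here.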

    def add_frame(self, frame: dict) -> None:
        for name in frame:
            if isinstance(frame[name], torch.Tensor):
                frame[name] = frame[name].numpy()
        features = {key: value for key, value in self.features.items() if key in self.hf_features}
        if self.episode_buffer is None:
            self.episode_buffer = self.create_episode_buffer()
        frame_index = self.episode_buffer["size"]
        timestamp = frame.pop("timestamp") if "timestamp" in frame else frame_index / self.fps
        self.episode_buffer["frame_index"].append(frame_index)
        self.episode_buffer["timestamp"].append(timestamp)

        for key in frame:
            if key == "task":
                self.episode_buffer["task"].append(frame["task"])
                continue
            if key not in self.features:
                raise ValueError(f"An element of the frame is not in the features. '{key}' not in '{self.features.keys()}'.")
            self.episode_buffer[key].append(frame[key])
        self.episode_buffer["size"] += 1

# def crop_resize_no_padding(image, target_size=(480, 640)):
#     """
#     Crop and scale to target size (no padding)
#     :param image: input image (NumPy array)
#     :param target_size: target size (height, width)
#     :return: processed image
#     """
#     h, w = image.shape[:2]
#     target_h, target_w = target_size
#     target_ratio = target_w / target_h  # Target aspect ratio (e.g. 640/480=1.333)

#     # the original image aspect ratio and cropping direction
#     if w / h > target_ratio:  # Original image is wider → crop width
#         crop_w = int(h * target_ratio)  # Calculate crop width based on target aspect ratio
#         crop_h = h
#         start_x = (w - crop_w) // 2  # Horizontal center starting point
#         start_y = 0
#     else:  # Original image is higher → crop height
#         crop_h = int(w / target_ratio)  # Calculate clipping height according to target aspect ratio
#         crop_w = w
#         start_x = 0
#         start_y = (h - crop_h) // 2  # Vertical center starting point

#     # Perform centered cropping (to prevent out-of-bounds)
#     start_x, start_y = max(0, start_x), max(0, start_y)
#     end_x, end_y = min(w, start_x + crop_w), min(h, start_y + crop_h)
#     cropped = image[start_y:end_y, start_x:end_x]

#     # Resize to target size (bilinear interpolation)
#     resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
#     return resized

def tf2xyzwxyz(posetf):
    translation = posetf[:3, 3]
    orientation = R.from_matrix(posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
    xyzwxyz = (np.concatenate([translation, orientation])).astype("float32")

    return xyzwxyz
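
# Quick sanity check (illustrative addition, not in the original script): the
# identity transform should map to zero translation plus the identity
# quaternion [1, 0, 0, 0] in (w, x, y, z) order.
def _check_tf2xyzwxyz() -> None:
    out = tf2xyzwxyz(np.eye(4))
    assert np.allclose(out, np.array([0, 0, 0, 1, 0, 0, 0], dtype=np.float32))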


def load_lmdb_data(episode_path: Path, save_path: Path, fps_factor: int, target_fps: int) -> Optional[Dict]:
    def load_image(txn, key):
        raw = txn.get(key)
        data = pickle.loads(raw)
        image = cv2.imdecode(data, cv2.IMREAD_COLOR)
        # Convert to RGB if necessary
        # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # image = crop_resize_no_padding(image, target_size=(480, 640))
        return image

    left_armbase_to_robot_pose = np.eye(4)
    left_armbase_to_robot_pose[:3, 3] = np.array([1.63030156e-01, 2.48420062e-01, 6.94000185e-01])
    right_armbase_to_robot_pose = np.eye(4)
    right_armbase_to_robot_pose[:3, 3] = np.array([1.63030139e-01, -2.48420087e-01, 6.94000185e-01])

    rot_x = np.eye(4)
    rot_x[1][1] = -1.0
    rot_x[2][2] = -1.0

    tcp2ee_pose = np.eye(4)
    tcp2ee_pose[0, 3] = 0.16157
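    # Added commentary (an inference from the constant above, not original): the
    # tool center point sits 0.16157 m along the end-effector x-axis, so every
    # tcp pose below is the corresponding ee pose right-multiplied by this offset.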

    model = pin.buildModelFromUrdf("../assets/R5a/R5a.urdf")
    data = model.createData()

    try:

        env = lmdb.open(
            str(episode_path / "lmdb"),
            readonly=True,
            lock=False,
            max_readers=128,
            readahead=False
        )
        meta_info = pickle.load(open(episode_path / "meta_info.pkl", "rb"))
        with env.begin(write=False) as txn:
            keys = [k for k, _ in txn.cursor()]
            qpos_keys = ['states.left_gripper.position', 'states.left_joint.position', 'states.right_gripper.position', 'states.right_joint.position']
            master_action_keys = ['master_actions.left_gripper.openness', 'master_actions.left_gripper.position', 'master_actions.left_joint.position', 'master_actions.right_gripper.openness', 'master_actions.right_gripper.position', 'master_actions.right_joint.position']
            image_keys = ['images.rgb.head', 'images.rgb.hand_left', 'images.rgb.hand_right']
            compute_qpos_keys = ['states.left_joint.position', 'states.right_joint.position']
            additional_action_keys = ["actions.left_ee_to_left_armbase_pose", "actions.left_ee_to_robot_pose", "actions.left_tcp_to_left_armbase_pose", "actions.left_tcp_to_robot_pose", "actions.right_ee_to_right_armbase_pose", "actions.right_ee_to_robot_pose", "actions.right_tcp_to_right_armbase_pose", "actions.right_tcp_to_robot_pose"]
            robot2env_keys = ['robot2env_pose']
            intrinsics_keys = ['json_data']
            camera2env_keys = ["camera2env_pose.head", "camera2env_pose.hand_left", "camera2env_pose.hand_right"]
            num_steps = meta_info["num_steps"]
            total_steps = []
            for image_key in image_keys:
                keys_image_per_step = meta_info['keys'][image_key]
                total_steps.append(len(keys_image_per_step))

            state_action_dict = {}
            ### qpos
            for key in qpos_keys:
                state_action_dict[key] = pickle.loads(txn.get(key.encode()))
                state_action_dict[key] = np.stack(state_action_dict[key])
                total_steps.append(len(state_action_dict[key]))
            state_keys = list(state_action_dict.keys())
            ### next qpos as action
            for k in state_keys:
                state_action_dict[k.replace("states", "actions")] = np.concatenate([state_action_dict[k][1:, :], state_action_dict[k][-1, :][None, :]], axis=0)
            ### master action
            for key in master_action_keys:
                state_action_dict[key] = pickle.loads(txn.get(key.encode()))
                if np.isscalar(state_action_dict[key]):
                    state_action_dict[key] = np.array([state_action_dict[key]]).astype("float32")
                state_action_dict[key] = np.stack(state_action_dict[key])
                total_steps.append(len(state_action_dict[key]))
            ### ee & tcp pose proprio
            for compute_qpos_key in compute_qpos_keys:
                compute_qpos = pickle.loads(txn.get(compute_qpos_key.encode()))
                ee_to_armbase_poses = []
                ee_to_robot_poses = []
                tcp_to_armbase_poses = []
                tcp_to_robot_poses = []
                for each_compute_qpos in compute_qpos:
                    q = np.zeros(model.nq)  # joint angles
                    ndim = each_compute_qpos.shape[0]
                    q[:ndim] = each_compute_qpos
                    pin.forwardKinematics(model, data, q)
                    pin.updateFramePlacements(model, data)
                    fid_a = model.getFrameId("base_link")
                    fid_b = model.getFrameId("link6")

                    T_a = data.oMf[fid_a]  # world -> a
                    T_b = data.oMf[fid_b]  # world -> b
                    T_a_b = T_a.inverse() * T_b
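                    # Added commentary (not in the original): after
                    # forwardKinematics + updateFramePlacements, data.oMf[fid] is
                    # the world placement of a frame, so T_a_b = T_a^-1 * T_b
                    # expresses the link6 (end-effector) pose in the base frame.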

                    ee2a_translation = T_a_b.homogeneous[:3, 3]
                    ee2a_orientation = R.from_matrix(T_a_b.homogeneous[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z

                    ee_to_armbase_pose = (np.concatenate([ee2a_translation, ee2a_orientation])).astype("float32")
                    ee_to_armbase_poses.append(ee_to_armbase_pose)

                    tcp_to_arm_base_posetf = T_a_b.homogeneous @ tcp2ee_pose
                    tcp_to_arm_base_translation = tcp_to_arm_base_posetf[:3, 3]
                    tcp_to_arm_base_orientation = R.from_matrix(tcp_to_arm_base_posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
                    tcp_to_armbase_pose = (np.concatenate([tcp_to_arm_base_translation, tcp_to_arm_base_orientation])).astype("float32")
                    tcp_to_armbase_poses.append(tcp_to_armbase_pose)

                    if "left" in compute_qpos_key:
                        ee_to_robot_posetf = left_armbase_to_robot_pose @ T_a_b.homogeneous
                    elif "right" in compute_qpos_key:
                        ee_to_robot_posetf = right_armbase_to_robot_pose @ T_a_b.homogeneous

                    ee2r_translation = ee_to_robot_posetf[:3, 3]
                    ee2r_orientation = R.from_matrix(ee_to_robot_posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z

                    ee_to_robot_pose = (np.concatenate([ee2r_translation, ee2r_orientation])).astype("float32")
                    ee_to_robot_poses.append(ee_to_robot_pose)

                    tcp_to_robot_posetf = ee_to_robot_posetf @ tcp2ee_pose
                    tcp_to_robot_translation = tcp_to_robot_posetf[:3, 3]
                    tcp_to_robot_orientation = R.from_matrix(tcp_to_robot_posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
                    tcp_to_robot_pose = (np.concatenate([tcp_to_robot_translation, tcp_to_robot_orientation])).astype("float32")
                    tcp_to_robot_poses.append(tcp_to_robot_pose)

                if "left" in compute_qpos_key:
                    ee2a_key = "states.left_ee_to_left_armbase_pose"
                    ee2r_key = "states.left_ee_to_robot_pose"
                    tcp2a_key = "states.left_tcp_to_left_armbase_pose"
                    tcp2r_key = "states.left_tcp_to_robot_pose"
                elif "right" in compute_qpos_key:
                    ee2a_key = "states.right_ee_to_right_armbase_pose"
                    ee2r_key = "states.right_ee_to_robot_pose"
                    tcp2a_key = "states.right_tcp_to_right_armbase_pose"
                    tcp2r_key = "states.right_tcp_to_robot_pose"

                state_action_dict[ee2a_key] = np.stack(ee_to_armbase_poses)
                state_action_dict[ee2r_key] = np.stack(ee_to_robot_poses)
                state_action_dict[tcp2a_key] = np.stack(tcp_to_armbase_poses)
                state_action_dict[tcp2r_key] = np.stack(tcp_to_robot_poses)
            ### ee & tcp pose action
            for additional_action_key in additional_action_keys:
                additional_state_key = additional_action_key.replace("actions", "states")
                additional_state = state_action_dict[additional_state_key]
                additional_action = np.concatenate([additional_state[1:, :], additional_state[-1, :][None, :]], axis=0)
                state_action_dict[additional_action_key] = additional_action
            ### intrinsics pose
            for intrinsics_key in intrinsics_keys:
                intrinsics_params = pickle.loads(txn.get(intrinsics_key.encode()))
                hand_left_camera_params = intrinsics_params["hand_left_camera_params"]
                hand_left_camera_params = (np.array([hand_left_camera_params[0][0], hand_left_camera_params[1][1], hand_left_camera_params[0][2], hand_left_camera_params[1][2]])).astype("float32")
                hand_right_camera_params = intrinsics_params["hand_right_camera_params"]
                hand_right_camera_params = (np.array([hand_right_camera_params[0][0], hand_right_camera_params[1][1], hand_right_camera_params[0][2], hand_right_camera_params[1][2]])).astype("float32")
                try:
                    head_camera_params = intrinsics_params["head_camera_params"]
                except KeyError:
                    head_camera_params = intrinsics_params["head_camera_intrinsics"]
                head_camera_params = (np.array([head_camera_params[0][0], head_camera_params[1][1], head_camera_params[0][2], head_camera_params[1][2]])).astype("float32")
                if head_camera_params[2] >= 500:
                    head_camera_params /= 2
                state_action_dict["head_camera_intrinsics"] = np.stack([head_camera_params for _ in range(num_steps)])
                state_action_dict["hand_left_camera_intrinsics"] = np.stack([hand_left_camera_params for _ in range(num_steps)])
                state_action_dict["hand_right_camera_intrinsics"] = np.stack([hand_right_camera_params for _ in range(num_steps)])
                # print(state_action_dict["hand_left_camera_intrinsics"].shape, state_action_dict["hand_right_camera_intrinsics"].shape)
            ### robot2env pose
            for robot2env_key in robot2env_keys:
                robot2env_pose_tfs = pickle.loads(txn.get(robot2env_key.encode()))
                robot2env_pose_7ds = []
                for robot2env_pose_tf in robot2env_pose_tfs:
                    translation = robot2env_pose_tf[:3, 3]
                    orientation = R.from_matrix(robot2env_pose_tf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
                    robot2env_pose_7d = (np.concatenate([translation, orientation])).astype("float32")
                    robot2env_pose_7ds.append(robot2env_pose_7d)
                state_action_dict[robot2env_key] = np.stack(robot2env_pose_7ds)
            ### camera2env pose
            for camera2env_key in camera2env_keys:
                camera2env_pose_tfs = pickle.loads(txn.get(camera2env_key.encode()))
                camera2robot_poses = []
                for frame_idx in range(len(camera2env_pose_tfs)):
                    camera2env_posetf = camera2env_pose_tfs[frame_idx]
                    robot2env_pose_tf = robot2env_pose_tfs[frame_idx]
                    camera2robot_pose_tf = np.linalg.inv(robot2env_pose_tf) @ camera2env_posetf @ rot_x
                    camera2robot_poses.append(tf2xyzwxyz(camera2robot_pose_tf))

                if camera2env_key == "camera2env_pose.head":
                    state_action_dict["head_camera_to_robot_extrinsics"] = np.stack(camera2robot_poses)
                elif camera2env_key == "camera2env_pose.hand_left":
                    state_action_dict["hand_left_camera_to_robot_extrinsics"] = np.stack(camera2robot_poses)
                elif camera2env_key == "camera2env_pose.hand_right":
                    state_action_dict["hand_right_camera_to_robot_extrinsics"] = np.stack(camera2robot_poses)

            unique_steps = list(set(total_steps))
            # import pdb; pdb.set_trace()
            print("episode_path:", episode_path)
            print("total_steps: ", total_steps)
            assert len(unique_steps) == 1 and unique_steps[0] > 0, f"no data found or qpos / image steps mismatch in {episode_path}"
            assert np.max(np.abs(state_action_dict["states.left_joint.position"])) < 2 * np.pi
            assert np.max(np.abs(state_action_dict["states.right_joint.position"])) < 2 * np.pi
            selected_steps = [step for step in range(unique_steps[0]) if step % fps_factor == 0]
            frames = []
            image_observations = {}
            for image_key in image_keys:
                image_observations[image_key] = []
            start_time = time.time()
            for step_index, step in enumerate(selected_steps):
                step_str = f"{step:04d}"
                data_dict = {}
                for key, value in state_action_dict.items():
                    # if "forlan2robot_pose" in key:
                    #     if key == "fl_forlan2robot_pose":
                    #         data_dict["states.left_ee_to_left_armbase_pose"] = value[step]
                    #     elif key == "fr_forlan2robot_pose":
                    #         data_dict["states.right_ee_to_right_armbase_pose"] = value[step]
                    if "robot2env_pose" in key:
                        data_dict["states.robot_to_env_pose"] = value[step]
                    else:
                        data_dict[key] = value[step]
                data_dict["task"] = meta_info['language_instruction']
                frames.append(data_dict)
                # import pdb; pdb.set_trace()
                for image_key in image_keys:
                    image_key_step_encode = f"{image_key}/{step_str}".encode()
                    if image_key_step_encode not in keys:
                        raise ValueError(f"Image key {image_key_step_encode} not found in LMDB keys.")
                    image_observations[image_key].append(load_image(txn, image_key_step_encode))
            end_time = time.time()
            elapsed_time = end_time - start_time
            print(f"loaded image observations of {episode_path} in {elapsed_time:.2f} seconds")
        env.close()
        if not frames:
            return None
        os.makedirs(save_path, exist_ok=True)
        os.makedirs(save_path / episode_path.name, exist_ok=True)
        video_paths = {}
        for image_key in image_keys:
            h_ori, w_ori = image_observations[image_key][0].shape[:2]
            if w_ori == 1280:
                w_tgt = w_ori // 2
                h_tgt = h_ori // 2
            else:
                w_tgt = w_ori
                h_tgt = h_ori
            imageio.mimsave(
                save_path / episode_path.name / f'{image_key.replace(".", "_")}.mp4',
                image_observations[image_key],
                fps=target_fps,
                codec="libsvtav1",
                # codec="libx264",
                ffmpeg_params=[
                    "-crf", "28",  # quality control (0-63, default 30)
                    "-preset", "8",  # speed preset (0-13; higher is faster but compresses less)
                    # "-g", "240",  # keyframe interval (recommended >= 8x the fps)
                    "-pix_fmt", "yuv420p",  # pixel format for broad compatibility
                    "-movflags", "+faststart",  # move metadata to the file head for streaming playback
                    # "-threads", "8",  # number of threads to use
                    "-vf", f"scale={w_tgt}:{h_tgt}",
                    "-y",  # overwrite existing output files
                ]
            )
            video_paths[image_key] = save_path / episode_path.name / f'{image_key.replace(".", "_")}.mp4'
        # imageio.mimsave(save_path/episode_path.name/'hand_left.mp4', image_observations["images.rgb.hand_left"], fps=target_fps)
        # imageio.mimsave(save_path/episode_path.name/'hand_right.mp4', image_observations["images.rgb.hand_right"], fps=target_fps)
        print(f"finished encoding videos for {episode_path}")

        return {
            "frames": frames,
            "videos": video_paths,
        }

    except Exception as e:
        logging.error(f"Failed to load or process LMDB data: {e}")
        return None


def get_all_tasks(src_path: Path, output_path: Path) -> Tuple[Path, Path]:
    output_path.mkdir(exist_ok=True)
    yield (src_path, output_path)


def compute_episode_stats(episode_data: Dict[str, List[str] | np.ndarray], features: Dict) -> Dict:
    ep_stats = {}
    for key, data in episode_data.items():
        if features[key]["dtype"] == "string":
            continue
        elif features[key]["dtype"] in ["image", "video"]:
            ep_ft_array = sample_images(data)
            axes_to_reduce = (0, 2, 3)  # keep channel dim
            keepdims = True
        else:
            ep_ft_array = data  # data is already a np.ndarray
            axes_to_reduce = 0  # compute stats over the first axis
            keepdims = data.ndim == 1  # keep as np.array

        ep_stats[key] = get_feature_stats(ep_ft_array, axis=axes_to_reduce, keepdims=keepdims)
        if features[key]["dtype"] in ["image", "video"]:
            ep_stats[key] = {
                k: v if k == "count" else np.squeeze(v / 255.0, axis=0) for k, v in ep_stats[key].items()
            }
    return ep_stats
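
# Added commentary (not in the original): for image/video features the stats
# are computed on uint8 pixels sampled by sample_images below, then rescaled to
# [0, 1] by the division by 255.0 above; non-image features are reduced over
# the time axis directly.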

def sample_images(input):
    if type(input) is str:
        video_path = input
        reader = torchvision.io.VideoReader(video_path, stream="video")
        frames = [frame["data"] for frame in reader]
        frames_array = torch.stack(frames).numpy()  # Shape: [T, C, H, W]
        sampled_indices = sample_indices(len(frames_array))
        images = None
        for i, idx in enumerate(sampled_indices):
            img = frames_array[idx]
            img = auto_downsample_height_width(img)
            if images is None:
                images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
            images[i] = img
    elif type(input) is np.ndarray:
        frames_array = input[:, None, :, :]  # Shape: [T, C, H, W]
        sampled_indices = sample_indices(len(frames_array))
        images = None
        for i, idx in enumerate(sampled_indices):
            img = frames_array[idx]
            img = auto_downsample_height_width(img)
            if images is None:
                images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
            images[i] = img
    return images


def load_local_dataset(episode_path: str, save_path: str, origin_fps=30, target_fps=30):
    fps_factor = origin_fps // target_fps
    # print(f"fps downsample factor: {fps_factor}")
    # logging.info(f"fps downsample factor: {fps_factor}")
    # for format_str in [f"{episode_id:07d}", f"{episode_id:06d}", str(episode_id)]:
    #     episode_path = Path(src_path) / format_str
    #     save_path = Path(save_path) / format_str
    #     if episode_path.exists():
    #         break
    # else:
    #     logging.warning(f"Episode directory not found for ID {episode_id}")
    #     return None, None
    episode_path = Path(episode_path)
    if not episode_path.exists():
        logging.warning(f"{episode_path} does not exist")
        return None, None

    if not (episode_path / "lmdb/data.mdb").exists():
        logging.warning(f"LMDB data not found for episode {episode_path}")
        return None, None

    raw_dataset = load_lmdb_data(episode_path, save_path, fps_factor, target_fps)
    if raw_dataset is None:
        return None, None
    frames = raw_dataset["frames"]  # states, actions, task
    videos = raw_dataset["videos"]  # image paths
    ## check the frames
    for camera_name, video_path in videos.items():
        if not os.path.exists(video_path):
            logging.error(f"Video file {video_path} does not exist.")
            print(f"Camera {camera_name}: video file {video_path} does not exist.")
            return None, None
    return frames, videos
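
# Illustrative usage (added, not in the original; paths are placeholders):
#
#     frames, videos = load_local_dataset(
#         "/data/raw/0000001", "/data/tmp_videos", origin_fps=30, target_fps=15)
#     # frames: list of per-step dicts; videos: {image_key: encoded .mp4 path}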


def save_as_lerobot_dataset(task: tuple[Path, Path], repo_id, num_threads, debug, origin_fps=30, target_fps=30, num_demos=None, robot_type="ARX Lift-2", delete_downsampled_videos=True):
    src_path, save_path = task
    print(f"**Processing collected** {src_path}")
    print(f"**saving to** {save_path}")
    if save_path.exists():
        print(f"Output directory {save_path} already exists. Deleting it.")
        logging.warning(f"Output directory {save_path} already exists. Deleting it.")
        shutil.rmtree(save_path)
        # print(f"Output directory {save_path} already exists.")
        # return

    dataset = ARXLift2Dataset.create(
        repo_id=f"{repo_id}",
        root=save_path,
        fps=target_fps,
        robot_type=robot_type,
        features=FEATURES,
    )
    all_episode_paths = sorted([f.as_posix() for f in src_path.glob("*") if f.is_dir()])
    if num_demos is not None:
        all_episode_paths = all_episode_paths[:num_demos]
    # all_subdir_eids = [int(Path(path).name) for path in all_subdir]
    if debug:
        for i in range(1):
            frames, videos = load_local_dataset(episode_path=all_episode_paths[i], save_path=save_path, origin_fps=origin_fps, target_fps=target_fps)
            if frames is None or videos is None:
                print(f"Skipping episode {all_episode_paths[i]} due to missing data.")
                continue
            for frame_data in frames:
                dataset.add_frame(frame_data)
            dataset.save_episode(videos=videos)
            if delete_downsampled_videos:
                for _, video_path in videos.items():
                    parent_dir = os.path.dirname(video_path)
                    try:
                        shutil.rmtree(parent_dir)
                        # os.remove(video_path)
                        # print(f"Successfully deleted: {parent_dir}")
                        print(f"Successfully deleted: {video_path}")
                    except Exception as e:
                        pass  # Handle the case where the directory might not exist or is already deleted

    else:
        counter_episodes_uncomplete = 0
        for batch_index in range(len(all_episode_paths) // num_threads + 1):
            batch_episode_paths = all_episode_paths[batch_index * num_threads:(batch_index + 1) * num_threads]
            if len(batch_episode_paths) == 0:
                continue
            with ThreadPoolExecutor(max_workers=num_threads) as executor:
                # Map each future back to its episode path so that failures are
                # reported against the right episode (a plain list would leave
                # episode_path bound to the last submitted path).
                futures = {}
                for episode_path in batch_episode_paths:
                    print("starting to process episode: ", episode_path)
                    futures[
                        executor.submit(load_local_dataset, episode_path=episode_path, save_path=save_path, origin_fps=origin_fps, target_fps=target_fps)
                    ] = episode_path
                for future in as_completed(futures):
                    frames, videos = future.result()
                    if frames is None or videos is None:
                        counter_episodes_uncomplete += 1
                        print(f"Skipping episode {futures[future]} due to missing data.")
                        continue
                    for frame_data in frames:
                        dataset.add_frame(frame_data)
                    dataset.save_episode(videos=videos)
                    gc.collect()
                    print(f"finished processing {videos}")
                    if delete_downsampled_videos:
                        for _, video_path in videos.items():
                            # Get the parent directory of the video
                            parent_dir = os.path.dirname(video_path)
                            try:
                                shutil.rmtree(parent_dir)
                                print(f"Successfully deleted: {parent_dir}")
                            except Exception as e:
                                pass
        print("counter_episodes_uncomplete:", counter_episodes_uncomplete)


def main(src_path, save_path, repo_id, num_threads=4, debug=False, origin_fps=30, target_fps=30, num_demos=None):
    logging.info("Scanning for episodes...")
    tasks = get_all_tasks(src_path, save_path)
    if debug:
        task = next(tasks)
        save_as_lerobot_dataset(task, repo_id, num_threads=num_threads, debug=debug, origin_fps=origin_fps, target_fps=target_fps, num_demos=num_demos)
    else:
        for task in tasks:
            save_as_lerobot_dataset(task, repo_id, num_threads=num_threads, debug=debug, origin_fps=origin_fps, target_fps=target_fps, num_demos=num_demos)
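
# Example invocation (added commentary, not in the original; paths and repo_id
# are placeholders):
#
#     python lmdb2lerobot_lift2_a1.py --src_path /data/raw --save_path /data/lerobot \
#         --repo_id Lift2_20250714 --origin_fps 30 --target_fps 15 --num-threads 16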

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert collected LMDB data to LeRobot format.")
    parser.add_argument(
        "--src_path",
        type=str,
        # required=False,
        default="/fs-computility/efm/shared/datasets/myData-A1/real/raw_data/agilex_split_aloha/",
        help="Path to the input directory containing the collected LMDB episodes.",
    )
    parser.add_argument(
        "--save_path",
        type=str,
        # required=False,
        default="/fs-computility/efm/shared/datasets/myData-A1/real/lerobot_v2_1/agilex_split_aloha/",
        help="Path to the output directory where the converted LeRobot dataset will be saved.",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Run in debug mode with limited episodes",
    )
    parser.add_argument(
        "--num-threads",
        type=int,
        default=64,
        help="Number of threads per process",
    )
    # parser.add_argument(
    #     "--task_name",
    #     type=str,
    #     required=True,
    #     default="Pick_up_the_marker_and_put_it_into_the_pen_holder",
    #     help="Name of the task to be processed. Default is 'Pick_up_the_marker_and_put_it_into_the_pen_holder'.",
    # )
    parser.add_argument(
        "--repo_id",
        type=str,
        # required=True,
        default="SplitAloha_20250714",
        help="Identifier for the dataset repository.",
    )
    parser.add_argument(
        "--origin_fps",
        type=int,
        default=30,
        help="Frames per second of the source observation videos. Default is 30.",
    )
    parser.add_argument(
        "--target_fps",
        type=int,
        default=30,
        help="Frames per second of the downsampled output videos. Default is 30.",
    )
    parser.add_argument(
        "--num_demos",
        type=int,
        default=None,
        help="Number of demos to convert; all episodes are converted when unset.",
    )
    args = parser.parse_args()
    assert int(args.origin_fps) % int(args.target_fps) == 0, "origin_fps must be an integer multiple of target_fps"
    start_time = time.time()
    main(
        src_path=Path(args.src_path),
        save_path=Path(args.save_path),
        repo_id=args.repo_id,
        num_threads=args.num_threads,
        debug=args.debug,
        origin_fps=args.origin_fps,
        target_fps=args.target_fps,
        num_demos=args.num_demos,
    )
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Total time taken: {elapsed_time:.2f} seconds")

883
policy/lmdb2lerobotv21/lmdb2lerobot_split_aloha_a1.py
Normal file
@@ -0,0 +1,883 @@
import argparse
import json
import logging
import os
import gc
import shutil
import torchvision
import cv2
import h5py
import lmdb
import numpy as np
import pickle
import torch
import pinocchio as pin
import time
import ray
import pdb
import imageio  # requires imageio-ffmpeg

from PIL import Image
from tqdm import tqdm
from lerobot.common.datasets.compute_stats import auto_downsample_height_width, get_feature_stats, sample_indices
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.datasets.utils import check_timestamps_sync, get_episode_data_index, validate_episode_buffer
from ray.runtime_env import RuntimeEnv
from scipy.spatial.transform import Rotation as R
from copy import deepcopy
from concurrent.futures import ALL_COMPLETED, ProcessPoolExecutor, ThreadPoolExecutor, as_completed, wait
from pathlib import Path
from typing import Callable, Dict, List, Optional, Tuple

"""
|
||||
Store both camera image and robot state as a combined observation.
|
||||
Args:
|
||||
observation: images(camera), states (robot state)
|
||||
actions: joint, gripper, ee_pose
|
||||
"""
|
||||
FEATURES = {
    "images.rgb.head": {
        "dtype": "video",
        "shape": (360, 640, 3),
        "names": ["height", "width", "channel"],
    },
    "images.rgb.hand_left": {
        "dtype": "video",
        "shape": (480, 640, 3),
        "names": ["height", "width", "channel"],
    },
    "images.rgb.hand_right": {
        "dtype": "video",
        "shape": (480, 640, 3),
        "names": ["height", "width", "channel"],
    },
    "head_camera_intrinsics": {
        "dtype": "float32",
        "shape": (4,),
        "names": ["fx", "fy", "cx", "cy"],
    },
    "hand_left_camera_intrinsics": {
        "dtype": "float32",
        "shape": (4,),
        "names": ["fx", "fy", "cx", "cy"],
    },
    "hand_right_camera_intrinsics": {
        "dtype": "float32",
        "shape": (4,),
        "names": ["fx", "fy", "cx", "cy"],
    },
    "head_camera_to_robot_extrinsics": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "hand_left_camera_to_robot_extrinsics": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "hand_right_camera_to_robot_extrinsics": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.left_joint.position": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["left_joint_0", "left_joint_1", "left_joint_2", "left_joint_3", "left_joint_4", "left_joint_5"],
    },
    "states.left_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["left_gripper_0"],
    },
    "states.left_ee_to_left_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.left_ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.left_tcp_to_left_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.left_tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.right_joint.position": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["right_joint_0", "right_joint_1", "right_joint_2", "right_joint_3", "right_joint_4", "right_joint_5"],
    },
    "states.right_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["right_gripper_0"],
    },
    "states.right_ee_to_right_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.right_ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.right_tcp_to_right_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.right_tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "states.robot_to_env_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.left_joint.position": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["left_joint_0", "left_joint_1", "left_joint_2", "left_joint_3", "left_joint_4", "left_joint_5"],
    },
    "actions.left_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["left_gripper_0"],
    },
    "actions.left_ee_to_left_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.left_ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.left_tcp_to_left_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.left_tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.right_joint.position": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["right_joint_0", "right_joint_1", "right_joint_2", "right_joint_3", "right_joint_4", "right_joint_5"],
    },
    "actions.right_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["right_gripper_0"],
    },
    "actions.right_ee_to_right_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.right_ee_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.right_tcp_to_right_armbase_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "actions.right_tcp_to_robot_pose": {
        "dtype": "float32",
        "shape": (7,),
        "names": ["position.x", "position.y", "position.z", "quaternion.w", "quaternion.x", "quaternion.y", "quaternion.z"],
    },
    "master_actions.left_joint.position": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["left_joint_0", "left_joint_1", "left_joint_2", "left_joint_3", "left_joint_4", "left_joint_5"],
    },
    "master_actions.left_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["left_gripper_0"],
    },
    "master_actions.left_gripper.openness": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["left_gripper_0"],
    },
    "master_actions.right_joint.position": {
        "dtype": "float32",
        "shape": (6,),
        "names": ["right_joint_0", "right_joint_1", "right_joint_2", "right_joint_3", "right_joint_4", "right_joint_5"],
    },
    "master_actions.right_gripper.position": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["right_gripper_0"],
    },
    "master_actions.right_gripper.openness": {
        "dtype": "float32",
        "shape": (1,),
        "names": ["right_gripper_0"],
    },
}

class SplitAlohaDataset(LeRobotDataset):
    def __init__(
        self,
        repo_id: str,
        root: str | Path | None = None,
        episodes: list[int] | None = None,
        image_transforms: Callable | None = None,
        delta_timestamps: dict[str, list[float]] | None = None,
        tolerance_s: float = 1e-4,
        download_videos: bool = True,
        local_files_only: bool = False,
        video_backend: str | None = None,
    ):
        super().__init__(
            repo_id=repo_id,
            root=root,
            episodes=episodes,
            image_transforms=image_transforms,
            delta_timestamps=delta_timestamps,
            tolerance_s=tolerance_s,
            download_videos=download_videos,
            local_files_only=local_files_only,
            video_backend=video_backend,
        )

    def save_episode(self, episode_data: dict | None = None, videos: dict | None = None) -> None:
        episode_buffer = episode_data if episode_data else self.episode_buffer

        validate_episode_buffer(episode_buffer, self.meta.total_episodes, self.features)
        episode_length = episode_buffer.pop("size")
        tasks = episode_buffer.pop("task")
        episode_tasks = list(set(tasks))
        episode_index = episode_buffer["episode_index"]

        episode_buffer["index"] = np.arange(self.meta.total_frames, self.meta.total_frames + episode_length)
        episode_buffer["episode_index"] = np.full((episode_length,), episode_index)

        for task in episode_tasks:
            task_index = self.meta.get_task_index(task)
            if task_index is None:
                self.meta.add_task(task)

        episode_buffer["task_index"] = np.array([self.meta.get_task_index(task) for task in tasks])
        for key, ft in self.features.items():
            if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["video"]:
                continue
            episode_buffer[key] = np.stack(episode_buffer[key]).squeeze()
        for key in self.meta.video_keys:
            video_path = self.root / self.meta.get_video_file_path(episode_index, key)
            episode_buffer[key] = str(video_path)  # PosixPath -> str
            video_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copyfile(videos[key], video_path)
        ep_stats = compute_episode_stats(episode_buffer, self.features)
        self._save_episode_table(episode_buffer, episode_index)
        self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats)
        ep_data_index = get_episode_data_index(self.meta.episodes, [episode_index])
        ep_data_index_np = {k: t.numpy() for k, t in ep_data_index.items()}
        check_timestamps_sync(
            episode_buffer["timestamp"],
            episode_buffer["episode_index"],
            ep_data_index_np,
            self.fps,
            self.tolerance_s,
        )
        if not episode_data:
            self.episode_buffer = self.create_episode_buffer()

    def add_frame(self, frame: dict) -> None:
        for name in frame:
            if isinstance(frame[name], torch.Tensor):
                frame[name] = frame[name].numpy()
        features = {key: value for key, value in self.features.items() if key in self.hf_features}
        if self.episode_buffer is None:
            self.episode_buffer = self.create_episode_buffer()
        frame_index = self.episode_buffer["size"]
        timestamp = frame.pop("timestamp") if "timestamp" in frame else frame_index / self.fps
        self.episode_buffer["frame_index"].append(frame_index)
        self.episode_buffer["timestamp"].append(timestamp)

        for key in frame:
            if key == "task":
                self.episode_buffer["task"].append(frame["task"])
                continue
            if key not in self.features:
                raise ValueError(f"An element of the frame is not in the features. '{key}' not in '{self.features.keys()}'.")
            self.episode_buffer[key].append(frame[key])
        self.episode_buffer["size"] += 1

# def crop_resize_no_padding(image, target_size=(480, 640)):
#     """
#     Crop and scale to target size (no padding)
#     :param image: input image (NumPy array)
#     :param target_size: target size (height, width)
#     :return: processed image
#     """
#     h, w = image.shape[:2]
#     target_h, target_w = target_size
#     target_ratio = target_w / target_h  # Target aspect ratio (e.g. 640/480=1.333)

#     # the original image aspect ratio and cropping direction
#     if w / h > target_ratio:  # Original image is wider → crop width
#         crop_w = int(h * target_ratio)  # Calculate crop width based on target aspect ratio
#         crop_h = h
#         start_x = (w - crop_w) // 2  # Horizontal center starting point
#         start_y = 0
#     else:  # Original image is higher → crop height
#         crop_h = int(w / target_ratio)  # Calculate clipping height according to target aspect ratio
#         crop_w = w
#         start_x = 0
#         start_y = (h - crop_h) // 2  # Vertical center starting point

#     # Perform centered cropping (to prevent out-of-bounds)
#     start_x, start_y = max(0, start_x), max(0, start_y)
#     end_x, end_y = min(w, start_x + crop_w), min(h, start_y + crop_h)
#     cropped = image[start_y:end_y, start_x:end_x]

#     # Resize to target size (bilinear interpolation)
#     resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
#     return resized

def tf2xyzwxyz(posetf):
    translation = posetf[:3, 3]
    orientation = R.from_matrix(posetf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
    xyzwxyz = (np.concatenate([translation, orientation])).astype("float32")

    return xyzwxyz


def load_lmdb_data(episode_path: Path, save_path: Path, fps_factor: int, target_fps: int) -> Optional[Dict]:
    def load_image(txn, key):
        raw = txn.get(key)
        data = pickle.loads(raw)
        image = cv2.imdecode(data, cv2.IMREAD_COLOR)
        # Convert to RGB if necessary
        # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # image = crop_resize_no_padding(image, target_size=(480, 640))
        return image

    left_armbase_to_robot_pose = np.eye(4)
    left_armbase_to_robot_pose[:3, 3] = np.array([0.3684792, 0.30600064, 0.65270409])
    right_armbase_to_robot_pose = np.eye(4)
    right_armbase_to_robot_pose[:3, 3] = np.array([0.36847978, -0.30599953, 0.65270409])

    rot_x = np.eye(4)
    rot_x[1][1] = -1.0
    rot_x[2][2] = -1.0

    tcp2ee_pose = np.eye(4)
    tcp2ee_pose[2, 3] = 0.135
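    # Added commentary (an inference from the constants, not original): unlike
    # the Lift-2 converter, which offsets the TCP 0.16157 m along the
    # end-effector x-axis, the piper100 gripper used here places the TCP
    # 0.135 m along the z-axis.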

    model = pin.buildModelFromUrdf("../assets/piper100/piper100.urdf")
    data = model.createData()

    try:

        env = lmdb.open(
            str(episode_path / "lmdb"),
            readonly=True,
            lock=False,
            max_readers=128,
            readahead=False
        )
        meta_info = pickle.load(open(episode_path / "meta_info.pkl", "rb"))
        with env.begin(write=False) as txn:
            keys = [k for k, _ in txn.cursor()]
            qpos_keys = ['states.left_gripper.position', 'states.left_joint.position', 'states.right_gripper.position', 'states.right_joint.position']
            master_action_keys = ['master_actions.left_gripper.openness', 'master_actions.left_gripper.position', 'master_actions.left_joint.position', 'master_actions.right_gripper.openness', 'master_actions.right_gripper.position', 'master_actions.right_joint.position']
            image_keys = ['images.rgb.head', 'images.rgb.hand_left', 'images.rgb.hand_right']
            compute_qpos_keys = ['states.left_joint.position', 'states.right_joint.position']
            additional_action_keys = ["actions.left_ee_to_left_armbase_pose", "actions.left_ee_to_robot_pose", "actions.left_tcp_to_left_armbase_pose", "actions.left_tcp_to_robot_pose", "actions.right_ee_to_right_armbase_pose", "actions.right_ee_to_robot_pose", "actions.right_tcp_to_right_armbase_pose", "actions.right_tcp_to_robot_pose"]
            robot2env_keys = ['robot2env_pose']
            intrinsics_keys = ['json_data']
            camera2env_keys = ["camera2env_pose.head", "camera2env_pose.hand_left", "camera2env_pose.hand_right"]
            num_steps = meta_info["num_steps"]
            total_steps = []
            for image_key in image_keys:
                keys_image_per_step = meta_info['keys'][image_key]
                total_steps.append(len(keys_image_per_step))

            state_action_dict = {}
            ### qpos
            for key in qpos_keys:
                state_action_dict[key] = pickle.loads(txn.get(key.encode()))
                state_action_dict[key] = np.stack(state_action_dict[key])
                total_steps.append(len(state_action_dict[key]))
            state_keys = list(state_action_dict.keys())
            ### next qpos as action
            for k in state_keys:
                state_action_dict[k.replace("states", "actions")] = np.concatenate([state_action_dict[k][1:, :], state_action_dict[k][-1, :][None, :]], axis=0)
            ### master action
            for key in master_action_keys:
                state_action_dict[key] = pickle.loads(txn.get(key.encode()))
                if np.isscalar(state_action_dict[key]):
                    state_action_dict[key] = np.array([state_action_dict[key]]).astype("float32")
                state_action_dict[key] = np.stack(state_action_dict[key])
                total_steps.append(len(state_action_dict[key]))
            ### ee & tcp pose proprio
            for compute_qpos_key in compute_qpos_keys:
                compute_qpos = pickle.loads(txn.get(compute_qpos_key.encode()))
                ee_to_armbase_poses = []
                ee_to_robot_poses = []
                tcp_to_armbase_poses = []
                tcp_to_robot_poses = []
                for each_compute_qpos in compute_qpos:
                    q = np.zeros(model.nq)  # joint angles
                    ndim = each_compute_qpos.shape[0]
                    q[:ndim] = each_compute_qpos
                    pin.forwardKinematics(model, data, q)
                    pin.updateFramePlacements(model, data)
                    fid_a = model.getFrameId("arm_base")
                    fid_b = model.getFrameId("link6")

                    T_a = data.oMf[fid_a]  # world -> a
                    T_b = data.oMf[fid_b]  # world -> b
                    T_a_b = T_a.inverse() * T_b
|
||||
|
||||
ee2a_translation = T_a_b.homogeneous[:3, 3]
|
||||
ee2a_orientation = R.from_matrix(T_a_b.homogeneous[:3,:3]).as_quat(scalar_first=True) # w, x, y, z
|
||||
|
||||
ee_to_armbase_pose = (np.concatenate([ee2a_translation, ee2a_orientation])).astype("float32")
|
||||
ee_to_armbase_poses.append(ee_to_armbase_pose)
|
||||
|
||||
tcp_to_arm_base_posetf = T_a_b.homogeneous @ tcp2ee_pose
|
||||
tcp_to_arm_base_translation = tcp_to_arm_base_posetf[:3, 3]
|
||||
tcp_to_arm_base_orientation = R.from_matrix(tcp_to_arm_base_posetf[:3,:3]).as_quat(scalar_first=True) # w, x, y, z
|
||||
tcp_to_armbase_pose = (np.concatenate([tcp_to_arm_base_translation, tcp_to_arm_base_orientation])).astype("float32")
|
||||
tcp_to_armbase_poses.append(tcp_to_armbase_pose)
|
||||
|
||||
if "left" in compute_qpos_key:
|
||||
ee_to_robot_posetf = left_armbase_to_robot_pose @ T_a_b.homogeneous
|
||||
elif "right" in compute_qpos_key:
|
||||
ee_to_robot_posetf = right_armbase_to_robot_pose @ T_a_b.homogeneous
|
||||
|
||||
ee2r_translation = ee_to_robot_posetf[:3, 3]
|
||||
ee2r_orientation = R.from_matrix(ee_to_robot_posetf[:3,:3]).as_quat(scalar_first=True) # w, x, y, z
|
||||
|
||||
ee_to_robot_pose = (np.concatenate([ee2r_translation, ee2r_orientation])).astype("float32")
|
||||
ee_to_robot_poses.append(ee_to_robot_pose)
|
||||
|
||||
tcp_to_robot_posetf = ee_to_robot_posetf @ tcp2ee_pose
|
||||
tcp_to_robot_translation = tcp_to_robot_posetf[:3, 3]
|
||||
tcp_to_robot_orientation = R.from_matrix(tcp_to_robot_posetf[:3,:3]).as_quat(scalar_first=True) # w, x, y, z
|
||||
tcp_to_robot_pose = (np.concatenate([tcp_to_robot_translation, tcp_to_robot_orientation])).astype("float32")
|
||||
tcp_to_robot_poses.append(tcp_to_robot_pose)
|
||||
|
||||
if "left" in compute_qpos_key:
|
||||
ee2a_key = f"states.left_ee_to_left_armbase_pose"
|
||||
ee2r_key = f"states.left_ee_to_robot_pose"
|
||||
tcp2a_key = f"states.left_tcp_to_left_armbase_pose"
|
||||
tcp2r_key = f"states.left_tcp_to_robot_pose"
|
||||
elif "right" in compute_qpos_key:
|
||||
ee2a_key = f"states.right_ee_to_right_armbase_pose"
|
||||
ee2r_key = f"states.right_ee_to_robot_pose"
|
||||
tcp2a_key = f"states.right_tcp_to_right_armbase_pose"
|
||||
tcp2r_key = f"states.right_tcp_to_robot_pose"
|
||||
|
||||
state_action_dict[ee2a_key] = np.stack(ee_to_armbase_poses)
|
||||
state_action_dict[ee2r_key] = np.stack(ee_to_robot_poses)
|
||||
state_action_dict[tcp2a_key] = np.stack(tcp_to_armbase_poses)
|
||||
state_action_dict[tcp2r_key] = np.stack(tcp_to_robot_poses)
|
||||
        ### ee & tcp pose action
        for additional_action_key in additional_action_keys:
            additional_state_key = additional_action_key.replace("actions", "states")
            additional_state = state_action_dict[additional_state_key]
            additional_action = np.concatenate([additional_state[1:, :], additional_state[-1, :][None, :]], axis=0)
            state_action_dict[additional_action_key] = additional_action
        ### intrinsics
        for intrinsics_key in intrinsics_keys:
            intrinsics_params = pickle.loads(txn.get(intrinsics_key.encode()))
            hand_left_camera_params = intrinsics_params["hand_left_camera_params"]
            hand_left_camera_params = (np.array([hand_left_camera_params[0][0], hand_left_camera_params[1][1], hand_left_camera_params[0][2], hand_left_camera_params[1][2]])).astype("float32")
            hand_right_camera_params = intrinsics_params["hand_right_camera_params"]
            hand_right_camera_params = (np.array([hand_right_camera_params[0][0], hand_right_camera_params[1][1], hand_right_camera_params[0][2], hand_right_camera_params[1][2]])).astype("float32")
            head_camera_params = intrinsics_params["head_camera_params"]
            head_camera_params = (np.array([head_camera_params[0][0], head_camera_params[1][1], head_camera_params[0][2], head_camera_params[1][2]])).astype("float32")
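            # Note (assumption): each *_camera_params entry is a 3x3 intrinsic
            # matrix K, flattened here to [fx, fy, cx, cy] =
            # [K[0][0], K[1][1], K[0][2], K[1][2]]. The halving below appears to
            # compensate for head images recorded at 2x resolution (cx >= 500
            # implies a 1280-wide frame) and later downsampled to 640.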
            if head_camera_params[2] >= 500:
                head_camera_params /= 2
            state_action_dict["head_camera_intrinsics"] = np.stack([head_camera_params for _ in range(num_steps)])
            state_action_dict["hand_left_camera_intrinsics"] = np.stack([hand_left_camera_params for _ in range(num_steps)])
            state_action_dict["hand_right_camera_intrinsics"] = np.stack([hand_right_camera_params for _ in range(num_steps)])
        ### robot2env pose
        for robot2env_key in robot2env_keys:
            robot2env_pose_tfs = pickle.loads(txn.get(robot2env_key.encode()))
            robot2env_pose_7ds = []
            for robot2env_pose_tf in robot2env_pose_tfs:
                translation = robot2env_pose_tf[:3, 3]
                orientation = R.from_matrix(robot2env_pose_tf[:3, :3]).as_quat(scalar_first=True)  # w, x, y, z
                robot2env_pose_7d = (np.concatenate([translation, orientation])).astype("float32")
                robot2env_pose_7ds.append(robot2env_pose_7d)
            state_action_dict[robot2env_key] = np.stack(robot2env_pose_7ds)
        ### camera2env pose
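        # Note: per-frame extrinsics are re-expressed in the robot frame as
        # T_camera2robot = T_robot2env^{-1} @ T_camera2env @ rot_x, where rot_x
        # (assumed to be defined earlier in this file) aligns the simulator
        # camera axes with the optical-frame convention; tf2xyzwxyz packs the
        # 4x4 transform into the 7-D [x, y, z, qw, qx, qy, qz] layout used here.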
        for camera2env_key in camera2env_keys:
            camera2env_pose_tfs = pickle.loads(txn.get(camera2env_key.encode()))
            camera2robot_poses = []
            for frame_idx in range(len(camera2env_pose_tfs)):
                camera2env_posetf = camera2env_pose_tfs[frame_idx]
                robot2env_pose_tf = robot2env_pose_tfs[frame_idx]
                camera2robot_pose_tf = np.linalg.inv(robot2env_pose_tf) @ camera2env_posetf @ rot_x
                camera2robot_poses.append(tf2xyzwxyz(camera2robot_pose_tf))

            if camera2env_key == "camera2env_pose.head":
                state_action_dict["head_camera_to_robot_extrinsics"] = np.stack(camera2robot_poses)
            elif camera2env_key == "camera2env_pose.hand_left":
                state_action_dict["hand_left_camera_to_robot_extrinsics"] = np.stack(camera2robot_poses)
            elif camera2env_key == "camera2env_pose.hand_right":
                state_action_dict["hand_right_camera_to_robot_extrinsics"] = np.stack(camera2robot_poses)

        unique_steps = list(set(total_steps))
        print("episode_path:", episode_path)
        print("total_steps: ", total_steps)
        assert len(unique_steps) == 1 and unique_steps[0] > 0, f"no data found or qpos / image steps mismatch in {episode_path}"
        assert np.max(np.abs(state_action_dict["states.left_joint.position"])) < 2 * np.pi
        assert np.max(np.abs(state_action_dict["states.right_joint.position"])) < 2 * np.pi
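        # Note: temporal downsampling keeps every fps_factor-th frame, where
        # fps_factor = origin_fps // target_fps (asserted to divide evenly at the CLI).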
        selected_steps = [step for step in range(unique_steps[0]) if step % fps_factor == 0]
        frames = []
        image_observations = {}
        for image_key in image_keys:
            image_observations[image_key] = []
        start_time = time.time()
        for step_index, step in enumerate(selected_steps):
            step_str = f"{step:04d}"
            data_dict = {}
            for key, value in state_action_dict.items():
                if "robot2env_pose" in key:
                    data_dict["states.robot_to_env_pose"] = value[step]
                else:
                    data_dict[key] = value[step]
            data_dict["task"] = meta_info['language_instruction']
            frames.append(data_dict)
            for image_key in image_keys:
                image_key_step_encode = f"{image_key}/{step_str}".encode()
                if image_key_step_encode not in keys:
                    raise ValueError(f"Image key {image_key_step_encode} not found in LMDB keys.")
                image_observations[image_key].append(load_image(txn, image_key_step_encode))
        end_time = time.time()
        elapsed_time = end_time - start_time
        print(f"loaded image_observations of {episode_path} in {elapsed_time:.2f}s")
        env.close()
        if not frames:
            return None
        os.makedirs(sava_path, exist_ok=True)
        os.makedirs(sava_path / episode_path.name, exist_ok=True)
        video_paths = {}
        for image_key in image_keys:
            h_ori, w_ori = image_observations[image_key][0].shape[:2]
            if w_ori == 1280:
                w_tgt = w_ori // 2
                h_tgt = h_ori // 2
            else:
                w_tgt = w_ori
                h_tgt = h_ori
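            # Note: SVT-AV1 ("libsvtav1" via imageio/ffmpeg) is used for its
            # compression ratio; crf trades quality for size (0-63, lower is
            # better) and preset trades encode speed for compression (0-13,
            # higher is faster). The commented libx264 codec is a fallback if
            # the AV1 encoder is unavailable in the local ffmpeg build.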
            imageio.mimsave(
                sava_path / episode_path.name / f'{image_key.replace(".", "_")}.mp4',
                image_observations[image_key],
                fps=target_fps,
                codec="libsvtav1",
                # codec="libx264",
                ffmpeg_params=[
                    "-crf", "28",  # quality control (0-63, default is 30)
                    "-preset", "8",  # speed preset (0-13; higher is faster but compresses less)
                    # "-g", "240",  # keyframe interval (recommended >= 8x fps)
                    "-pix_fmt", "yuv420p",  # pixel format chosen for broad compatibility
                    "-movflags", "+faststart",  # move metadata to the file head for network playback
                    # "-threads", "8",  # number of threads to use
                    "-vf", f"scale={w_tgt}:{h_tgt}",
                    "-y",  # overwrite existing output files
                ]
            )
            video_paths[image_key] = sava_path / episode_path.name / f'{image_key.replace(".", "_")}.mp4'
        print(f"finished imageio.mimsave for {episode_path}")

        return {
            "frames": frames,
            "videos": video_paths,
        }

    except Exception as e:
        logging.error(f"Failed to load or process LMDB data: {e}")
        return None


def get_all_tasks(src_path: Path, output_path: Path):
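    """Yield (src_path, output_path) task pairs (a generator, not a single tuple)."""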
    output_path.mkdir(exist_ok=True)
    yield (src_path, output_path)

def compute_episode_stats(episode_data: Dict[str, List[str] | np.ndarray], features: Dict) -> Dict:
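    """Compute per-episode normalization stats for each feature.

    String features are skipped; image/video features are subsampled via
    sample_images and reduced over (batch, H, W) so the channel dimension
    survives; numeric features are reduced over the time axis. Image stats are
    rescaled from [0, 255] to [0, 1] to match LeRobot conventions.
    """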
    ep_stats = {}
    for key, data in episode_data.items():
        if features[key]["dtype"] == "string":
            continue
        elif features[key]["dtype"] in ["image", "video"]:
            ep_ft_array = sample_images(data)
            axes_to_reduce = (0, 2, 3)  # keep channel dim
            keepdims = True
        else:
            ep_ft_array = data  # data is already a np.ndarray
            axes_to_reduce = 0  # compute stats over the first axis
            keepdims = data.ndim == 1  # keep as np.array

        ep_stats[key] = get_feature_stats(ep_ft_array, axis=axes_to_reduce, keepdims=keepdims)
        if features[key]["dtype"] in ["image", "video"]:
            ep_stats[key] = {
                k: v if k == "count" else np.squeeze(v / 255.0, axis=0) for k, v in ep_stats[key].items()
            }
    return ep_stats

def sample_images(input_data):
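    """Load frames from a video path (str) or an array and subsample them for stats.

    Returns a uint8 array of shape [N, C, H, W]; each sampled frame is reduced
    via auto_downsample_height_width before being stacked.
    """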
    if isinstance(input_data, str):
        video_path = input_data
        reader = torchvision.io.VideoReader(video_path, stream="video")
        frames = [frame["data"] for frame in reader]
        frames_array = torch.stack(frames).numpy()  # Shape: [T, C, H, W]
        sampled_indices = sample_indices(len(frames_array))
        images = None
        for i, idx in enumerate(sampled_indices):
            img = frames_array[idx]
            img = auto_downsample_height_width(img)
            if images is None:
                images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
            images[i] = img
    elif isinstance(input_data, np.ndarray):
        frames_array = input_data[:, None, :, :]  # add channel dim: [T, H, W] -> [T, C=1, H, W]
        sampled_indices = sample_indices(len(frames_array))
        images = None
        for i, idx in enumerate(sampled_indices):
            img = frames_array[idx]
            img = auto_downsample_height_width(img)
            if images is None:
                images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
            images[i] = img
    return images


def load_local_dataset(episode_path: str, save_path: str, origin_fps=30, target_fps=30):
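    """Convert one raw LMDB episode into (frames, videos) for LeRobot ingestion.

    Returns (None, None) when the episode directory or its LMDB payload is
    missing, or when any rendered per-camera video failed to be written.
    """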
    fps_factor = origin_fps // target_fps
# for format_str in [f"{episode_id:07d}", f"{episode_id:06d}", str(episode_id)]:
|
||||
# episode_path = Path(src_path) / format_str
|
||||
# save_path = Path(save_path) / format_str
|
||||
# if episode_path.exists():
|
||||
# break
|
||||
# else:
|
||||
# logging.warning(f"Episode directory not found for ID {episode_id}")
|
||||
# return None, None
|
||||
    episode_path = Path(episode_path)
    if not episode_path.exists():
        logging.warning(f"{episode_path} does not exist")
        return None, None

    if not (episode_path / "lmdb/data.mdb").exists():
        logging.warning(f"LMDB data not found for episode {episode_path}")
        return None, None

    raw_dataset = load_lmdb_data(episode_path, save_path, fps_factor, target_fps)
    if raw_dataset is None:
        return None, None
    frames = raw_dataset["frames"]  # states, actions, task
    videos = raw_dataset["videos"]  # image paths
    ## check that the rendered videos exist
    for camera_name, video_path in videos.items():
        if not os.path.exists(video_path):
            logging.error(f"Video file {video_path} does not exist.")
            print(f"Camera {camera_name}: video file {video_path} does not exist.")
            return None, None
    return frames, videos


def save_as_lerobot_dataset(task: tuple[Path, Path], repo_id, num_threads, debug, origin_fps=30, target_fps=30, num_demos=None, robot_type="AgileX Split Aloha", delete_downsampled_videos=True):
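    """Convert every episode under src_path into a single LeRobot dataset.

    Debug mode processes one episode serially; otherwise episodes are loaded in
    thread-pooled batches and appended to the dataset as they complete. Any
    existing output directory is deleted first.
    """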
    src_path, save_path = task
    print(f"**Processing collected** {src_path}")
    print(f"**Saving to** {save_path}")
    if save_path.exists():
        print(f"Output directory {save_path} already exists. Deleting it.")
        logging.warning(f"Output directory {save_path} already exists. Deleting it.")
        shutil.rmtree(save_path)

    dataset = SplitAlohaDataset.create(
        repo_id=repo_id,
        root=save_path,
        fps=target_fps,
        robot_type=robot_type,
        features=FEATURES,
    )
    all_episode_paths = sorted([f.as_posix() for f in src_path.glob("*") if f.is_dir()])
    if num_demos is not None:
        all_episode_paths = all_episode_paths[:num_demos]
    if debug:
        for i in range(1):
            frames, videos = load_local_dataset(episode_path=all_episode_paths[i], save_path=save_path, origin_fps=origin_fps, target_fps=target_fps)
            if frames is None or videos is None:
                print(f"Skipping episode {all_episode_paths[i]} due to missing data.")
                continue
            for frame_data in frames:
                dataset.add_frame(frame_data)
            dataset.save_episode(videos=videos)
            if delete_downsampled_videos:
                for _, video_path in videos.items():
                    parent_dir = os.path.dirname(video_path)
                    try:
                        shutil.rmtree(parent_dir)
                        print(f"Successfully deleted: {video_path}")
                    except Exception:
                        pass  # the directory may not exist or is already deleted

    else:
        counter_episodes_uncomplete = 0
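        # Design note: episodes are submitted in fixed-size batches
        # (num_threads at a time) rather than all at once, which bounds peak
        # memory since each pending future holds a full episode of frames.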
        for batch_index in range(len(all_episode_paths) // num_threads + 1):
            batch_episode_paths = all_episode_paths[batch_index * num_threads:(batch_index + 1) * num_threads]
            if len(batch_episode_paths) == 0:
                continue
            with ThreadPoolExecutor(max_workers=num_threads) as executor:
                # Map each future back to its episode path so failures can be
                # reported against the episode that actually produced them.
                futures = {}
                for episode_path in batch_episode_paths:
                    print("starting to process episode: ", episode_path)
                    future = executor.submit(load_local_dataset, episode_path=episode_path, save_path=save_path, origin_fps=origin_fps, target_fps=target_fps)
                    futures[future] = episode_path
                for future in as_completed(futures):
                    frames, videos = future.result()
                    if frames is None or videos is None:
                        counter_episodes_uncomplete += 1
                        print(f"Skipping episode {futures[future]} due to missing data.")
                        continue
                    for frame_data in frames:
                        dataset.add_frame(frame_data)
                    dataset.save_episode(videos=videos)
                    gc.collect()
                    print(f"finished processing {videos}")
                    if delete_downsampled_videos:
                        for _, video_path in videos.items():
                            # Get the parent directory of the video
                            parent_dir = os.path.dirname(video_path)
                            try:
                                shutil.rmtree(parent_dir)
                                print(f"Successfully deleted: {parent_dir}")
                            except Exception:
                                pass  # the directory may not exist or is already deleted
        print("counter_episodes_uncomplete:", counter_episodes_uncomplete)

def main(src_path, save_path, repo_id, num_threads=4, debug=False, origin_fps=30, target_fps=30, num_demos=None):
    logging.info("Scanning for episodes...")
    tasks = get_all_tasks(src_path, save_path)
    if debug:
        task = next(tasks)
        save_as_lerobot_dataset(task, repo_id, num_threads=num_threads, debug=debug, origin_fps=origin_fps, target_fps=target_fps, num_demos=num_demos)
    else:
        for task in tasks:
            save_as_lerobot_dataset(task, repo_id, num_threads=num_threads, debug=debug, origin_fps=origin_fps, target_fps=target_fps, num_demos=num_demos)

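# Example invocation (a sketch; the script name and paths are placeholders):
#   python lmdb2lerobot_split_aloha.py \
#       --src_path /path/to/raw_episodes \
#       --save_path /path/to/lerobot_out \
#       --repo_id SplitAloha_20250714 \
#       --num-threads 32 --origin_fps 30 --target_fps 30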
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Convert collected data from Piper to Lerobot format.")
|
||||
parser.add_argument(
|
||||
"--src_path",
|
||||
type=str,
|
||||
# required=False,
|
||||
default="/fs-computility/efm/shared/datasets/myData-A1/real/raw_data/agilex_split_aloha/",
|
||||
help="Path to the input file containing collected data in Piper format.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--save_path",
|
||||
type=str,
|
||||
# required=False,
|
||||
default="/fs-computility/efm/shared/datasets/myData-A1/real/lerobot_v2_1/agilex_split_aloha/",
|
||||
help="Path to the output file where the converted Lerobot format will be saved.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--debug",
|
||||
action="store_true",
|
||||
help="Run in debug mode with limited episodes",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num-threads",
|
||||
type=int,
|
||||
default=64,
|
||||
help="Number of threads per process",
|
||||
)
|
||||
    parser.add_argument(
        "--repo_id",
        type=str,
        default="SplitAloha_20250714",
        help="Identifier for the dataset repository.",
    )
    parser.add_argument(
        "--origin_fps",
        type=int,
        default=30,
        help="Frames per second of the recorded observation video. Default is 30.",
    )
    parser.add_argument(
        "--target_fps",
        type=int,
        default=30,
        help="Frames per second of the downsampled video. Default is 30.",
    )
    parser.add_argument(
        "--num_demos",
        type=int,
        default=None,
        help="Number of demos to convert; default converts all.",
    )
    args = parser.parse_args()
    assert int(args.origin_fps) % int(args.target_fps) == 0, "origin_fps must be an integer multiple of target_fps"
    start_time = time.time()
    main(
        src_path=Path(args.src_path),
        save_path=Path(args.save_path),
        repo_id=args.repo_id,
        num_threads=args.num_threads,
        debug=args.debug,
        origin_fps=args.origin_fps,
        target_fps=args.target_fps,
        num_demos=args.num_demos,
    )
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Total time taken: {elapsed_time:.2f} seconds")
Submodule policy/openpi-InternData-A1 added at 10b4b8fd13
@@ -6,7 +6,7 @@ We provide an optimized and simplified annotation pipeline that removes many red

| Configuration | Example | Description |
|---------------|---------|-------------|
| **DIR** | `/home/shixu/Downloads/peixun/7265/usd` | Directory where USD files are stored |
| **DIR** | `YOUR_PATH_TO_DIR/usd` | Directory where USD files are stored |
| **USD_NAME** | `microwave_0.usd` | Scene description file name |
| **INSTANCE_NAME** | `microwave7265` | Model identifier in the scene. You can name it yourself, preferably matching the generated file name |

@@ -1,5 +1,5 @@
{
    "DIR": "/home/shixu/dev_shixu/DataEngine/workflows/simbox/tools/art/open_v/7265/usd",
    "DIR": "YOUR_PARENT_DIR_OF_USD",
    "USD_NAME": "microwave_0.usd",
    "INSTANCE_NAME": "microwave7265",
    "link0_initial_prim_path": "/root/group_18",

@@ -1,7 +1,7 @@
#!/bin/bash
CONFIG_PATH="/home/shixu/dev_shixu/DataEngine/workflows/simbox/tools/art/open_v/7265/usd/keypoints_config.json"
CONFIG_PATH="YOUR_PATH_TO/keypoints_config.json"

cd /home/shixu/dev_shixu/DataEngine/workflows/simbox/tools/art/open_v/tools
cd workflows/simbox/tools/art/open_v/tools

# 1. rehier
python rehier.py --config $CONFIG_PATH
