feat(policies): add Nvidia Gr00t N1.5 model (#2292)

* feat(policies): add Nvidia Gr00t N1.5 model Co-authored-by: lbenhorin <lbenhorin@nvidia.com> Co-authored-by: Aravindh <aravindhs@nvidia.com> Co-authored-by: nv-sachdevkartik <ksachdev@nvidia.com> Co-authored-by: youliangt <youliangt@nvidia.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Jade Choghari <chogharijade@gmail.com> * fix(docs): add groot to index Co-authored-by: sachdevkartik <sachdev.kartik25@gmail.com> --------- Co-authored-by: lbenhorin <lbenhorin@nvidia.com> Co-authored-by: Aravindh <aravindhs@nvidia.com> Co-authored-by: nv-sachdevkartik <ksachdev@nvidia.com> Co-authored-by: youliangt <youliangt@nvidia.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Jade Choghari <chogharijade@gmail.com> Co-authored-by: sachdevkartik <sachdev.kartik25@gmail.com>
2025-10-23 13:50:30 +02:00
parent 306429a85b
commit be46bdea8f
26 changed files with 4766 additions and 6 deletions
--- a/tests/policies/groot/test_groot_lerobot.py
+++ b/tests/policies/groot/test_groot_lerobot.py
@@ -0,0 +1,207 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Test script for LeRobot's Groot policy forward and inference passes."""
+
+import gc
+import os
+from copy import deepcopy
+from typing import Any
+
+import numpy as np
+import pytest
+import torch
+
+from lerobot.policies.groot.configuration_groot import GrootConfig
+from lerobot.policies.groot.modeling_groot import GrootPolicy
+from lerobot.policies.groot.processor_groot import make_groot_pre_post_processors
+from lerobot.processor import PolicyAction, PolicyProcessorPipeline
+from lerobot.utils.utils import auto_select_torch_device
+from tests.utils import require_cuda  # noqa: E402
+
+# Skip this whole module when `transformers` cannot be imported.
+# NOTE(review): this call runs after the `lerobot.policies.groot` imports at the top of the file;
+# if those modules import `transformers` at load time, the skip would come too late -- confirm.
+pytest.importorskip("transformers")
+
+# Module-wide marker: every test in this file is skipped when either the `CI` or the
+# `GITHUB_ACTIONS` environment variable equals the string "true".
+pytestmark = pytest.mark.skipif(
+    os.environ.get("CI") == "true" or os.environ.get("GITHUB_ACTIONS") == "true",
+    reason="This test requires local Groot installation and is not meant for CI",
+)
+
+
+# Define constants for dummy data
+DUMMY_STATE_DIM = 44
+DUMMY_ACTION_DIM = 44
+DUMMY_ACTION_HORIZON = 16
+IMAGE_SIZE = 256
+# Resolved once, at import time, by `auto_select_torch_device()`.
+DEVICE = auto_select_torch_device()
+# Default `model_path` for `instantiate_lerobot_groot` (presumably a Hugging Face Hub repo id -- confirm).
+MODEL_PATH = "aractingi/bimanual-handover-groot-10k"
+
+
+def cleanup_memory():
+    """Clean up GPU/MPS memory to prevent OOM errors between tests."""
+    print("\nCleaning up memory...")
+    gc.collect()
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()
+    if torch.backends.mps.is_available():
+        torch.mps.empty_cache()
+    print("Memory cleanup complete.")
+
+
+def set_seed_all(seed: int):
+    """Set random seed for all RNG sources to ensure reproducibility."""
+    import random
+
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+
+    # Set deterministic behavior
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+    torch.use_deterministic_algorithms(True, warn_only=True)
+
+
+def instantiate_lerobot_groot(
+    from_pretrained: bool = False,
+    model_path: str = MODEL_PATH,
+) -> tuple[
+    GrootPolicy,
+    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
+    PolicyProcessorPipeline[PolicyAction, PolicyAction],
+]:
+    """Instantiate LeRobot Groot policy with preprocessor and postprocessor."""
+    if from_pretrained:
+        policy = GrootPolicy.from_pretrained(
+            pretrained_name_or_path=model_path,
+            strict=False,
+        )
+        policy.config.embodiment_tag = "gr1"
+    else:
+        config = GrootConfig(
+            base_model_path=model_path,
+            n_action_steps=DUMMY_ACTION_HORIZON,
+            chunk_size=DUMMY_ACTION_HORIZON,
+            image_size=[IMAGE_SIZE, IMAGE_SIZE],
+            device=DEVICE,
+            embodiment_tag="gr1",
+        )
+        policy = GrootPolicy(config)
+
+    policy.to(DEVICE)
+    policy.config.device = DEVICE
+
+    preprocessor, postprocessor = make_groot_pre_post_processors(
+        config=policy.config,
+        dataset_stats=None,  # Pass None for dataset_stats to disable normalization (original GR00T doesn't normalize)
+    )
+
+    return (policy, preprocessor, postprocessor)
+
+
+def create_dummy_data(device=DEVICE):
+    """Create a dummy data batch for testing."""
+    batch_size = 2
+    prompt = "Pick up the red cube and place it in the bin"
+    state = torch.randn(batch_size, DUMMY_STATE_DIM, dtype=torch.float32, device=device)
+
+    batch = {
+        "observation.state": state,
+        "action": torch.randn(
+            batch_size,
+            DUMMY_ACTION_HORIZON,
+            DUMMY_ACTION_DIM,
+            dtype=torch.float32,
+            device=device,  # Action ground truth (for training)
+        ),
+        "observation.images.ego_view": torch.rand(
+            batch_size,
+            3,
+            IMAGE_SIZE,
+            IMAGE_SIZE,
+            dtype=torch.float32,
+            device=device,  # Images in [0, 1] range as expected by LeRobot
+        ),
+        "task": [prompt for _ in range(batch_size)],
+    }
+
+    return batch
+
+
+@require_cuda
+def test_lerobot_groot_inference():
+    """Test the inference pass (select_action) of LeRobot's Groot policy."""
+    print("Test: LeRobot Groot Inference Pass")
+
+    set_seed_all(42)
+
+    # Instantiate policy and processors
+    lerobot_policy, lerobot_preprocessor, lerobot_postprocessor = instantiate_lerobot_groot(
+        from_pretrained=True
+    )
+    batch = create_dummy_data()
+
+    print("\n[LeRobot] Running inference...")
+    lerobot_policy.eval()
+    batch_lerobot_processed = lerobot_preprocessor(deepcopy(batch))
+
+    # Ensure identical RNG state before inference
+    torch.manual_seed(42)
+
+    with torch.no_grad():
+        lerobot_action = lerobot_policy.select_action(batch_lerobot_processed)
+
+    print(f"\nInference successful. Output action shape: {lerobot_action.shape}")
+    print("Output actions (first 5 dims):")
+    print(lerobot_action[:, :5])
+
+    lerobot_action = lerobot_postprocessor(lerobot_action)
+
+    del lerobot_policy, lerobot_preprocessor, lerobot_postprocessor, batch
+    cleanup_memory()
+
+
+@require_cuda
+def test_lerobot_groot_forward_pass():
+    """Test the forward pass of LeRobot's Groot policy."""
+    print("\n" + "=" * 50)
+    print("Test: LeRobot Groot Forward Pass (Training Mode)")
+
+    set_seed_all(42)
+
+    # Instantiate policy and processors
+    lerobot_policy, lerobot_preprocessor, _ = instantiate_lerobot_groot(from_pretrained=True)
+    batch = create_dummy_data()
+
+    lerobot_policy.eval()
+
+    print("\n[LeRobot] Running forward pass...")
+    batch_lerobot_processed = lerobot_preprocessor(deepcopy(batch))
+
+    set_seed_all(42)
+    with torch.no_grad():
+        lerobot_loss, lerobot_metrics = lerobot_policy.forward(batch_lerobot_processed)
+
+    print("\nForward pass successful.")
+    print(f"  - Loss: {lerobot_loss.item():.6f}")
+    print(f"  - Metrics: {lerobot_metrics}")
+
+    del lerobot_policy, lerobot_preprocessor, batch
+    cleanup_memory()
--- a/tests/policies/groot/test_groot_vs_original.py
+++ b/tests/policies/groot/test_groot_vs_original.py
@@ -0,0 +1,443 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Test script to verify the Groot policy integration in LeRobot against the original implementation. Only meant to be run locally!"""
+
+import gc
+import os
+from copy import deepcopy
+from typing import Any
+
+import numpy as np
+import pytest
+import torch
+
+from lerobot.policies.groot.configuration_groot import GrootConfig
+from lerobot.policies.groot.modeling_groot import GrootPolicy
+from lerobot.policies.groot.processor_groot import make_groot_pre_post_processors
+from lerobot.processor import PolicyAction, PolicyProcessorPipeline
+
+# Skip this whole module when either optional dependency cannot be imported. The `gr00t`
+# imports further down rely on this guard having run first.
+# NOTE(review): the `lerobot.policies.groot` imports at the top of the file run before these
+# guards; if they need `transformers` at load time the skip would come too late -- confirm.
+pytest.importorskip("gr00t")
+pytest.importorskip("transformers")
+
+# Module-wide marker: every test in this file is skipped when either the `CI` or the
+# `GITHUB_ACTIONS` environment variable equals the string "true".
+pytestmark = pytest.mark.skipif(
+    os.environ.get("CI") == "true" or os.environ.get("GITHUB_ACTIONS") == "true",
+    reason="This test requires local Groot installation and is not meant for CI",
+)
+
+
+from gr00t.data.dataset import ModalityConfig  # noqa: E402
+from gr00t.data.embodiment_tags import EmbodimentTag  # noqa: E402
+from gr00t.data.transform.base import ComposedModalityTransform  # noqa: E402
+from gr00t.model.policy import Gr00tPolicy  # noqa: E402
+
+# GR1 humanoid dimensions (from pretrained model metadata)
+# The actual GR1 robot has 44 dimensions for both state and action
+# GR00TTransform will pad state to 64 and truncate action to 32
+DUMMY_STATE_DIM = 44
+DUMMY_ACTION_DIM = 44
+DUMMY_ACTION_HORIZON = 16
+IMAGE_SIZE = 256
+# Both implementations are placed on this device by the instantiate_* helpers below.
+DEVICE = "cpu"
+# Default `model_path` for both instantiate_* helpers (presumably a Hugging Face Hub repo id -- confirm).
+MODEL_PATH = "nvidia/GR00T-N1.5-3B"
+
+# Per-body-part action widths; they sum to 44 (= DUMMY_ACTION_DIM).
+# `convert_lerobot_to_original_format` slices the flat action tensor in this dict's
+# iteration order, so reordering the entries changes which columns each part receives.
+# NOTE(review): this order differs from `action_keys` in `instantiate_original_groot`;
+# confirm that is intended.
+GR1_BODY_PARTS = {
+    "left_arm": 7,
+    "left_hand": 6,
+    "left_leg": 6,
+    "neck": 3,
+    "right_arm": 7,
+    "right_hand": 6,
+    "right_leg": 6,
+    "waist": 3,
+}
+
+
+def cleanup_memory():
+    """Clean up GPU/MPS memory to prevent OOM errors between tests."""
+    print("\nCleaning up memory...")
+    gc.collect()
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()
+    if torch.backends.mps.is_available():
+        torch.mps.empty_cache()
+    print("Memory cleanup complete.")
+
+
+def set_seed_all(seed: int):
+    """Set random seed for all RNG sources to ensure reproducibility."""
+    import random
+
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+
+    # Set deterministic behavior
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+    torch.use_deterministic_algorithms(True, warn_only=True)
+
+
+def instantiate_lerobot_groot(
+    from_pretrained: bool = False,
+    model_path: str = MODEL_PATH,
+) -> tuple[
+    GrootPolicy,
+    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
+    PolicyProcessorPipeline[PolicyAction, PolicyAction],
+]:
+    """Instantiate LeRobot Groot policy with preprocessor and postprocessor."""
+    if from_pretrained:
+        policy = GrootPolicy.from_pretrained(
+            pretrained_name_or_path=model_path,
+            strict=False,
+        )
+        policy.config.embodiment_tag = "gr1"
+    else:
+        config = GrootConfig(
+            base_model_path=model_path,
+            n_action_steps=DUMMY_ACTION_HORIZON,
+            chunk_size=DUMMY_ACTION_HORIZON,
+            image_size=[IMAGE_SIZE, IMAGE_SIZE],
+            device=DEVICE,
+            embodiment_tag="gr1",
+        )
+        policy = GrootPolicy(config)
+
+    policy.to(DEVICE)
+    policy.config.device = DEVICE
+
+    preprocessor, postprocessor = make_groot_pre_post_processors(
+        config=policy.config,
+        dataset_stats=None,  # Pass None for dataset_stats to disable normalization (original GR00T doesn't normalize)
+    )
+
+    return (policy, preprocessor, postprocessor)
+
+
+def instantiate_original_groot(
+    from_pretrained: bool = False,
+    model_path: str = MODEL_PATH,
+):
+    """Instantiate original Groot policy from NVIDIA's implementation."""
+    from gr00t.data.transform.concat import ConcatTransform
+    from gr00t.data.transform.state_action import StateActionToTensor
+    from gr00t.data.transform.video import VideoToNumpy, VideoToTensor
+    from gr00t.model.transforms import GR00TTransform
+
+    video_keys = ["video.ego_view"]
+    state_keys = [
+        "state"
+    ]  # Important: Use single concatenated "state" key (not split body parts) to match preprocessing
+    action_keys = [
+        "action.left_arm",
+        "action.right_arm",
+        "action.left_hand",
+        "action.right_hand",
+        "action.left_leg",
+        "action.right_leg",
+        "action.neck",
+        "action.waist",
+    ]
+    language_keys = ["annotation.human.action.task_description"]
+
+    modality_config = {
+        "video": ModalityConfig(
+            delta_indices=[0],  # Current frame only
+            modality_keys=video_keys,
+        ),
+        "state": ModalityConfig(
+            delta_indices=[0],
+            modality_keys=state_keys,
+        ),
+        "action": ModalityConfig(
+            delta_indices=list(range(DUMMY_ACTION_HORIZON)),
+            modality_keys=action_keys,
+        ),
+        "language": ModalityConfig(
+            delta_indices=[0],
+            modality_keys=language_keys,
+        ),
+    }
+
+    modality_transform = ComposedModalityTransform(
+        transforms=[
+            VideoToTensor(apply_to=video_keys),
+            VideoToNumpy(apply_to=video_keys),  # Convert to numpy (GR00TTransform expects numpy arrays)
+            # State is already a single concatenated key, so no StateActionToTensor needed
+            # Convert action from numpy to tensor
+            StateActionToTensor(apply_to=action_keys),
+            # Concatenate only video and actions (state is already single key)
+            ConcatTransform(
+                video_concat_order=video_keys,
+                state_concat_order=[],  # Empty:state is already single key
+                action_concat_order=action_keys,
+            ),
+            GR00TTransform(
+                max_state_dim=64,
+                max_action_dim=32,
+                state_horizon=1,
+                action_horizon=DUMMY_ACTION_HORIZON,
+                training=False,
+            ),
+        ]
+    )
+
+    policy = Gr00tPolicy(
+        model_path=model_path,
+        embodiment_tag=EmbodimentTag.GR1,
+        modality_config=modality_config,
+        modality_transform=modality_transform,
+        device=DEVICE,
+    )
+
+    return policy, modality_config, modality_transform
+
+
+def create_dummy_data(device=DEVICE):
+    """Create dummy data for testing both implementations."""
+    batch_size = 2
+    prompt = "Pick up the red cube and place it in the bin"
+    state = torch.randn(batch_size, DUMMY_STATE_DIM, dtype=torch.float32, device=device)
+
+    batch = {
+        "observation.state": state,
+        "action": torch.randn(
+            batch_size,
+            DUMMY_ACTION_HORIZON,
+            DUMMY_ACTION_DIM,
+            dtype=torch.float32,
+            device=device,  # Action ground truth (for training)
+        ),
+        "observation.images.ego_view": torch.rand(
+            batch_size,
+            3,
+            IMAGE_SIZE,
+            IMAGE_SIZE,
+            dtype=torch.float32,
+            device=device,  # Images in [0, 1] range as expected by LeRobot
+        ),
+        "task": [prompt for _ in range(batch_size)],
+    }
+
+    return batch
+
+
+def convert_lerobot_to_original_format(batch, modality_config):
+    """Convert LeRobot batch format to original Groot format.
+
+    The original Groot expects observations in this format:
+    {
+        "video.<camera_name>": np.ndarray (T, H, W, C) or (B, T, H, W, C)
+        "state.<state_component>": np.ndarray (T, D) or (B, T, D)
+        "action.<action_component>": np.ndarray (T, D) or (B, T, D)
+        "annotation.<annotation_type>": str or list[str]
+    }
+    """
+    # Original Groot expects (T, H, W, C) format for images
+    # LeRobot has (B, C, H, W) format, so we need to convert
+    observation = {}
+
+    for img_key in ["ego_view"]:
+        lerobot_key = f"observation.images.{img_key}"
+        if lerobot_key in batch:
+            img = batch[lerobot_key]
+            # Convert from (B, C, H, W) to (B, T=1, H, W, C)
+            img_np = img.permute(0, 2, 3, 1).unsqueeze(1).cpu().numpy()
+            # Convert [0, 1] to [0, 255] uint8 as expected by original
+            img_np = (img_np * 255).astype(np.uint8)
+            observation[f"video.{img_key}"] = img_np
+
+    # Important: The Original's GR00TTransform expects "state" as (B, T, D), not split body parts
+    if "observation.state" in batch:
+        state = batch["observation.state"]
+        state_np = state.unsqueeze(1).cpu().numpy()  # (B, 1, D)
+        observation["state"] = state_np
+
+    if "action" in batch:
+        action = batch["action"]
+        action_np = action.cpu().numpy()
+
+        start_idx = 0
+        for part_name, part_dim in GR1_BODY_PARTS.items():
+            end_idx = start_idx + part_dim
+            observation[f"action.{part_name}"] = action_np[:, :, start_idx:end_idx]
+            start_idx = end_idx
+
+    if "task" in batch:
+        task_list = batch["task"]
+        # GR00TTransform expects language with (B, T) shape for batched data
+        # Create a (B, T=1) array where each element is the string directly
+        bsz = len(task_list)
+        task_array = np.empty((bsz, 1), dtype=object)
+        for i in range(bsz):
+            task_array[i, 0] = task_list[i]  # Assign string directly to each (i, 0) position
+        observation["annotation.human.action.task_description"] = task_array
+
+    return observation
+
+
+def test_groot_original_vs_lerobot_pretrained():
+    """Test Groot original implementation vs LeRobot implementation with pretrained weights.
+
+    Both policies are loaded, the same dummy batch is run through each implementation's own
+    preprocessing, and the torch seed is reset to 42 right before each inference call. The
+    LeRobot `select_action` output is then compared against the first timestep of the
+    original model's `action_pred` with an absolute tolerance of 0.001.
+    """
+    print("Test: Groot Original vs LeRobot with Pretrained Weights (Inference)")
+
+    set_seed_all(42)
+
+    lerobot_policy, lerobot_preprocessor, lerobot_postprocessor = instantiate_lerobot_groot(
+        from_pretrained=True
+    )
+    original_policy, modality_config, modality_transform = instantiate_original_groot(from_pretrained=True)
+
+    batch = create_dummy_data()
+    # The LeRobot path works on a copy so that `batch` stays untouched for the original path.
+    batch_lerobot = deepcopy(batch)
+
+    print("\n[LeRobot] Running inference...")
+    lerobot_policy.eval()
+    batch_lerobot_processed = lerobot_preprocessor(batch_lerobot)
+
+    # Important: Reset seed immediately before inference to ensure identical RNG state
+    torch.manual_seed(42)
+
+    with torch.no_grad():
+        lerobot_actions = lerobot_policy.select_action(batch_lerobot_processed)
+
+    print("\n[Original] Running inference...")
+    original_policy.model.eval()
+    observation = convert_lerobot_to_original_format(batch, modality_config)
+    original_obs_transformed = modality_transform(deepcopy(observation))
+
+    # Important: Reset seed immediately before inference to ensure identical RNG state
+    torch.manual_seed(42)
+
+    with torch.no_grad():
+        original_model_output = original_policy.model.get_action(original_obs_transformed)
+        original_actions_raw = original_model_output["action_pred"]  # [2, 16, 32]
+    # Take first timestep
+    # and align device/dtype with the LeRobot output so the two can be compared element-wise.
+    original_actions = original_actions_raw[:, 0, :].to(lerobot_actions.device).to(lerobot_actions.dtype)
+
+    print("Action Comparison:")
+    diff = lerobot_actions - original_actions
+    abs_diff = torch.abs(diff)
+
+    # Per-element table for manual inspection only; pass/fail is decided by the assertion below.
+    for batch_idx in range(lerobot_actions.shape[0]):
+        print(f"\n{'=' * 60}")
+        print(f"Batch {batch_idx}")
+        print(f"{'=' * 60}")
+        print(f"{'Idx':<5} {'LeRobot':<14} {'Original':<14} {'Difference':<14}")
+        print("-" * 60)
+        for action_idx in range(lerobot_actions.shape[1]):
+            lr_val = lerobot_actions[batch_idx, action_idx].item()
+            orig_val = original_actions[batch_idx, action_idx].item()
+            diff_val = abs(lr_val - orig_val)
+            # Note: "-" is also printed when the two values are exactly equal.
+            sign = "+" if (lr_val - orig_val) > 0 else "-"
+            print(f"{action_idx:<5} {lr_val:>13.6f} {orig_val:>13.6f} {sign}{diff_val:>12.6f}")
+
+    max_diff = abs_diff.max().item()
+    tolerance = 0.001
+    # Only `atol` is set here; `rtol` is left at torch's default.
+    assert torch.allclose(lerobot_actions, original_actions, atol=tolerance), (
+        f"Actions differ by more than tolerance ({tolerance}): max diff = {max_diff:.6f}"
+    )
+    print(f"\nSuccess: Actions match within tolerance ({tolerance})!")
+
+    # Drop the large references before emptying caches.
+    # NOTE(review): this cleanup is skipped when the assertion above fails.
+    del lerobot_policy, lerobot_preprocessor, lerobot_postprocessor
+    del original_policy, modality_config, modality_transform
+    del batch, batch_lerobot, observation
+    cleanup_memory()
+
+
+def test_groot_forward_pass_comparison():
+    """Test forward pass comparison between LeRobot and Original Groot implementations.
+
+    Runs `forward` on both implementations with the same dummy batch, re-seeding with
+    `set_seed_all(42)` right before each call, and prints both losses together with their
+    absolute and relative difference.
+
+    NOTE(review): the loss difference is only printed; nothing asserts on it, so this test
+    can fail only if one of the calls raises. Confirm whether a tolerance should be enforced.
+    """
+    print("Test: Forward Pass Comparison (Training Mode)")
+
+    set_seed_all(42)
+
+    lerobot_policy, lerobot_preprocessor, lerobot_postprocessor = instantiate_lerobot_groot(
+        from_pretrained=True
+    )
+    original_policy, modality_config, modality_transform = instantiate_original_groot(from_pretrained=True)
+
+    batch = create_dummy_data()
+    # Despite the "Training Mode" banner, both modules are put in eval mode.
+    lerobot_policy.eval()
+    original_policy.model.eval()
+
+    print("\n[LeRobot] Running forward pass...")
+    # The LeRobot path works on a copy so that `batch` stays untouched for the original path.
+    batch_lerobot = deepcopy(batch)
+    batch_lerobot_processed = lerobot_preprocessor(batch_lerobot)
+
+    set_seed_all(42)
+    with torch.no_grad():
+        lerobot_loss, lerobot_metrics = lerobot_policy.forward(batch_lerobot_processed)
+
+    print(f"  Loss: {lerobot_loss.item():.6f}")
+
+    print("\n[Original] Running forward pass...")
+    observation = convert_lerobot_to_original_format(batch, modality_config)
+    transformed_obs = modality_transform(observation)
+
+    # If the original transform produced no "action" entry, reuse the action and mask from the
+    # LeRobot-preprocessed batch, adjusted to the original model's action horizon.
+    if "action" not in transformed_obs:
+        action_for_forward = batch_lerobot_processed["action"]
+        action_mask_for_forward = batch_lerobot_processed["action_mask"]
+
+        # Match action horizon if needed
+        if action_for_forward.shape[1] != original_policy.model.action_horizon:
+            if action_for_forward.shape[1] < original_policy.model.action_horizon:
+                # Too short: repeat the last action step and mark the padded steps with a zero mask.
+                pad_size = original_policy.model.action_horizon - action_for_forward.shape[1]
+                last_action = action_for_forward[:, -1:, :]
+                padding = last_action.repeat(1, pad_size, 1)
+                action_for_forward = torch.cat([action_for_forward, padding], dim=1)
+
+                mask_padding = torch.zeros(
+                    action_mask_for_forward.shape[0],
+                    pad_size,
+                    action_mask_for_forward.shape[2],
+                    dtype=action_mask_for_forward.dtype,
+                    device=action_mask_for_forward.device,
+                )
+                action_mask_for_forward = torch.cat([action_mask_for_forward, mask_padding], dim=1)
+            else:
+                # Too long: keep only the first `action_horizon` steps of action and mask.
+                action_for_forward = action_for_forward[:, : original_policy.model.action_horizon, :]
+                action_mask_for_forward = action_mask_for_forward[
+                    :, : original_policy.model.action_horizon, :
+                ]
+
+        transformed_obs["action"] = action_for_forward
+        transformed_obs["action_mask"] = action_mask_for_forward
+
+    # Same seed as before the LeRobot forward call above.
+    set_seed_all(42)
+    with torch.no_grad():
+        original_outputs = original_policy.model.forward(transformed_obs)
+
+    original_loss = original_outputs["loss"]
+    print(f"  Loss: {original_loss.item():.6f}")
+
+    loss_diff = abs(lerobot_loss.item() - original_loss.item())
+    # The 1e-8 term guards against division by zero; the result is expressed in percent.
+    loss_rel_diff = loss_diff / (abs(original_loss.item()) + 1e-8) * 100
+
+    print("\nLoss Values:")
+    print(f"  LeRobot: {lerobot_loss.item():.6f}")
+    print(f"  Original: {original_loss.item():.6f}")
+    print(f"  Absolute difference: {loss_diff:.6f}")
+    print(f"  Relative difference: {loss_rel_diff:.2f}%")
+
+    # Drop the large references before emptying caches.
+    del lerobot_policy, lerobot_preprocessor, lerobot_postprocessor
+    del original_policy, modality_config, modality_transform
+    del batch, batch_lerobot, observation, transformed_obs
+    cleanup_memory()