feat(ci): add trufflehog secrets detection (#254 )

add root argument to the dataset visualizer to visualize local datasets (#249 )
Make display_sys_info.py install-agnostic (#253 )
2024-06-10 14:25:43 +02:00 · 2024-06-10 10:44:32 +02:00 · 2024-06-07 15:02:17 +02:00 · 2024-06-05 08:25:20 +01:00 · 2024-06-04 21:01:53 +02:00 · 2024-06-04 17:02:05 +01:00
11 changed files with 141 additions and 78 deletions
--- a/.github/workflows/trufflehog.yml
+++ b/.github/workflows/trufflehog.yml
@@ -0,0 +1,18 @@
+on:
+  push:
+
+name: Secret Leaks
+
+permissions:
+  contents: read
+
+jobs:
+  trufflehog:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+    - name: Secret Scanning
+      uses: trufflesecurity/trufflehog@main
--- a/lerobot/common/datasets/push_dataset_to_hub/aloha_dora_format.py
+++ b/lerobot/common/datasets/push_dataset_to_hub/aloha_dora_format.py
@@ -78,29 +78,15 @@ def load_from_raw(raw_dir: Path, out_dir: Path, fps: int):

    image_keys = [key for key in df if "observation.images." in key]

-    num_unaligned_images = 0
-    max_episode = 0
-
    def get_episode_index(row):
-        nonlocal num_unaligned_images
-        nonlocal max_episode
        episode_index_per_cam = {}
        for key in image_keys:
-            if isinstance(row[key], float):
-                num_unaligned_images += 1
-                return float("nan")
            path = row[key][0]["path"]
            match = re.search(r"_(\d{6}).mp4", path)
            if not match:
                raise ValueError(path)
            episode_index = int(match.group(1))
            episode_index_per_cam[key] = episode_index
-
-            if episode_index > max_episode:
-                assert episode_index - max_episode == 1
-                max_episode = episode_index
-            else:
-                assert episode_index == max_episode
        if len(set(episode_index_per_cam.values())) != 1:
            raise ValueError(
                f"All cameras are expected to belong to the same episode, but getting {episode_index_per_cam}"
@@ -125,24 +111,11 @@ def load_from_raw(raw_dir: Path, out_dir: Path, fps: int):
    del df["timestamp_utc"]

    # sanity check
-    num_rows_with_nan = df.isna().any(axis=1).sum()
-    assert (
-        num_rows_with_nan == num_unaligned_images
-    ), f"Found {num_rows_with_nan} rows with NaN values but {num_unaligned_images} unaligned images."
-    if num_unaligned_images > max_episode * 2:
-        # We allow a few unaligned images, typically at the beginning and end of the episodes for instance
-        # but if there are too many, we raise an error to avoid large chunks of missing data
-        raise ValueError(
-            f"Found {num_unaligned_images} unaligned images out of {max_episode} episodes. "
-            f"Check the timestamps of the cameras."
-        )
-
-    # Drop rows with NaN values now that we double checked and convert episode_index to int
-    df = df.dropna()
-    df["episode_index"] = df["episode_index"].astype(int)
+    has_nan = df.isna().any().any()
+    if has_nan:
+        raise ValueError("Dataset contains Nan values.")

    # sanity check episode indices go from 0 to n-1
-    assert df["episode_index"].max() == max_episode
    ep_ids = [ep_idx for ep_idx, _ in df.groupby("episode_index")]
    expected_ep_ids = list(range(df["episode_index"].max() + 1))
    if ep_ids != expected_ep_ids:
@@ -241,6 +214,8 @@ def from_raw_to_lerobot_format(raw_dir: Path, out_dir: Path, fps=None, video=Tru

    if fps is None:
        fps = 30
+    else:
+        raise NotImplementedError()

    if not video:
        raise NotImplementedError()
--- a/lerobot/common/datasets/utils.py
+++ b/lerobot/common/datasets/utils.py
@@ -243,11 +243,10 @@ def load_previous_and_future_frames(
        is_pad = min_ > tolerance_s

        # check violated query timestamps are all outside the episode range
-        if not ((query_ts[is_pad] < ep_first_ts) | (ep_last_ts < query_ts[is_pad])).all():
-            raise ValueError(
-                f"One or several timestamps unexpectedly violate the tolerance ({min_} > {tolerance_s=}) inside episode range."
-                "This might be due to synchronization issues with timestamps during data collection."
-            )
+        assert ((query_ts[is_pad] < ep_first_ts) | (ep_last_ts < query_ts[is_pad])).all(), (
+            f"One or several timestamps unexpectedly violate the tolerance ({min_} > {tolerance_s=}) inside episode range."
+            "This might be due to synchronization issues with timestamps during data collection."
+        )

        # get dataset indices corresponding to frames to be loaded
        data_ids = ep_data_ids[argmin_]
--- a/lerobot/common/logger.py
+++ b/lerobot/common/logger.py
@@ -189,7 +189,7 @@ class Logger:
            training_state["scheduler"] = scheduler.state_dict()
        torch.save(training_state, save_dir / self.training_state_file_name)

-    def save_checkpoint(
+    def save_checkpont(
        self,
        train_step: int,
        policy: Policy,
--- a/lerobot/common/policies/normalize.py
+++ b/lerobot/common/policies/normalize.py
@@ -147,7 +147,7 @@ class Normalize(nn.Module):
                assert not torch.isinf(min).any(), _no_stats_error_str("min")
                assert not torch.isinf(max).any(), _no_stats_error_str("max")
                # normalize to [0,1]
-                batch[key] = (batch[key] - min) / (max - min)
+                batch[key] = (batch[key] - min) / (max - min + 1e-8)
                # normalize to [-1, 1]
                batch[key] = batch[key] * 2 - 1
            else:
--- a/lerobot/scripts/display_sys_info.py
+++ b/lerobot/scripts/display_sys_info.py
@@ -13,39 +13,71 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+"""Use this script to get a quick summary of your system config.
+It should be able to run without any of LeRobot's dependencies or LeRobot itself installed.
+"""
+
 import platform

-import huggingface_hub
+HAS_HF_HUB = True
+HAS_HF_DATASETS = True
+HAS_NP = True
+HAS_TORCH = True
+HAS_LEROBOT = True

-# import dataset
-import numpy as np
-import torch
+try:
+    import huggingface_hub
+except ImportError:
+    HAS_HF_HUB = False

-from lerobot import __version__ as version
+try:
+    import datasets
+except ImportError:
+    HAS_HF_DATASETS = False

-pt_version = torch.__version__
-pt_cuda_available = torch.cuda.is_available()
-pt_cuda_available = torch.cuda.is_available()
-cuda_version = torch._C._cuda_getCompiledVersion() if torch.version.cuda is not None else "N/A"
+try:
+    import numpy as np
+except ImportError:
+    HAS_NP = False
+
+try:
+    import torch
+except ImportError:
+    HAS_TORCH = False
+
+try:
+    import lerobot
+except ImportError:
+    HAS_LEROBOT = False
+
+
+lerobot_version = lerobot.__version__ if HAS_LEROBOT else "N/A"
+hf_hub_version = huggingface_hub.__version__ if HAS_HF_HUB else "N/A"
+hf_datasets_version = datasets.__version__ if HAS_HF_DATASETS else "N/A"
+np_version = np.__version__ if HAS_NP else "N/A"
+
+torch_version = torch.__version__ if HAS_TORCH else "N/A"
+torch_cuda_available = torch.cuda.is_available() if HAS_TORCH else "N/A"
+cuda_version = torch._C._cuda_getCompiledVersion() if HAS_TORCH and torch.version.cuda is not None else "N/A"


 # TODO(aliberts): refactor into an actual command `lerobot env`
 def display_sys_info() -> dict:
    """Run this to get basic system info to help for tracking issues & bugs."""
    info = {
-        "`lerobot` version": version,
+        "`lerobot` version": lerobot_version,
        "Platform": platform.platform(),
        "Python version": platform.python_version(),
-        "Huggingface_hub version": huggingface_hub.__version__,
-        # TODO(aliberts): Add dataset when https://github.com/huggingface/lerobot/pull/73 is merged
-        # "Dataset version": dataset.__version__,
-        "Numpy version": np.__version__,
-        "PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})",
+        "Huggingface_hub version": hf_hub_version,
+        "Dataset version": hf_datasets_version,
+        "Numpy version": np_version,
+        "PyTorch version (GPU?)": f"{torch_version} ({torch_cuda_available})",
        "Cuda version": cuda_version,
        "Using GPU in script?": "<fill in>",
-        "Using distributed or parallel set-up in script?": "<fill in>",
+        # "Using distributed or parallel set-up in script?": "<fill in>",
    }
-    print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n")
+    print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the last point.\n")
    print(format_dict(info))
    return info

--- a/lerobot/scripts/eval.py
+++ b/lerobot/scripts/eval.py
@@ -164,10 +164,7 @@ def rollout(
        # VectorEnv stores is_success in `info["final_info"][env_index]["is_success"]`. "final_info" isn't
        # available of none of the envs finished.
        if "final_info" in info:
-            successes = [
-                info["is_success"] if info is not None and "is_success" in info else False
-                for info in info["final_info"]
-            ]
+            successes = [info["is_success"] if info is not None else False for info in info["final_info"]]
        else:
            successes = [False] * env.num_envs

@@ -212,7 +209,7 @@ def eval_policy(
    policy: torch.nn.Module,
    n_episodes: int,
    max_episodes_rendered: int = 0,
-    video_dir: Path | None = None,
+    videos_dir: Path | None = None,
    return_episode_data: bool = False,
    start_seed: int | None = None,
    enable_progbar: bool = False,
@@ -224,7 +221,7 @@ def eval_policy(
        policy: The policy.
        n_episodes: The number of episodes to evaluate.
        max_episodes_rendered: Maximum number of episodes to render into videos.
-        video_dir: Where to save rendered videos.
+        videos_dir: Where to save rendered videos.
        return_episode_data: Whether to return episode data for online training. Incorporates the data into
            the "episodes" key of the returned dictionary.
        start_seed: The first seed to use for the first individual rollout. For all subsequent rollouts the
@@ -350,8 +347,8 @@ def eval_policy(
            ):
                if n_episodes_rendered >= max_episodes_rendered:
                    break
-                video_dir.mkdir(parents=True, exist_ok=True)
-                video_path = video_dir / f"eval_episode_{n_episodes_rendered}.mp4"
+                videos_dir.mkdir(parents=True, exist_ok=True)
+                video_path = videos_dir / f"eval_episode_{n_episodes_rendered}.mp4"
                video_paths.append(str(video_path))
                thread = threading.Thread(
                    target=write_video,
@@ -506,9 +503,10 @@ def _compile_episode_data(
    }


-def eval(
+def main(
    pretrained_policy_path: str | None = None,
    hydra_cfg_path: str | None = None,
+    out_dir: str | None = None,
    config_overrides: list[str] | None = None,
 ):
    assert (pretrained_policy_path is None) ^ (hydra_cfg_path is None)
@@ -516,12 +514,8 @@ def eval(
        hydra_cfg = init_hydra_config(pretrained_policy_path / "config.yaml", config_overrides)
    else:
        hydra_cfg = init_hydra_config(hydra_cfg_path, config_overrides)
-    out_dir = (
-        f"outputs/eval/{dt.now().strftime('%Y-%m-%d/%H-%M-%S')}_{hydra_cfg.env.name}_{hydra_cfg.policy.name}"
-    )
-
    if out_dir is None:
-        raise NotImplementedError()
+        out_dir = f"outputs/eval/{dt.now().strftime('%Y-%m-%d/%H-%M-%S')}_{hydra_cfg.env.name}_{hydra_cfg.policy.name}"

    # Check device is available
    device = get_safe_torch_device(hydra_cfg.device, log=True)
@@ -549,7 +543,7 @@ def eval(
            policy,
            hydra_cfg.eval.n_episodes,
            max_episodes_rendered=10,
-            video_dir=Path(out_dir) / "eval",
+            videos_dir=Path(out_dir) / "videos",
            start_seed=hydra_cfg.seed,
            enable_progbar=True,
            enable_inner_progbar=True,
@@ -589,6 +583,13 @@ if __name__ == "__main__":
        ),
    )
    parser.add_argument("--revision", help="Optionally provide the Hugging Face Hub revision ID.")
+    parser.add_argument(
+        "--out-dir",
+        help=(
+            "Where to save the evaluation outputs. If not provided, outputs are saved in "
+            "outputs/eval/{timestamp}_{env_name}_{policy_name}"
+        ),
+    )
    parser.add_argument(
        "overrides",
        nargs="*",
@@ -597,7 +598,7 @@ if __name__ == "__main__":
    args = parser.parse_args()

    if args.pretrained_policy_name_or_path is None:
-        eval(hydra_cfg_path=args.config, config_overrides=args.overrides)
+        main(hydra_cfg_path=args.config, out_dir=args.out_dir, config_overrides=args.overrides)
    else:
        try:
            pretrained_policy_path = Path(
@@ -621,4 +622,8 @@ if __name__ == "__main__":
                "repo ID, nor is it an existing local directory."
            )

-        eval(pretrained_policy_path=pretrained_policy_path, config_overrides=args.overrides)
+        main(
+            pretrained_policy_path=pretrained_policy_path,
+            out_dir=args.out_dir,
+            config_overrides=args.overrides,
+        )
--- a/lerobot/scripts/train.py
+++ b/lerobot/scripts/train.py
@@ -150,6 +150,7 @@ def log_train_info(logger: Logger, info, step, cfg, dataset, is_offline):
    grad_norm = info["grad_norm"]
    lr = info["lr"]
    update_s = info["update_s"]
+    dataloading_s = info["dataloading_s"]

    # A sample is an (observation,action) pair, where observation and action
    # can be on multiple timestamps. In a batch, we have `batch_size`` number of samples.
@@ -170,6 +171,7 @@ def log_train_info(logger: Logger, info, step, cfg, dataset, is_offline):
        f"lr:{lr:0.1e}",
        # in seconds
        f"updt_s:{update_s:.3f}",
+        f"data_s:{dataloading_s:.3f}",  # if not ~0, you are bottlenecked by cpu or io
    ]
    logging.info(" ".join(log_items))

@@ -325,6 +327,9 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No

    # Note: this helper will be used in offline and online training loops.
    def evaluate_and_checkpoint_if_needed(step):
+        _num_digits = max(6, len(str(cfg.training.offline_steps + cfg.training.online_steps)))
+        step_identifier = f"{step:0{_num_digits}d}"
+
        if cfg.training.eval_freq > 0 and step % cfg.training.eval_freq == 0:
            logging.info(f"Eval policy at step {step}")
            with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.use_amp else nullcontext():
@@ -332,7 +337,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
                    eval_env,
                    policy,
                    cfg.eval.n_episodes,
-                    video_dir=Path(out_dir) / "eval",
+                    videos_dir=Path(out_dir) / "eval" / f"videos_step_{step_identifier}",
                    max_episodes_rendered=4,
                    start_seed=cfg.seed,
                )
@@ -345,14 +350,12 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
            logging.info(f"Checkpoint policy after step {step}")
            # Note: Save with step as the identifier, and format it to have at least 6 digits but more if
            # needed (choose 6 as a minimum for consistency without being overkill).
-            logger.save_checkpoint(
+            logger.save_checkpont(
                step,
                policy,
                optimizer,
                lr_scheduler,
-                identifier=str(step).zfill(
-                    max(6, len(str(cfg.training.offline_steps + cfg.training.online_steps)))
-                ),
+                identifier=step_identifier,
            )
            logging.info("Resume training")

@@ -382,7 +385,10 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
    for _ in range(step, cfg.training.offline_steps):
        if step == 0:
            logging.info("Start offline training on a fixed dataset")
+
+        start_time = time.perf_counter()
        batch = next(dl_iter)
+        dataloading_s = time.perf_counter() - start_time

        for key in batch:
            batch[key] = batch[key].to(device, non_blocking=True)
@@ -397,6 +403,8 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
            use_amp=cfg.use_amp,
        )

+        train_info["dataloading_s"] = dataloading_s
+
        if step % cfg.training.log_freq == 0:
            log_train_info(logger, train_info, step, cfg, offline_dataset, is_offline=True)

--- a/lerobot/scripts/visualize_dataset.py
+++ b/lerobot/scripts/visualize_dataset.py
@@ -106,6 +106,7 @@ def visualize_dataset(
    ws_port: int = 9087,
    save: bool = False,
    output_dir: Path | None = None,
+    root: Path | None = None,
 ) -> Path | None:
    if save:
        assert (
@@ -113,7 +114,7 @@ def visualize_dataset(
        ), "Set an output directory where to write .rrd files with `--output-dir path/to/directory`."

    logging.info("Loading dataset")
-    dataset = LeRobotDataset(repo_id)
+    dataset = LeRobotDataset(repo_id, root=root)

    logging.info("Loading dataloader")
    episode_sampler = EpisodeSampler(dataset, episode_index)
@@ -256,6 +257,12 @@ def main():
        help="Directory path to write a .rrd file when `--save 1` is set.",
    )

+    parser.add_argument(
+        "--root",
+        type=str,
+        help="Root directory for a dataset stored on a local machine.",
+    )
+
    args = parser.parse_args()
    visualize_dataset(**vars(args))

--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -244,7 +244,7 @@ def test_load_previous_and_future_frames_outside_tolerance_inside_episode_range(
    delta_timestamps = {"index": [-0.2, 0, 0.141]}
    tol = 0.04
    item = hf_dataset[2]
-    with pytest.raises(ValueError):
+    with pytest.raises(AssertionError):
        load_previous_and_future_frames(item, hf_dataset, episode_data_index, delta_timestamps, tol)


--- a/tests/test_visualize_dataset.py
+++ b/tests/test_visualize_dataset.py
@@ -13,6 +13,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from pathlib import Path
+
 import pytest

 from lerobot.scripts.visualize_dataset import visualize_dataset
@@ -31,3 +33,20 @@ def test_visualize_dataset(tmpdir, repo_id):
        output_dir=tmpdir,
    )
    assert rrd_path.exists()
+
+
+@pytest.mark.parametrize(
+    "repo_id",
+    ["lerobot/pusht"],
+)
+@pytest.mark.parametrize("root", [Path(__file__).parent / "data"])
+def test_visualize_local_dataset(tmpdir, repo_id, root):
+    rrd_path = visualize_dataset(
+        repo_id,
+        episode_index=0,
+        batch_size=32,
+        save=True,
+        output_dir=tmpdir,
+        root=root,
+    )
+    assert rrd_path.exists()
Author	SHA1	Message	Date
Luc Georges	a06598678c	feat(ci): add trufflehog secrets detection (#254 )	2024-06-10 14:25:43 +02:00
Thomas Lips	055a6f60c6	add root argument to the dataset visualizer to visualize local datasets (#249 )	2024-06-10 10:44:32 +02:00
Simon Alibert	e54d6ea1eb	Make `display_sys_info.py` install-agnostic (#253 )	2024-06-07 15:02:17 +02:00
Alexander Soare	1eb4bfe2e4	Fix videos_dir documentation (#247 )	2024-06-05 08:25:20 +01:00
Alexander Soare	21f222fa1d	Add out_dir option to eval (#244 )	2024-06-04 21:01:53 +02:00
amandip7	33362dbd17	Adding parameter dataloading_s to console logs and wandb for tracking… (#243 ) Co-authored-by: Remi <re.cadene@gmail.com>	2024-06-04 17:02:05 +01:00
Ruijie	b0d954c6e1	Fix bug in normalize to avoid divide by zero (#239 ) Co-authored-by: rj <rj@teleopstrio-razer.lan> Co-authored-by: Remi <re.cadene@gmail.com>	2024-06-04 12:21:28 +02:00