Compare commits

2 Commits

user/marin ... thomwolf_2

| Author | SHA1 | Date |
|---|---|---|
|  | 3c2dd1b881 |  |
|  | 63a5d0be39 |  |

.github/workflows/trufflehog.yml (vendored, 18 changed lines)
@@ -1,18 +0,0 @@
-on:
-  push:
-
-name: Secret Leaks
-
-permissions:
-  contents: read
-
-jobs:
-  trufflehog:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Secret Scanning
-        uses: trufflesecurity/trufflehog@main
@@ -78,15 +78,29 @@ def load_from_raw(raw_dir: Path, out_dir: Path, fps: int):
 
     image_keys = [key for key in df if "observation.images." in key]
 
+    num_unaligned_images = 0
+    max_episode = 0
+
     def get_episode_index(row):
+        nonlocal num_unaligned_images
+        nonlocal max_episode
         episode_index_per_cam = {}
         for key in image_keys:
+            if isinstance(row[key], float):
+                num_unaligned_images += 1
+                return float("nan")
             path = row[key][0]["path"]
             match = re.search(r"_(\d{6}).mp4", path)
             if not match:
                 raise ValueError(path)
             episode_index = int(match.group(1))
             episode_index_per_cam[key] = episode_index
+
+            if episode_index > max_episode:
+                assert episode_index - max_episode == 1
+                max_episode = episode_index
+            else:
+                assert episode_index == max_episode
         if len(set(episode_index_per_cam.values())) != 1:
             raise ValueError(
                 f"All cameras are expected to belong to the same episode, but getting {episode_index_per_cam}"
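
For context, a minimal sketch of the episode-index extraction done in this hunk, run on hypothetical camera video paths (the path layout below is an assumption; only the `_(\d{6}).mp4` pattern comes from the diff):

```python
import re

# Hypothetical per-camera video paths for one frame; only the "_000042.mp4" suffix matters here.
paths = [
    "videos/observation.images.cam_high_episode_000042.mp4",
    "videos/observation.images.cam_low_episode_000042.mp4",
]

episode_index_per_cam = {}
for cam, path in enumerate(paths):
    match = re.search(r"_(\d{6}).mp4", path)  # six zero-padded digits right before ".mp4"
    if not match:
        raise ValueError(path)
    episode_index_per_cam[cam] = int(match.group(1))

# Every camera of a frame must agree on the episode it belongs to.
assert len(set(episode_index_per_cam.values())) == 1
print(episode_index_per_cam)  # {0: 42, 1: 42}
```
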
@@ -111,11 +125,24 @@ def load_from_raw(raw_dir: Path, out_dir: Path, fps: int):
     del df["timestamp_utc"]
 
     # sanity check
-    has_nan = df.isna().any().any()
-    if has_nan:
-        raise ValueError("Dataset contains Nan values.")
+    num_rows_with_nan = df.isna().any(axis=1).sum()
+    assert (
+        num_rows_with_nan == num_unaligned_images
+    ), f"Found {num_rows_with_nan} rows with NaN values but {num_unaligned_images} unaligned images."
+    if num_unaligned_images > max_episode * 2:
+        # We allow a few unaligned images, typically at the beginning and end of the episodes for instance
+        # but if there are too many, we raise an error to avoid large chunks of missing data
+        raise ValueError(
+            f"Found {num_unaligned_images} unaligned images out of {max_episode} episodes. "
+            f"Check the timestamps of the cameras."
+        )
+
+    # Drop rows with NaN values now that we double checked and convert episode_index to int
+    df = df.dropna()
+    df["episode_index"] = df["episode_index"].astype(int)
 
     # sanity check episode indices go from 0 to n-1
+    assert df["episode_index"].max() == max_episode
     ep_ids = [ep_idx for ep_idx, _ in df.groupby("episode_index")]
     expected_ep_ids = list(range(df["episode_index"].max() + 1))
     if ep_ids != expected_ep_ids:
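
A small, self-contained illustration of the relaxed sanity check above: instead of rejecting any NaN, count the rows that carry one, compare against the number of unaligned images, then drop them (toy DataFrame, not the real dataset schema):

```python
import numpy as np
import pandas as pd

# Toy frame table: NaN marks a frame whose camera image could not be aligned.
df = pd.DataFrame(
    {
        "episode_index": [0.0, 0.0, 1.0, 1.0, np.nan],
        "observation.state": [0.1, 0.2, 0.3, 0.4, np.nan],
    }
)
num_unaligned_images = 1  # pretend one image was reported as unaligned upstream

num_rows_with_nan = df.isna().any(axis=1).sum()
assert num_rows_with_nan == num_unaligned_images

# Drop the incomplete rows, then episode_index can safely become an integer column.
df = df.dropna()
df["episode_index"] = df["episode_index"].astype(int)
print(df["episode_index"].tolist())  # [0, 0, 1, 1]
```
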
@@ -214,8 +241,6 @@ def from_raw_to_lerobot_format(raw_dir: Path, out_dir: Path, fps=None, video=Tru
 
     if fps is None:
         fps = 30
-    else:
-        raise NotImplementedError()
 
     if not video:
         raise NotImplementedError()
@@ -243,10 +243,11 @@ def load_previous_and_future_frames(
     is_pad = min_ > tolerance_s
 
     # check violated query timestamps are all outside the episode range
-    assert ((query_ts[is_pad] < ep_first_ts) | (ep_last_ts < query_ts[is_pad])).all(), (
-        f"One or several timestamps unexpectedly violate the tolerance ({min_} > {tolerance_s=}) inside episode range."
-        "This might be due to synchronization issues with timestamps during data collection."
-    )
+    if not ((query_ts[is_pad] < ep_first_ts) | (ep_last_ts < query_ts[is_pad])).all():
+        raise ValueError(
+            f"One or several timestamps unexpectedly violate the tolerance ({min_} > {tolerance_s=}) inside episode range."
+            "This might be due to synchronization issues with timestamps during data collection."
+        )
 
     # get dataset indices corresponding to frames to be loaded
     data_ids = ep_data_ids[argmin_]
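
A toy version of the check that moves from `assert` to `raise ValueError` here: padded query timestamps (those farther than `tolerance_s` from any frame) are only legitimate when they fall outside the episode's time range. The values below are made up for illustration:

```python
import torch

query_ts = torch.tensor([-0.5, 0.0, 1.0, 2.5])   # requested timestamps
min_ = torch.tensor([0.5, 0.0, 0.0, 0.5])        # distance to the closest existing frame
ep_first_ts, ep_last_ts = 0.0, 2.0               # episode time range
tolerance_s = 0.1

is_pad = min_ > tolerance_s
if not ((query_ts[is_pad] < ep_first_ts) | (ep_last_ts < query_ts[is_pad])).all():
    raise ValueError("A padded timestamp falls inside the episode range; check camera synchronization.")
```
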
@@ -189,7 +189,7 @@ class Logger:
         training_state["scheduler"] = scheduler.state_dict()
         torch.save(training_state, save_dir / self.training_state_file_name)
 
-    def save_checkpont(
+    def save_checkpoint(
         self,
         train_step: int,
         policy: Policy,
@@ -147,7 +147,7 @@ class Normalize(nn.Module):
                 assert not torch.isinf(min).any(), _no_stats_error_str("min")
                 assert not torch.isinf(max).any(), _no_stats_error_str("max")
                 # normalize to [0,1]
-                batch[key] = (batch[key] - min) / (max - min + 1e-8)
+                batch[key] = (batch[key] - min) / (max - min)
                 # normalize to [-1, 1]
                 batch[key] = batch[key] * 2 - 1
             else:
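
The normalization touched above, in isolation: min-max scale to [0, 1], then shift to [-1, 1]. The only difference between the two variants is the `1e-8` term, which keeps the division finite when `max == min` (a standalone sketch, not the module itself):

```python
import torch

x = torch.tensor([0.0, 2.0, 4.0])
min_, max_ = x.min(), x.max()

x01 = (x - min_) / (max_ - min_ + 1e-8)  # [0, 1], safe even if max_ == min_
x_pm1 = x01 * 2 - 1                      # [-1, 1]
print(x_pm1)  # tensor([-1., 0., 1.]) up to the epsilon
```
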
@@ -13,71 +13,39 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Use this script to get a quick summary of your system config.
-It should be able to run without any of LeRobot's dependencies or LeRobot itself installed.
-"""
-
 import platform
 
-HAS_HF_HUB = True
-HAS_HF_DATASETS = True
-HAS_NP = True
-HAS_TORCH = True
-HAS_LEROBOT = True
+import huggingface_hub
 
-try:
-    import huggingface_hub
-except ImportError:
-    HAS_HF_HUB = False
+# import dataset
+import numpy as np
+import torch
 
-try:
-    import datasets
-except ImportError:
-    HAS_HF_DATASETS = False
+from lerobot import __version__ as version
 
-try:
-    import numpy as np
-except ImportError:
-    HAS_NP = False
-
-try:
-    import torch
-except ImportError:
-    HAS_TORCH = False
-
-try:
-    import lerobot
-except ImportError:
-    HAS_LEROBOT = False
-
-
-lerobot_version = lerobot.__version__ if HAS_LEROBOT else "N/A"
-hf_hub_version = huggingface_hub.__version__ if HAS_HF_HUB else "N/A"
-hf_datasets_version = datasets.__version__ if HAS_HF_DATASETS else "N/A"
-np_version = np.__version__ if HAS_NP else "N/A"
-
-torch_version = torch.__version__ if HAS_TORCH else "N/A"
-torch_cuda_available = torch.cuda.is_available() if HAS_TORCH else "N/A"
-cuda_version = torch._C._cuda_getCompiledVersion() if HAS_TORCH and torch.version.cuda is not None else "N/A"
+pt_version = torch.__version__
+pt_cuda_available = torch.cuda.is_available()
+pt_cuda_available = torch.cuda.is_available()
+cuda_version = torch._C._cuda_getCompiledVersion() if torch.version.cuda is not None else "N/A"
 
 
 # TODO(aliberts): refactor into an actual command `lerobot env`
 def display_sys_info() -> dict:
     """Run this to get basic system info to help for tracking issues & bugs."""
     info = {
-        "`lerobot` version": lerobot_version,
+        "`lerobot` version": version,
         "Platform": platform.platform(),
         "Python version": platform.python_version(),
-        "Huggingface_hub version": hf_hub_version,
-        "Dataset version": hf_datasets_version,
-        "Numpy version": np_version,
-        "PyTorch version (GPU?)": f"{torch_version} ({torch_cuda_available})",
+        "Huggingface_hub version": huggingface_hub.__version__,
+        # TODO(aliberts): Add dataset when https://github.com/huggingface/lerobot/pull/73 is merged
+        # "Dataset version": dataset.__version__,
+        "Numpy version": np.__version__,
+        "PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})",
         "Cuda version": cuda_version,
         "Using GPU in script?": "<fill in>",
-        # "Using distributed or parallel set-up in script?": "<fill in>",
+        "Using distributed or parallel set-up in script?": "<fill in>",
     }
-    print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the last point.\n")
+    print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n")
     print(format_dict(info))
     return info
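
The import handling being changed above is the usual availability-guard pattern: try the import, remember whether it worked, and fall back to "N/A" when printing. A minimal sketch with torch only:

```python
HAS_TORCH = True
try:
    import torch
except ImportError:
    HAS_TORCH = False

torch_version = torch.__version__ if HAS_TORCH else "N/A"
torch_cuda_available = torch.cuda.is_available() if HAS_TORCH else "N/A"
print(f"PyTorch version (GPU?): {torch_version} ({torch_cuda_available})")
```
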
@@ -164,7 +164,10 @@ def rollout(
         # VectorEnv stores is_success in `info["final_info"][env_index]["is_success"]`. "final_info" isn't
         # available of none of the envs finished.
         if "final_info" in info:
-            successes = [info["is_success"] if info is not None else False for info in info["final_info"]]
+            successes = [
+                info["is_success"] if info is not None and "is_success" in info else False
+                for info in info["final_info"]
+            ]
         else:
            successes = [False] * env.num_envs
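
What the widened list comprehension guards against, shown on made-up `final_info` entries: an env that did not finish (None) and one whose info dict lacks the `is_success` key both count as failures instead of raising `KeyError`:

```python
final_info = [{"is_success": True}, None, {"episode_length": 120}]

successes = [
    info["is_success"] if info is not None and "is_success" in info else False
    for info in final_info
]
print(successes)  # [True, False, False]
```
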
@@ -209,7 +212,7 @@ def eval_policy(
     policy: torch.nn.Module,
     n_episodes: int,
     max_episodes_rendered: int = 0,
-    videos_dir: Path | None = None,
+    video_dir: Path | None = None,
     return_episode_data: bool = False,
     start_seed: int | None = None,
     enable_progbar: bool = False,
@@ -221,7 +224,7 @@ def eval_policy(
         policy: The policy.
         n_episodes: The number of episodes to evaluate.
         max_episodes_rendered: Maximum number of episodes to render into videos.
-        videos_dir: Where to save rendered videos.
+        video_dir: Where to save rendered videos.
         return_episode_data: Whether to return episode data for online training. Incorporates the data into
             the "episodes" key of the returned dictionary.
         start_seed: The first seed to use for the first individual rollout. For all subsequent rollouts the
@@ -347,8 +350,8 @@ def eval_policy(
         ):
             if n_episodes_rendered >= max_episodes_rendered:
                 break
-            videos_dir.mkdir(parents=True, exist_ok=True)
-            video_path = videos_dir / f"eval_episode_{n_episodes_rendered}.mp4"
+            video_dir.mkdir(parents=True, exist_ok=True)
+            video_path = video_dir / f"eval_episode_{n_episodes_rendered}.mp4"
             video_paths.append(str(video_path))
             thread = threading.Thread(
                 target=write_video,
@@ -503,10 +506,9 @@ def _compile_episode_data(
     }
 
 
-def main(
+def eval(
     pretrained_policy_path: str | None = None,
     hydra_cfg_path: str | None = None,
-    out_dir: str | None = None,
     config_overrides: list[str] | None = None,
 ):
     assert (pretrained_policy_path is None) ^ (hydra_cfg_path is None)
@@ -514,8 +516,12 @@ def main(
         hydra_cfg = init_hydra_config(pretrained_policy_path / "config.yaml", config_overrides)
     else:
         hydra_cfg = init_hydra_config(hydra_cfg_path, config_overrides)
+    out_dir = (
+        f"outputs/eval/{dt.now().strftime('%Y-%m-%d/%H-%M-%S')}_{hydra_cfg.env.name}_{hydra_cfg.policy.name}"
+    )
 
     if out_dir is None:
-        out_dir = f"outputs/eval/{dt.now().strftime('%Y-%m-%d/%H-%M-%S')}_{hydra_cfg.env.name}_{hydra_cfg.policy.name}"
+        raise NotImplementedError()
 
     # Check device is available
     device = get_safe_torch_device(hydra_cfg.device, log=True)
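
For reference, how the default evaluation output directory string is composed (environment and policy names are placeholders here, standing in for `hydra_cfg.env.name` and `hydra_cfg.policy.name`):

```python
from datetime import datetime as dt

env_name, policy_name = "pusht", "diffusion"  # placeholder names
out_dir = f"outputs/eval/{dt.now().strftime('%Y-%m-%d/%H-%M-%S')}_{env_name}_{policy_name}"
print(out_dir)  # e.g. outputs/eval/2024-05-01/12-34-56_pusht_diffusion
```
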
@@ -543,7 +549,7 @@ def main(
         policy,
         hydra_cfg.eval.n_episodes,
         max_episodes_rendered=10,
-        videos_dir=Path(out_dir) / "videos",
+        video_dir=Path(out_dir) / "eval",
         start_seed=hydra_cfg.seed,
         enable_progbar=True,
         enable_inner_progbar=True,
@@ -583,13 +589,6 @@ if __name__ == "__main__":
         ),
     )
     parser.add_argument("--revision", help="Optionally provide the Hugging Face Hub revision ID.")
-    parser.add_argument(
-        "--out-dir",
-        help=(
-            "Where to save the evaluation outputs. If not provided, outputs are saved in "
-            "outputs/eval/{timestamp}_{env_name}_{policy_name}"
-        ),
-    )
     parser.add_argument(
         "overrides",
         nargs="*",
@@ -598,7 +597,7 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     if args.pretrained_policy_name_or_path is None:
-        main(hydra_cfg_path=args.config, out_dir=args.out_dir, config_overrides=args.overrides)
+        eval(hydra_cfg_path=args.config, config_overrides=args.overrides)
     else:
         try:
             pretrained_policy_path = Path(
@@ -622,8 +621,4 @@ if __name__ == "__main__":
                 "repo ID, nor is it an existing local directory."
             )
 
-        main(
-            pretrained_policy_path=pretrained_policy_path,
-            out_dir=args.out_dir,
-            config_overrides=args.overrides,
-        )
+        eval(pretrained_policy_path=pretrained_policy_path, config_overrides=args.overrides)
@@ -150,7 +150,6 @@ def log_train_info(logger: Logger, info, step, cfg, dataset, is_offline):
     grad_norm = info["grad_norm"]
     lr = info["lr"]
     update_s = info["update_s"]
-    dataloading_s = info["dataloading_s"]
 
     # A sample is an (observation,action) pair, where observation and action
     # can be on multiple timestamps. In a batch, we have `batch_size`` number of samples.
@@ -171,7 +170,6 @@ def log_train_info(logger: Logger, info, step, cfg, dataset, is_offline):
         f"lr:{lr:0.1e}",
         # in seconds
         f"updt_s:{update_s:.3f}",
-        f"data_s:{dataloading_s:.3f}",  # if not ~0, you are bottlenecked by cpu or io
     ]
 
     logging.info(" ".join(log_items))
@@ -327,9 +325,6 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
 
     # Note: this helper will be used in offline and online training loops.
     def evaluate_and_checkpoint_if_needed(step):
-        _num_digits = max(6, len(str(cfg.training.offline_steps + cfg.training.online_steps)))
-        step_identifier = f"{step:0{_num_digits}d}"
-
         if cfg.training.eval_freq > 0 and step % cfg.training.eval_freq == 0:
             logging.info(f"Eval policy at step {step}")
             with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.use_amp else nullcontext():
@@ -337,7 +332,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
                     eval_env,
                     policy,
                     cfg.eval.n_episodes,
-                    videos_dir=Path(out_dir) / "eval" / f"videos_step_{step_identifier}",
+                    video_dir=Path(out_dir) / "eval",
                     max_episodes_rendered=4,
                     start_seed=cfg.seed,
                 )
@@ -350,12 +345,14 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
             logging.info(f"Checkpoint policy after step {step}")
             # Note: Save with step as the identifier, and format it to have at least 6 digits but more if
             # needed (choose 6 as a minimum for consistency without being overkill).
-            logger.save_checkpont(
+            logger.save_checkpoint(
                 step,
                 policy,
                 optimizer,
                 lr_scheduler,
-                identifier=step_identifier,
+                identifier=str(step).zfill(
+                    max(6, len(str(cfg.training.offline_steps + cfg.training.online_steps)))
+                ),
             )
             logging.info("Resume training")
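
Both identifier styles in this hunk produce the same zero-padded step string; the `step_identifier` variant only precomputes the width once. A quick check under assumed step counts:

```python
offline_steps, online_steps = 80_000, 20_000  # assumed config values
step = 2_500

_num_digits = max(6, len(str(offline_steps + online_steps)))
step_identifier = f"{step:0{_num_digits}d}"
assert step_identifier == str(step).zfill(_num_digits) == "002500"
```
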
@@ -385,10 +382,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
     for _ in range(step, cfg.training.offline_steps):
         if step == 0:
             logging.info("Start offline training on a fixed dataset")
 
-        start_time = time.perf_counter()
         batch = next(dl_iter)
-        dataloading_s = time.perf_counter() - start_time
 
         for key in batch:
             batch[key] = batch[key].to(device, non_blocking=True)
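
The timing lines dropped here measured how long fetching a batch takes; logging that number is a cheap way to spot a CPU/IO bottleneck. A stand-alone sketch with a stand-in iterator:

```python
import time

dl_iter = iter([{"observation.state": [0.1, 0.2]}])  # stand-in for the real dataloader iterator

start_time = time.perf_counter()
batch = next(dl_iter)
dataloading_s = time.perf_counter() - start_time
print(f"data_s:{dataloading_s:.3f}")  # if not ~0, you are bottlenecked by cpu or io
```
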
@@ -403,8 +397,6 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
             use_amp=cfg.use_amp,
         )
 
-        train_info["dataloading_s"] = dataloading_s
-
         if step % cfg.training.log_freq == 0:
             log_train_info(logger, train_info, step, cfg, offline_dataset, is_offline=True)
@@ -106,7 +106,6 @@ def visualize_dataset(
     ws_port: int = 9087,
     save: bool = False,
     output_dir: Path | None = None,
-    root: Path | None = None,
 ) -> Path | None:
     if save:
         assert (
@@ -114,7 +113,7 @@ def visualize_dataset(
         ), "Set an output directory where to write .rrd files with `--output-dir path/to/directory`."
 
     logging.info("Loading dataset")
-    dataset = LeRobotDataset(repo_id, root=root)
+    dataset = LeRobotDataset(repo_id)
 
     logging.info("Loading dataloader")
     episode_sampler = EpisodeSampler(dataset, episode_index)
@@ -257,12 +256,6 @@ def main():
         help="Directory path to write a .rrd file when `--save 1` is set.",
     )
-    parser.add_argument(
-        "--root",
-        type=str,
-        help="Root directory for a dataset stored on a local machine.",
-    )
 
     args = parser.parse_args()
     visualize_dataset(**vars(args))
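
Why dropping the `--root` flag and dropping the `root` parameter go together: the parsed namespace is splatted into the function with `**vars(args)`, so every CLI flag must correspond to a parameter of `visualize_dataset`. A reduced sketch with a stand-in signature:

```python
import argparse

def visualize_dataset(repo_id, episode_index):  # stand-in for the real signature
    print(repo_id, episode_index)

parser = argparse.ArgumentParser()
parser.add_argument("--repo-id", type=str, default="lerobot/pusht")
parser.add_argument("--episode-index", type=int, default=0)
args = parser.parse_args([])

visualize_dataset(**vars(args))  # an extra flag without a matching parameter would raise TypeError
```
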
@@ -244,7 +244,7 @@ def test_load_previous_and_future_frames_outside_tolerance_inside_episode_range(
     delta_timestamps = {"index": [-0.2, 0, 0.141]}
     tol = 0.04
     item = hf_dataset[2]
-    with pytest.raises(AssertionError):
+    with pytest.raises(ValueError):
         load_previous_and_future_frames(item, hf_dataset, episode_data_index, delta_timestamps, tol)
@@ -13,8 +13,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from pathlib import Path
-
 import pytest
 
 from lerobot.scripts.visualize_dataset import visualize_dataset
@@ -33,20 +31,3 @@ def test_visualize_dataset(tmpdir, repo_id):
         output_dir=tmpdir,
     )
     assert rrd_path.exists()
-
-
-@pytest.mark.parametrize(
-    "repo_id",
-    ["lerobot/pusht"],
-)
-@pytest.mark.parametrize("root", [Path(__file__).parent / "data"])
-def test_visualize_local_dataset(tmpdir, repo_id, root):
-    rrd_path = visualize_dataset(
-        repo_id,
-        episode_index=0,
-        batch_size=32,
-        save=True,
-        output_dir=tmpdir,
-        root=root,
-    )
-    assert rrd_path.exists()