Compare commits

...

7 Commits

Author SHA1 Message Date
Luc Georges
a06598678c feat(ci): add trufflehog secrets detection (#254) 2024-06-10 14:25:43 +02:00
Thomas Lips
055a6f60c6 add root argument to the dataset visualizer to visualize local datasets (#249) 2024-06-10 10:44:32 +02:00
Simon Alibert
e54d6ea1eb Make display_sys_info.py install-agnostic (#253) 2024-06-07 15:02:17 +02:00
Alexander Soare
1eb4bfe2e4 Fix videos_dir documentation (#247) 2024-06-05 08:25:20 +01:00
Alexander Soare
21f222fa1d Add out_dir option to eval (#244) 2024-06-04 21:01:53 +02:00
amandip7
33362dbd17 Adding parameter dataloading_s to console logs and wandb for tracking… (#243)
Co-authored-by: Remi <re.cadene@gmail.com>
2024-06-04 17:02:05 +01:00
Ruijie
b0d954c6e1 Fix bug in normalize to avoid divide by zero (#239)
Co-authored-by: rj <rj@teleopstrio-razer.lan>
Co-authored-by: Remi <re.cadene@gmail.com>
2024-06-04 12:21:28 +02:00
7 changed files with 128 additions and 36 deletions

18
.github/workflows/trufflehog.yml vendored Normal file
View File

@@ -0,0 +1,18 @@
on:
push:
name: Secret Leaks
permissions:
contents: read
jobs:
trufflehog:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Secret Scanning
uses: trufflesecurity/trufflehog@main

View File

@@ -147,7 +147,7 @@ class Normalize(nn.Module):
assert not torch.isinf(min).any(), _no_stats_error_str("min")
assert not torch.isinf(max).any(), _no_stats_error_str("max")
# normalize to [0,1]
batch[key] = (batch[key] - min) / (max - min)
batch[key] = (batch[key] - min) / (max - min + 1e-8)
# normalize to [-1, 1]
batch[key] = batch[key] * 2 - 1
else:

View File

@@ -13,39 +13,71 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Use this script to get a quick summary of your system config.
It should be able to run without any of LeRobot's dependencies or LeRobot itself installed.
"""
import platform
import huggingface_hub
HAS_HF_HUB = True
HAS_HF_DATASETS = True
HAS_NP = True
HAS_TORCH = True
HAS_LEROBOT = True
# import dataset
import numpy as np
import torch
try:
import huggingface_hub
except ImportError:
HAS_HF_HUB = False
from lerobot import __version__ as version
try:
import datasets
except ImportError:
HAS_HF_DATASETS = False
pt_version = torch.__version__
pt_cuda_available = torch.cuda.is_available()
pt_cuda_available = torch.cuda.is_available()
cuda_version = torch._C._cuda_getCompiledVersion() if torch.version.cuda is not None else "N/A"
try:
import numpy as np
except ImportError:
HAS_NP = False
try:
import torch
except ImportError:
HAS_TORCH = False
try:
import lerobot
except ImportError:
HAS_LEROBOT = False
lerobot_version = lerobot.__version__ if HAS_LEROBOT else "N/A"
hf_hub_version = huggingface_hub.__version__ if HAS_HF_HUB else "N/A"
hf_datasets_version = datasets.__version__ if HAS_HF_DATASETS else "N/A"
np_version = np.__version__ if HAS_NP else "N/A"
torch_version = torch.__version__ if HAS_TORCH else "N/A"
torch_cuda_available = torch.cuda.is_available() if HAS_TORCH else "N/A"
cuda_version = torch._C._cuda_getCompiledVersion() if HAS_TORCH and torch.version.cuda is not None else "N/A"
# TODO(aliberts): refactor into an actual command `lerobot env`
def display_sys_info() -> dict:
"""Run this to get basic system info to help for tracking issues & bugs."""
info = {
"`lerobot` version": version,
"`lerobot` version": lerobot_version,
"Platform": platform.platform(),
"Python version": platform.python_version(),
"Huggingface_hub version": huggingface_hub.__version__,
# TODO(aliberts): Add dataset when https://github.com/huggingface/lerobot/pull/73 is merged
# "Dataset version": dataset.__version__,
"Numpy version": np.__version__,
"PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})",
"Huggingface_hub version": hf_hub_version,
"Dataset version": hf_datasets_version,
"Numpy version": np_version,
"PyTorch version (GPU?)": f"{torch_version} ({torch_cuda_available})",
"Cuda version": cuda_version,
"Using GPU in script?": "<fill in>",
"Using distributed or parallel set-up in script?": "<fill in>",
# "Using distributed or parallel set-up in script?": "<fill in>",
}
print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n")
print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the last point.\n")
print(format_dict(info))
return info

View File

@@ -209,7 +209,7 @@ def eval_policy(
policy: torch.nn.Module,
n_episodes: int,
max_episodes_rendered: int = 0,
video_dir: Path | None = None,
videos_dir: Path | None = None,
return_episode_data: bool = False,
start_seed: int | None = None,
enable_progbar: bool = False,
@@ -221,7 +221,7 @@ def eval_policy(
policy: The policy.
n_episodes: The number of episodes to evaluate.
max_episodes_rendered: Maximum number of episodes to render into videos.
video_dir: Where to save rendered videos.
videos_dir: Where to save rendered videos.
return_episode_data: Whether to return episode data for online training. Incorporates the data into
the "episodes" key of the returned dictionary.
start_seed: The first seed to use for the first individual rollout. For all subsequent rollouts the
@@ -347,8 +347,8 @@ def eval_policy(
):
if n_episodes_rendered >= max_episodes_rendered:
break
video_dir.mkdir(parents=True, exist_ok=True)
video_path = video_dir / f"eval_episode_{n_episodes_rendered}.mp4"
videos_dir.mkdir(parents=True, exist_ok=True)
video_path = videos_dir / f"eval_episode_{n_episodes_rendered}.mp4"
video_paths.append(str(video_path))
thread = threading.Thread(
target=write_video,
@@ -503,9 +503,10 @@ def _compile_episode_data(
}
def eval(
def main(
pretrained_policy_path: str | None = None,
hydra_cfg_path: str | None = None,
out_dir: str | None = None,
config_overrides: list[str] | None = None,
):
assert (pretrained_policy_path is None) ^ (hydra_cfg_path is None)
@@ -513,12 +514,8 @@ def eval(
hydra_cfg = init_hydra_config(pretrained_policy_path / "config.yaml", config_overrides)
else:
hydra_cfg = init_hydra_config(hydra_cfg_path, config_overrides)
out_dir = (
f"outputs/eval/{dt.now().strftime('%Y-%m-%d/%H-%M-%S')}_{hydra_cfg.env.name}_{hydra_cfg.policy.name}"
)
if out_dir is None:
raise NotImplementedError()
out_dir = f"outputs/eval/{dt.now().strftime('%Y-%m-%d/%H-%M-%S')}_{hydra_cfg.env.name}_{hydra_cfg.policy.name}"
# Check device is available
device = get_safe_torch_device(hydra_cfg.device, log=True)
@@ -546,7 +543,7 @@ def eval(
policy,
hydra_cfg.eval.n_episodes,
max_episodes_rendered=10,
video_dir=Path(out_dir) / "eval",
videos_dir=Path(out_dir) / "videos",
start_seed=hydra_cfg.seed,
enable_progbar=True,
enable_inner_progbar=True,
@@ -586,6 +583,13 @@ if __name__ == "__main__":
),
)
parser.add_argument("--revision", help="Optionally provide the Hugging Face Hub revision ID.")
parser.add_argument(
"--out-dir",
help=(
"Where to save the evaluation outputs. If not provided, outputs are saved in "
"outputs/eval/{timestamp}_{env_name}_{policy_name}"
),
)
parser.add_argument(
"overrides",
nargs="*",
@@ -594,7 +598,7 @@ if __name__ == "__main__":
args = parser.parse_args()
if args.pretrained_policy_name_or_path is None:
eval(hydra_cfg_path=args.config, config_overrides=args.overrides)
main(hydra_cfg_path=args.config, out_dir=args.out_dir, config_overrides=args.overrides)
else:
try:
pretrained_policy_path = Path(
@@ -618,4 +622,8 @@ if __name__ == "__main__":
"repo ID, nor is it an existing local directory."
)
eval(pretrained_policy_path=pretrained_policy_path, config_overrides=args.overrides)
main(
pretrained_policy_path=pretrained_policy_path,
out_dir=args.out_dir,
config_overrides=args.overrides,
)

View File

@@ -150,6 +150,7 @@ def log_train_info(logger: Logger, info, step, cfg, dataset, is_offline):
grad_norm = info["grad_norm"]
lr = info["lr"]
update_s = info["update_s"]
dataloading_s = info["dataloading_s"]
# A sample is an (observation,action) pair, where observation and action
# can be on multiple timestamps. In a batch, we have `batch_size`` number of samples.
@@ -170,6 +171,7 @@ def log_train_info(logger: Logger, info, step, cfg, dataset, is_offline):
f"lr:{lr:0.1e}",
# in seconds
f"updt_s:{update_s:.3f}",
f"data_s:{dataloading_s:.3f}", # if not ~0, you are bottlenecked by cpu or io
]
logging.info(" ".join(log_items))
@@ -325,6 +327,9 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
# Note: this helper will be used in offline and online training loops.
def evaluate_and_checkpoint_if_needed(step):
_num_digits = max(6, len(str(cfg.training.offline_steps + cfg.training.online_steps)))
step_identifier = f"{step:0{_num_digits}d}"
if cfg.training.eval_freq > 0 and step % cfg.training.eval_freq == 0:
logging.info(f"Eval policy at step {step}")
with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.use_amp else nullcontext():
@@ -332,7 +337,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
eval_env,
policy,
cfg.eval.n_episodes,
video_dir=Path(out_dir) / "eval",
videos_dir=Path(out_dir) / "eval" / f"videos_step_{step_identifier}",
max_episodes_rendered=4,
start_seed=cfg.seed,
)
@@ -350,9 +355,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
policy,
optimizer,
lr_scheduler,
identifier=str(step).zfill(
max(6, len(str(cfg.training.offline_steps + cfg.training.online_steps)))
),
identifier=step_identifier,
)
logging.info("Resume training")
@@ -382,7 +385,10 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
for _ in range(step, cfg.training.offline_steps):
if step == 0:
logging.info("Start offline training on a fixed dataset")
start_time = time.perf_counter()
batch = next(dl_iter)
dataloading_s = time.perf_counter() - start_time
for key in batch:
batch[key] = batch[key].to(device, non_blocking=True)
@@ -397,6 +403,8 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
use_amp=cfg.use_amp,
)
train_info["dataloading_s"] = dataloading_s
if step % cfg.training.log_freq == 0:
log_train_info(logger, train_info, step, cfg, offline_dataset, is_offline=True)

View File

@@ -106,6 +106,7 @@ def visualize_dataset(
ws_port: int = 9087,
save: bool = False,
output_dir: Path | None = None,
root: Path | None = None,
) -> Path | None:
if save:
assert (
@@ -113,7 +114,7 @@ def visualize_dataset(
), "Set an output directory where to write .rrd files with `--output-dir path/to/directory`."
logging.info("Loading dataset")
dataset = LeRobotDataset(repo_id)
dataset = LeRobotDataset(repo_id, root=root)
logging.info("Loading dataloader")
episode_sampler = EpisodeSampler(dataset, episode_index)
@@ -256,6 +257,12 @@ def main():
help="Directory path to write a .rrd file when `--save 1` is set.",
)
parser.add_argument(
"--root",
type=str,
help="Root directory for a dataset stored on a local machine.",
)
args = parser.parse_args()
visualize_dataset(**vars(args))

View File

@@ -13,6 +13,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
import pytest
from lerobot.scripts.visualize_dataset import visualize_dataset
@@ -31,3 +33,20 @@ def test_visualize_dataset(tmpdir, repo_id):
output_dir=tmpdir,
)
assert rrd_path.exists()
@pytest.mark.parametrize(
"repo_id",
["lerobot/pusht"],
)
@pytest.mark.parametrize("root", [Path(__file__).parent / "data"])
def test_visualize_local_dataset(tmpdir, repo_id, root):
rrd_path = visualize_dataset(
repo_id,
episode_index=0,
batch_size=32,
save=True,
output_dir=tmpdir,
root=root,
)
assert rrd_path.exists()