Refactor push_dataset_to_hub (#118)

This commit is contained in:
Remi
2024-04-30 14:25:41 +02:00
committed by GitHub
parent 2765877f28
commit e4e739f4f8
25 changed files with 1089 additions and 1192 deletions

View File

@@ -1,3 +1,8 @@
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
import numpy
import PIL
import torch
@@ -18,3 +23,16 @@ def concatenate_episodes(ep_dicts):
total_frames = data_dict["frame_index"].shape[0]
data_dict["index"] = torch.arange(0, total_frames, 1)
return data_dict
def save_images_concurrently(imgs_array: numpy.ndarray, out_dir: Path, max_workers: int = 4) -> None:
    """Write every image in `imgs_array` to `out_dir` as a PNG using a thread pool.

    The image at position `i` is saved as `frame_{i:06d}.png`. `out_dir` and any
    missing parent directories are created if they do not exist yet.

    Args:
        imgs_array: Images to save. Each item obtained by iterating over it is
            passed to `PIL.Image.fromarray`.
        out_dir: Destination directory (anything accepted by `Path`).
        max_workers: Maximum number of threads used to save images.

    Raises:
        Exception: The first failure (in submission order) raised while
            converting or writing a frame is re-raised to the caller.
    """
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    def save_image(img_array, i, out_dir):
        # `import PIL` alone does not load the `PIL.Image` submodule, so import
        # it explicitly instead of relying on another module having done so.
        import PIL.Image

        img = PIL.Image.fromarray(img_array)
        img.save(str(out_dir / f"frame_{i:06d}.png"), quality=100)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(save_image, img_array, i, out_dir)
            for i, img_array in enumerate(imgs_array)
        ]
        # An exception raised inside a worker is only stored on its future;
        # calling `result()` re-raises it here instead of dropping it silently.
        for future in futures:
            future.result()