Reward classifier and training (#528)

Co-authored-by: Daniel Ritchie <daniel@brainwavecollective.ai>
Co-authored-by: resolver101757 <kelster101757@hotmail.com>
Co-authored-by: Jannik Grothusen <56967823+J4nn1K@users.noreply.github.com>
Co-authored-by: Remi <re.cadene@gmail.com>
Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co>
This commit is contained in:
Yoel
2024-12-09 10:21:50 +01:00
committed by Eugene Mironov
parent 1aa8d4ac91
commit e35546f58e
10 changed files with 906 additions and 16 deletions

View File

@@ -191,6 +191,7 @@ def record(
single_task: str,
pretrained_policy_name_or_path: str | None = None,
policy_overrides: List[str] | None = None,
assign_rewards: bool = False,
fps: int | None = None,
warmup_time_s: int | float = 2,
episode_time_s: int | float = 10,
@@ -214,6 +215,9 @@ def record(
policy = None
device = None
use_amp = None
extra_features = (
{"next.reward": {"dtype": "int64", "shape": (1,), "names": None}} if assign_rewards else None
)
if single_task:
task = single_task
@@ -254,12 +258,12 @@ def record(
use_videos=video,
image_writer_processes=num_image_writer_processes,
image_writer_threads=num_image_writer_threads_per_camera * len(robot.cameras),
features=extra_features,
)
if not robot.is_connected:
robot.connect()
listener, events = init_keyboard_listener()
listener, events = init_keyboard_listener(assign_rewards=assign_rewards)
# Execute a few seconds without recording to:
# 1. teleoperate the robot to move it in starting position if no policy provided,
@@ -469,12 +473,12 @@ if __name__ == "__main__":
default=1,
help="Upload dataset to Hugging Face hub.",
)
parser_record.add_argument(
"--tags",
type=str,
nargs="*",
help="Add tags to your dataset on the hub.",
)
# parser_record.add_argument(
# "--tags",
# type=str,
# nargs="*",
# help="Add tags to your dataset on the hub.",
# )
parser_record.add_argument(
"--num-image-writer-processes",
type=int,
@@ -517,6 +521,12 @@ if __name__ == "__main__":
nargs="*",
help="Any key=value arguments to override config values (use dots for.nested=overrides)",
)
parser_record.add_argument(
"--assign-rewards",
type=int,
default=0,
help="Enables the assignation of rewards to frames (by default no assignation). When enabled, assign a 0 reward to frames until the space bar is pressed which assign a 1 reward. Press the space bar a second time to assign a 0 reward. The reward assigned is reset to 0 when the episode ends.",
)
parser_replay = subparsers.add_parser("replay", parents=[base_parser])
parser_replay.add_argument(