From 345bc8b2d00fbd85bc1ec77d2685cc0a892c4588 Mon Sep 17 00:00:00 2001
From: mshukor
Date: Mon, 2 Jun 2025 20:51:37 +0200
Subject: [PATCH] precommit

---
 .../policies/smolvla/configuration_smolvla.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lerobot/common/policies/smolvla/configuration_smolvla.py b/lerobot/common/policies/smolvla/configuration_smolvla.py
index 5f3669d6..822c0f40 100644
--- a/lerobot/common/policies/smolvla/configuration_smolvla.py
+++ b/lerobot/common/policies/smolvla/configuration_smolvla.py
@@ -82,10 +82,10 @@ class SmolVLAConfig(PreTrainedConfig):
     scheduler_decay_steps: int = 30_000
     scheduler_decay_lr: float = 2.5e-6
 
-    vlm_model_name: str = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" # Select the VLM backbone.
-    load_vlm_weights: bool = False # Set to True in case of training the expert from scratch. True when init from pretrained SmolVLA weights
+    vlm_model_name: str = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"  # Select the VLM backbone.
+    load_vlm_weights: bool = False  # Set to True in case of training the expert from scratch. True when init from pretrained SmolVLA weights
 
-    add_image_special_tokens: bool = False # Whether to use special image tokens around image features.
+    add_image_special_tokens: bool = False  # Whether to use special image tokens around image features.
 
     attention_mode: str = "cross_attn"
 
@@ -93,10 +93,10 @@
 
     pad_language_to: str = "longest"  # "max_length"
 
-    num_expert_layers: int = -1 # Less or equal to 0 is the default where the action expert has the same number of layers of VLM. Otherwise the expert have less layers.
-    num_vlm_layers: int = 16 # Number of layers used in the VLM (first num_vlm_layers layers)
-    self_attn_every_n_layers: int = 2 # Interleave SA layers each self_attn_every_n_layers
-    expert_width_multiplier: float = 0.75 # The action expert hidden size (wrt to the VLM)
+    num_expert_layers: int = -1  # Less or equal to 0 is the default where the action expert has the same number of layers of VLM. Otherwise the expert have less layers.
+    num_vlm_layers: int = 16  # Number of layers used in the VLM (first num_vlm_layers layers)
+    self_attn_every_n_layers: int = 2  # Interleave SA layers each self_attn_every_n_layers
+    expert_width_multiplier: float = 0.75  # The action expert hidden size (wrt to the VLM)
 
     def __post_init__(self):
         super().__post_init__()
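
For reference, a minimal sketch of how the fields touched by this patch might be overridden when constructing the config. This is an illustration only: it assumes SmolVLAConfig can be instantiated with these keyword overrides and that its remaining fields have workable defaults; it is not part of the patch.

# Sketch (assumption: SmolVLAConfig accepts these fields as keyword overrides
# and its other fields and __post_init__ checks work with their defaults).
from lerobot.common.policies.smolvla.configuration_smolvla import SmolVLAConfig

config = SmolVLAConfig(
    vlm_model_name="HuggingFaceTB/SmolVLM2-500M-Video-Instruct",  # VLM backbone
    load_vlm_weights=True,         # load pretrained VLM weights when training the expert from scratch
    num_vlm_layers=16,             # use only the first 16 VLM layers
    num_expert_layers=-1,          # <= 0: expert matches the (truncated) VLM depth
    self_attn_every_n_layers=2,    # interleave a self-attention layer every 2 layers
    expert_width_multiplier=0.75,  # expert hidden size relative to the VLM hidden size
)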