Add typos checks (#770)

This commit is contained in:
Simon Alibert
2025-02-25 23:51:15 +01:00
committed by GitHub
parent 8699a28be0
commit a1809ad3de
47 changed files with 114 additions and 82 deletions

View File

@@ -64,7 +64,7 @@ class ACTConfig(PreTrainedConfig):
output_normalization_modes: Similar dictionary as `normalize_input_modes`, but to unnormalize to the
original scale. Note that this is also used for normalizing the training targets.
vision_backbone: Name of the torchvision resnet backbone to use for encoding images.
pretrained_backbone_weights: Pretrained weights from torchvision to initalize the backbone.
pretrained_backbone_weights: Pretrained weights from torchvision to initialize the backbone.
`None` means no pretrained weights.
replace_final_stride_with_dilation: Whether to replace the ResNet's final 2x2 stride with a dilated
convolution.

View File

@@ -68,7 +68,7 @@ class DiffusionConfig(PreTrainedConfig):
within the image size. If None, no cropping is done.
crop_is_random: Whether the crop should be random at training time (it's always a center crop in eval
mode).
pretrained_backbone_weights: Pretrained weights from torchvision to initalize the backbone.
pretrained_backbone_weights: Pretrained weights from torchvision to initialize the backbone.
`None` means no pretrained weights.
use_group_norm: Whether to replace batch normalization with group normalization in the backbone.
The group sizes are set to be about 16 (to be precise, feature_dim // 16).
@@ -99,7 +99,7 @@ class DiffusionConfig(PreTrainedConfig):
num_inference_steps: Number of reverse diffusion steps to use at inference time (steps are evenly
spaced). If not provided, this defaults to be the same as `num_train_timesteps`.
do_mask_loss_for_padding: Whether to mask the loss when there are copy-padded actions. See
`LeRobotDataset` and `load_previous_and_future_frames` for mor information. Note, this defaults
`LeRobotDataset` and `load_previous_and_future_frames` for more information. Note, this defaults
to False as the original Diffusion Policy implementation does the same.
"""

View File

@@ -2,7 +2,7 @@
Convert pi0 parameters from Jax to Pytorch
Follow [README of openpi](https://github.com/Physical-Intelligence/openpi) to create a new environment
and install the required librairies.
and install the required libraries.
```bash
cd ~/code/openpi

View File

@@ -76,7 +76,7 @@ class TDMPCConfig(PreTrainedConfig):
n_pi_samples: Number of samples to draw from the policy / world model rollout every CEM iteration. Can
be zero.
uncertainty_regularizer_coeff: Coefficient for the uncertainty regularization used when estimating
trajectory values (this is the λ coeffiecient in eqn 4 of FOWM).
trajectory values (this is the λ coefficient in eqn 4 of FOWM).
n_elites: The number of elite samples to use for updating the gaussian parameters every CEM iteration.
elite_weighting_temperature: The temperature to use for softmax weighting (by trajectory value) of the
elites, when updating the gaussian parameters for CEM.
@@ -165,7 +165,7 @@ class TDMPCConfig(PreTrainedConfig):
"""Input validation (not exhaustive)."""
if self.n_gaussian_samples <= 0:
raise ValueError(
f"The number of guassian samples for CEM should be non-zero. Got `{self.n_gaussian_samples=}`"
f"The number of gaussian samples for CEM should be non-zero. Got `{self.n_gaussian_samples=}`"
)
if self.normalization_mapping["ACTION"] is not NormalizationMode.MIN_MAX:
raise ValueError(

View File

@@ -66,7 +66,7 @@ class VQBeTConfig(PreTrainedConfig):
within the image size. If None, no cropping is done.
crop_is_random: Whether the crop should be random at training time (it's always a center crop in eval
mode).
pretrained_backbone_weights: Pretrained weights from torchvision to initalize the backbone.
pretrained_backbone_weights: Pretrained weights from torchvision to initialize the backbone.
`None` means no pretrained weights.
use_group_norm: Whether to replace batch normalization with group normalization in the backbone.
The group sizes are set to be about 16 (to be precise, feature_dim // 16).

View File

@@ -485,7 +485,7 @@ class VQBeTHead(nn.Module):
def forward(self, x, **kwargs) -> dict:
# N is the batch size, and T is the number of action query tokens, which are processed through the same GPT
N, T, _ = x.shape
# we calculate N and T side parallely. Thus, the dimensions would be
# we compute over the N and T dimensions in parallel. Thus, the dimensions would be
# (batch size * number of action query tokens, action chunk size, action dimension)
x = einops.rearrange(x, "N T WA -> (N T) WA")
@@ -772,7 +772,7 @@ class VqVae(nn.Module):
Encoder and decoder are MLPs consisting of an input, output layer, and hidden layer, respectively.
The vq_layer uses residual VQs.
This class contains functions for training the encoder and decoder along with the residual VQ layer (for trainign phase 1),
This class contains functions for training the encoder and decoder along with the residual VQ layer (for training phase 1),
as well as functions to help BeT training part in training phase 2.
"""

View File

@@ -38,7 +38,7 @@ from lerobot.common.policies.vqbet.configuration_vqbet import VQBeTConfig
This file is part of a VQ-BeT that utilizes code from the following repositories:
- Vector Quantize PyTorch code is licensed under the MIT License:
Origianl source: https://github.com/lucidrains/vector-quantize-pytorch
Original source: https://github.com/lucidrains/vector-quantize-pytorch
- nanoGPT part is an adaptation of Andrej Karpathy's nanoGPT implementation in PyTorch.
Original source: https://github.com/karpathy/nanoGPT
@@ -289,7 +289,7 @@ class GPT(nn.Module):
This part of the file is for Residual Vector Quantization and utilizes code from the following repository:
- Phil Wang's vector-quantize-pytorch implementation in PyTorch.
Origianl source: https://github.com/lucidrains/vector-quantize-pytorch
Original source: https://github.com/lucidrains/vector-quantize-pytorch
- The vector-quantize-pytorch code is licensed under the MIT License:
@@ -1349,9 +1349,9 @@ class EuclideanCodebook(nn.Module):
# calculate distributed variance
variance_numer = reduce((data - batch_mean) ** 2, "h n d -> h 1 d", "sum")
distributed.all_reduce(variance_numer)
batch_variance = variance_numer / num_vectors
variance_number = reduce((data - batch_mean) ** 2, "h n d -> h 1 d", "sum")
distributed.all_reduce(variance_number)
batch_variance = variance_number / num_vectors
self.update_with_decay("batch_variance", batch_variance, self.affine_param_batch_decay)