# lerobot/realman_src/realman_aloha/shadow_rm_act/config/config.yaml

# Robot environment settings: per-arm config files, the arm axis count,
# camera identifiers, and the initial arm angle lists.
robot_env:
  # TODO: change these paths to the correct ones.
  rm_left_arm: '/home/rm/aloha/shadow_rm_aloha/config/rm_left_arm.yaml'
  rm_right_arm: '/home/rm/aloha/shadow_rm_aloha/config/rm_right_arm.yaml'
  arm_axis: 6

  # Camera identifiers are quoted so they are loaded as strings, not integers
  # (presumably device serial numbers -- confirm).
  head_camera: '215222076892'
  bottom_camera: '215222076981'
  left_camera: '152122078151'
  right_camera: '152122073489'

  # Previous initial angles, kept for reference:
  # init_left_arm_angle: [0.226, 21.180, 91.304, -0.515, 67.486, 2.374, 0.9]
  # init_right_arm_angle: [-1.056, 33.057, 84.376, -0.204, 66.357, -3.236, 0.9]
  #
  # NOTE(review): init_left_arm_angle has 8 values, while init_right_arm_angle
  # and both previous lists above have 7 -- confirm which length the consumer
  # expects before running on hardware.
  init_left_arm_angle: [6.45, 66.093, 2.9, 20.919, -1.491, 100.756, 18.808, 0.617]
  init_right_arm_angle: [166.953, -33.575, -163.917, 73.3, -9.581, 69.51, 0.876]
# Top-level run settings: dataset and checkpoint locations plus the
# training / evaluation options below.
dataset_dir: '/home/rm/aloha/shadow_rm_aloha/data/dataset/20250103'
checkpoint_dir: '/home/rm/aloha/shadow_rm_act/data'
# checkpoint_name: 'policy_best.ckpt'
checkpoint_name: 'policy_9500.ckpt'
# NOTE(review): policy_config.state_dim holds the same value (14) --
# presumably the two must stay in sync; confirm against the consumer.
state_dim: 14
save_episode: true
# Number of rollouts (trajectories) to collect during training.
num_rollouts: 50
real_robot: true
policy_class: 'ACT'
onscreen_render: false
camera_names: ['cam_high', 'cam_low', 'cam_left', 'cam_right']
# Maximum length of an episode (number of time steps).
episode_len: 300
task_name: 'aloha_01_11.28'
# Whether to use temporal aggregation.
temporal_agg: false
# Number of samples per batch during training.
batch_size: 8
# Random seed.
seed: 1000
# Chunk size used for processing sequences.
# NOTE(review): policy_config.num_queries holds the same value (30) --
# presumably related; confirm.
chunk_size: 30
# Evaluate the model every eval_every steps.
eval_every: 1
# Total number of training steps.
num_steps: 10000
# Validate the model every validate_every steps.
validate_every: 1
# Save a checkpoint every save_every steps.
save_every: 500
# Whether to load a pretrained model.
load_pretrain: false
# The four unset options below are written as explicit nulls; a bare `key:`
# loads as null as well, so the parsed values are unchanged.
resume_ckpt_path: null
name_filter: null  # TODO
# Whether to skip mirrored data (e.g. for symmetry-based data augmentation).
skip_mirrored_data: false
stats_dir: null
sample_weights: null
# Fraction of the data used for training (the rest is used for validation).
train_ratio: 0.8

# Policy hyperparameters: backbone, transformer, VQ and optimizer settings.
policy_config:
  # Size of the embeddings (dimension of the transformer).
  hidden_dim: 512
  # Dimension of the state.
  state_dim: 14
  # Type of positional embedding to use on top of the image features
  # ('sine' or 'learned').
  position_embedding: 'sine'
  lr_backbone: 1.0e-5
  # If true, the model masks the non-visible pixels.
  masks: false
  backbone: 'resnet18'
  # If true, we replace stride with dilation in the last convolutional
  # block (DC5).
  dilation: false
  # Dropout applied in the transformer.
  dropout: 0.1
  nheads: 8
  # Intermediate size of the feedforward layers in the transformer blocks.
  dim_feedforward: 3200
  # Number of encoding layers in the transformer.
  enc_layers: 4
  # Number of decoding layers in the transformer.
  dec_layers: 7
  # If true, apply LayerNorm to the input instead of the output of the
  # MultiheadAttention and FeedForward.
  pre_norm: false
  num_queries: 30
  camera_names: ['cam_high', 'cam_low', 'cam_left', 'cam_right']
  vq: false
  # NOTE(review): plain `none` is loaded as the string 'none', not as null.
  # If the consumer expects a null/None value here, write `null` -- confirm.
  vq_class: none
  vq_dim: 64
  action_dim: 14
  no_encoder: false
  lr: 1.0e-5
  weight_decay: 1.0e-4
  kl_weight: 10

  # lr_drop: 200
  # clip_max_norm: 0.1