# ACT training / evaluation configuration for an ALOHA-style dual-arm robot.
# Reconstructed into block YAML: the source had collapsed onto two physical
# lines, which is invalid (the inline "# TODO" comment swallowed the rest of
# the line). All keys and values are preserved unchanged.

# Robot hardware / environment settings.
robot_env:
  # TODO change the path to the correct one
  rm_left_arm: '/home/rm/aloha/shadow_rm_aloha/config/rm_left_arm.yaml'
  rm_right_arm: '/home/rm/aloha/shadow_rm_aloha/config/rm_right_arm.yaml'
  arm_axis: 6
  # Camera serial numbers — quoted so they stay strings, not integers.
  head_camera: '215222076892'
  bottom_camera: '215222076981'
  left_camera: '152122078151'
  right_camera: '152122073489'
  # init_left_arm_angle: [0.226, 21.180, 91.304, -0.515, 67.486, 2.374, 0.9]
  # init_right_arm_angle: [-1.056, 33.057, 84.376, -0.204, 66.357, -3.236, 0.9]
  # NOTE(review): left has 8 elements, right has 7 — confirm this asymmetry
  # is intentional (the commented-out pairs above both have 7).
  init_left_arm_angle: [6.45, 66.093, 2.9, 20.919, -1.491, 100.756, 18.808, 0.617]
  init_right_arm_angle: [166.953, -33.575, -163.917, 73.3, -9.581, 69.51, 0.876]

dataset_dir: '/home/rm/aloha/shadow_rm_aloha/data/dataset/20250103'
checkpoint_dir: '/home/rm/aloha/shadow_rm_act/data'
# checkpoint_name: 'policy_best.ckpt'
checkpoint_name: 'policy_9500.ckpt'
state_dim: 14
save_episode: true
num_rollouts: 50  # number of rollouts (trajectories) to collect
real_robot: true
policy_class: 'ACT'
onscreen_render: false
camera_names: ['cam_high', 'cam_low', 'cam_left', 'cam_right']
episode_len: 300  # maximum episode length (number of time steps)
task_name: 'aloha_01_11.28'
temporal_agg: false  # whether to use temporal aggregation
batch_size: 8  # samples per batch during training
seed: 1000  # random seed
chunk_size: 30  # chunk size used when processing action sequences
eval_every: 1  # evaluate the model every eval_every steps
num_steps: 10000  # total number of training steps
validate_every: 1  # validate the model every validate_every steps
save_every: 500  # save a checkpoint every save_every steps
load_pretrain: false  # whether to load a pretrained model
resume_ckpt_path:  # empty -> null; set a path to resume from a checkpoint
name_filter:  # TODO
skip_mirrored_data: false  # whether to skip mirrored data (e.g. symmetry-based augmentation)
stats_dir:  # empty -> null
sample_weights:  # empty -> null
train_ratio: 0.8  # fraction of data used for training (rest for validation)

# Hyperparameters forwarded to the ACT policy / DETR-style model.
policy_config:
  hidden_dim: 512  # size of the embeddings (dimension of the transformer)
  state_dim: 14  # dimension of the state
  position_embedding: 'sine'  # ('sine', 'learned'). Type of positional embedding to use on top of the image features
  lr_backbone: 1.0e-5
  masks: false  # if true, the model masks the non-visible pixels
  backbone: 'resnet18'
  dilation: false  # if true, we replace stride with dilation in the last convolutional block (DC5)
  dropout: 0.1  # dropout applied in the transformer
  nheads: 8
  dim_feedforward: 3200  # intermediate size of the feedforward layers in the transformer blocks
  enc_layers: 4  # number of encoding layers in the transformer
  dec_layers: 7  # number of decoding layers in the transformer
  pre_norm: false  # if true, apply LayerNorm to the input instead of the output of the MultiheadAttention and FeedForward
  num_queries: 30  # matches chunk_size above — presumably must stay in sync; verify against the trainer
  camera_names: ['cam_high', 'cam_low', 'cam_left', 'cam_right']
  vq: false
  vq_class: none  # NOTE(review): plain `none` parses as the STRING "none", not YAML null — confirm the consumer expects that (use `null` if it checks `is None`)
  vq_dim: 64
  action_dim: 14
  no_encoder: false
  lr: 1.0e-5
  weight_decay: 1.0e-4
  kl_weight: 10
  # lr_drop: 200
  # clip_max_norm: 0.1