# @package _global_

n_action_steps: 2
n_obs_steps: 1

policy:
  name: tdmpc

  reward_scale: 1.0

  episode_length: ${env.episode_length}
  discount: 0.9
  modality: 'all'  # pixels

  frame_stack: 1
  num_channels: 32
  img_size: ${env.image_size}
  state_dim: ???  # mandatory; resolved from the env at runtime
  action_dim: ???  # mandatory; resolved from the env at runtime

  # planning
  mpc: true  # plan at inference (sampling-based MPC) instead of acting from the policy alone
  iterations: 6  # planner iterations per step
  num_samples: 512  # action sequences sampled per iteration
  num_elites: 50  # top-k samples used to refit the sampling distribution
  mixture_coef: 0.1  # fraction of samples drawn from the policy prior
  min_std: 0.05
  max_std: 2.0
  temperature: 0.5  # softmax temperature over elite returns
  momentum: 0.1  # EMA momentum for the planner mean
  uncertainty_cost: 1  # weight of the uncertainty penalty during planning

  # actor
  log_std_min: -10
  log_std_max: 2

  # learning
  batch_size: 256
  max_buffer_size: 10000
  horizon: 5  # latent rollout / planning horizon
  reward_coef: 0.5
  value_coef: 0.1
  consistency_coef: 20
  rho: 0.5  # temporal decay of per-step losses over the horizon
  kappa: 0.1
  lr: 3e-4
  std_schedule: ${policy.min_std}  # constant (no annealing)
  horizon_schedule: ${policy.horizon}  # constant (no annealing)
  per: true  # prioritized experience replay
  per_alpha: 0.6
  per_beta: 0.4
  grad_clip_norm: 10
  seed_steps: 0
  update_freq: 2
  tau: 0.01  # soft target-network update coefficient
  utd: 1  # update-to-data ratio

  # offline RL
  # dataset_dir: ???
  data_first_percent: 1.0
  is_data_clip: true
  data_clip_eps: 1e-5
  expectile: 0.9
  A_scaling: 3.0

  # offline->online
  offline_steps: ${offline_steps}
  pretrained_model_path: ""
  # pretrained_model_path: "/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/offline.pt"
  # pretrained_model_path: "/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/final.pt"
  balanced_sampling: true  # sample batches from both the offline dataset and the online buffer
  demo_schedule: 0.5

  # architecture
  enc_dim: 256
  num_q: 5  # size of the Q-function ensemble
  mlp_dim: 512
  latent_dim: 50

  delta_timestamps:
    observation.image: "[i / ${fps} for i in range(6)]"
    observation.state: "[i / ${fps} for i in range(6)]"
    action: "[i / ${fps} for i in range(5)]"
    next.reward: "[i / ${fps} for i in range(5)]"
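
# Worked example (illustrative, not part of the original config): the strings under
# delta_timestamps are Python list comprehensions that the data loader is expected to
# evaluate into lists of relative timestamps (in seconds). Observations cover
# horizon + 1 = 6 frames, while actions and rewards cover the 5-step horizon.
# Assuming fps: 15 (the value used by the xarm envs), they evaluate to:
#   observation.image, observation.state -> [0.0, 0.0667, 0.1333, 0.2, 0.2667, 0.3333]
#   action, next.reward                  -> [0.0, 0.0667, 0.1333, 0.2, 0.2667]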