Update train scripts: raise total_training_steps to 1005 and rescale LR warmup ratios

This commit is contained in:
PeterGriffinJin
2025-04-09 19:31:20 +00:00
parent 8ceb0cd1bb
commit ba78b68eb4
2 changed files with 5 additions and 5 deletions

View File

@@ -43,7 +43,7 @@ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
actor_rollout_ref.model.enable_gradient_checkpointing=true \
actor_rollout_ref.model.use_remove_padding=True \
actor_rollout_ref.actor.optim.lr=1e-6 \
-actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.95 \
+actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \
actor_rollout_ref.actor.use_kl_loss=true \
actor_rollout_ref.actor.ppo_mini_batch_size=256 \
actor_rollout_ref.actor.ppo_micro_batch_size=64 \
@@ -73,7 +73,7 @@ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
trainer.project_name=$WAND_PROJECT \
trainer.experiment_name=$EXPERIMENT_NAME \
trainer.total_epochs=15 \
-trainer.total_training_steps=305 \
+trainer.total_training_steps=1005 \
trainer.default_hdfs_dir=null \
trainer.default_local_dir=verl_checkpoints/$EXPERIMENT_NAME \
max_turns=2 \

View File

@@ -43,7 +43,7 @@ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
actor_rollout_ref.actor.optim.lr=1e-6 \
actor_rollout_ref.model.enable_gradient_checkpointing=true \
actor_rollout_ref.model.use_remove_padding=True \
-actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.95 \
+actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \
actor_rollout_ref.actor.ppo_mini_batch_size=256 \
actor_rollout_ref.actor.ppo_micro_batch_size=64 \
actor_rollout_ref.actor.fsdp_config.param_offload=true \
@@ -60,7 +60,7 @@ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
actor_rollout_ref.actor.state_masking=true \
critic.optim.lr=1e-5 \
critic.model.use_remove_padding=True \
-critic.optim.lr_warmup_steps_ratio=0.05 \
+critic.optim.lr_warmup_steps_ratio=0.015 \
critic.model.path=$BASE_MODEL \
critic.model.enable_gradient_checkpointing=true \
critic.ppo_micro_batch_size=8 \
@@ -81,7 +81,7 @@ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
trainer.project_name=$WAND_PROJECT \
trainer.experiment_name=$EXPERIMENT_NAME \
trainer.total_epochs=15 \
-trainer.total_training_steps=305 \
+trainer.total_training_steps=1005 \
trainer.default_hdfs_dir=null \
trainer.default_local_dir=verl_checkpoints/$EXPERIMENT_NAME \
max_turns=2 \