From ba78b68eb48bddfbe83880ff3273bb14adba718f Mon Sep 17 00:00:00 2001 From: PeterGriffinJin Date: Wed, 9 Apr 2025 19:31:20 +0000 Subject: [PATCH] update train script --- train_grpo.sh | 4 ++-- train_ppo.sh | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/train_grpo.sh b/train_grpo.sh index 0b69431..119d348 100644 --- a/train_grpo.sh +++ b/train_grpo.sh @@ -43,7 +43,7 @@ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \ actor_rollout_ref.model.enable_gradient_checkpointing=true \ actor_rollout_ref.model.use_remove_padding=True \ actor_rollout_ref.actor.optim.lr=1e-6 \ - actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.95 \ + actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \ actor_rollout_ref.actor.use_kl_loss=true \ actor_rollout_ref.actor.ppo_mini_batch_size=256 \ actor_rollout_ref.actor.ppo_micro_batch_size=64 \ @@ -73,7 +73,7 @@ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \ trainer.project_name=$WAND_PROJECT \ trainer.experiment_name=$EXPERIMENT_NAME \ trainer.total_epochs=15 \ - trainer.total_training_steps=305 \ + trainer.total_training_steps=1005 \ trainer.default_hdfs_dir=null \ trainer.default_local_dir=verl_checkpoints/$EXPERIMENT_NAME \ max_turns=2 \ diff --git a/train_ppo.sh b/train_ppo.sh index 64bd8b1..961fa6e 100644 --- a/train_ppo.sh +++ b/train_ppo.sh @@ -43,7 +43,7 @@ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \ actor_rollout_ref.actor.optim.lr=1e-6 \ actor_rollout_ref.model.enable_gradient_checkpointing=true \ actor_rollout_ref.model.use_remove_padding=True \ - actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.95 \ + actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \ actor_rollout_ref.actor.ppo_mini_batch_size=256 \ actor_rollout_ref.actor.ppo_micro_batch_size=64 \ actor_rollout_ref.actor.fsdp_config.param_offload=true \ @@ -60,7 +60,7 @@ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \ actor_rollout_ref.actor.state_masking=true \ critic.optim.lr=1e-5 \ critic.model.use_remove_padding=True \ - critic.optim.lr_warmup_steps_ratio=0.05 \ + critic.optim.lr_warmup_steps_ratio=0.015 \ critic.model.path=$BASE_MODEL \ critic.model.enable_gradient_checkpointing=true \ critic.ppo_micro_batch_size=8 \ @@ -81,7 +81,7 @@ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \ trainer.project_name=$WAND_PROJECT \ trainer.experiment_name=$EXPERIMENT_NAME \ trainer.total_epochs=15 \ - trainer.total_training_steps=305 \ + trainer.total_training_steps=1005 \ trainer.default_hdfs_dir=null \ trainer.default_local_dir=verl_checkpoints/$EXPERIMENT_NAME \ max_turns=2 \