trainer:
  nnodes: 1
  n_gpus_per_node: 8

data:
  path: ~/data/rlhf/math/test.parquet
  prompt_key: prompt
  n_samples: 5
  output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet
  batch_size: 128

model:
  path: ~/models/Qwen2-7B-Instruct
  external_lib: null

rollout:
  name: vllm
  temperature: 1.0
  top_k: 50 # 0 for hf rollout, -1 for vllm rollout
  top_p: 0.7
  prompt_length: 1536
  response_length: 512
  # for vllm rollout
  dtype: bfloat16 # should align with FSDP
  gpu_memory_utilization: 0.5
  ignore_eos: False
  micro_batch_size: 256
  enforce_eager: True
  free_cache_engine: True
  load_format: dummy_dtensor
  tensor_model_parallel_size: 1
  max_num_batched_tokens: 8192
  max_num_seqs: 1024
  log_prob_micro_batch_size: 8
  # for hf rollout
  do_sample: True
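
Since this is a plain YAML config, it can also be loaded and tweaked from Python with OmegaConf. A minimal sketch follows, assuming the file is saved as generation.yaml (the filename, and the specific overrides shown, are illustrative, not part of the config above):

    from omegaconf import OmegaConf

    # Load the config above (saved as "generation.yaml"; the name is an assumption).
    cfg = OmegaConf.load("generation.yaml")

    # Override rollout knobs programmatically, e.g. to fit a smaller GPU
    # or to shard the model across two GPUs instead of one.
    cfg.rollout.gpu_memory_utilization = 0.4
    cfg.rollout.tensor_model_parallel_size = 2

    # Inspect the merged result before launching generation.
    print(OmegaConf.to_yaml(cfg))

The same overrides can typically be passed on the command line in Hydra dotted-key form (e.g. rollout.gpu_memory_utilization=0.4) rather than editing the file, depending on how the launcher consumes this config.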