生成选项时采用上采样的方式:采样 6 次并让模型作答;将早停的题目视为困难,将全部采样都回答正确的题目视为简单。基于此构造新的 stepy。

This commit is contained in:
lzy
2025-06-02 16:19:18 +08:00
parent d219b9b0c0
commit abeacaac3e
8 changed files with 169413 additions and 11331 deletions

View File

@@ -2,7 +2,7 @@
api:
key: "sk-oYh3Xrhg8oDY2gW02c966f31C84449Ad86F9Cd9dF6E64a8d"
base_url: "https://vip.apiyi.com/v1"
temperature: -1 # 默认使用模型的温度设置
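temperature: 0 # NOTE(review): explicit temperature 0; the earlier -1 value was the one documented as "use the model's default temperature" — confirm the loader treats 0 as a real value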
max_retries: 10
# 支持多个模型
models:
@@ -10,7 +10,6 @@ api:
- "gpt-4o"
- "deepseek-chat"
- "claude-sonnet-4-20250514"
- "deepseek-r1"
# 或者使用单个模型(向后兼容)
# model: "qwen-max-2025-01-25"
@@ -20,7 +19,8 @@ system_prompt: None
evaluation:
max_workers: 20
# input_file: "/home/ubuntu/50T/LYT/MatBench/layer1/ALL-merge/merged.json"
input_file: "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/stepz_final_choice_questions.json"
# input_file: "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/stepz_final_choice_questions.json"
input_file: "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/stepz_final_choice_questions_filtered.json"
# 输出配置
output:
base_dir: "results"