质量筛选完成

This commit is contained in:
lzy
2025-05-29 16:18:16 +08:00
parent ae410dc6a7
commit 1156bfdd7c
3 changed files with 54443 additions and 19425 deletions

View File

@@ -447,9 +447,9 @@ def main_filter_questions():
"""主函数:筛选高质量题目"""
# 文件路径
-INPUT_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/step4_enhanced_classified_questions.json" # 分类后的题目文件
-OUTPUT_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/step4_filtered_high_quality_questions.json" # 筛选后的输出文件
-ANALYSIS_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/step4_selection_analysis.xlsx" # 分析报告
+INPUT_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/step7_no_perp_convertible.json" # 分类后的题目文件
+OUTPUT_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/stepx_filtered_high_quality_questions.json" # 筛选后的输出文件
+ANALYSIS_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/stepx_selection_analysis.xlsx" # 分析报告
# 加载数据
print("正在加载已分类的题目...")
@@ -468,7 +468,7 @@ def main_filter_questions():
# 第二步:智能抽样
print("\n第二步:智能抽样构建评测集...")
-target_count = 2000 # 目标题目数
+target_count = 2900 # 目标题目数
selected_questions = selector.smart_sample_questions(filtered_questions, target_count)
print(f"最终选择: {len(selected_questions)} 道题目")