质量筛选完成
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -447,9 +447,9 @@ def main_filter_questions():
|
||||
"""主函数:筛选高质量题目"""
|
||||
|
||||
# 文件路径
|
||||
INPUT_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/step4_enhanced_classified_questions.json" # 分类后的题目文件
|
||||
OUTPUT_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/step4_filtered_high_quality_questions.json" # 筛选后的输出文件
|
||||
ANALYSIS_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/step4_selection_analysis.xlsx" # 分析报告
|
||||
INPUT_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/step7_no_perp_convertible.json" # 分类后的题目文件
|
||||
OUTPUT_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/stepx_filtered_high_quality_questions.json" # 筛选后的输出文件
|
||||
ANALYSIS_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/stepx_selection_analysis.xlsx" # 分析报告
|
||||
|
||||
# 加载数据
|
||||
print("正在加载已分类的题目...")
|
||||
@@ -468,7 +468,7 @@ def main_filter_questions():
|
||||
|
||||
# 第二步:智能抽样
|
||||
print("\n第二步:智能抽样构建评测集...")
|
||||
target_count = 2000 # 目标题目数
|
||||
target_count = 2900 # 目标题目数
|
||||
selected_questions = selector.smart_sample_questions(filtered_questions, target_count)
|
||||
print(f"最终选择: {len(selected_questions)} 道题目")
|
||||
|
||||
|
||||
BIN
layer2/PGEE/code/stepx_selection_analysis.xlsx
Normal file
BIN
layer2/PGEE/code/stepx_selection_analysis.xlsx
Normal file
Binary file not shown.
Reference in New Issue
Block a user