生成选项采用上采样的方式,采样6次并让模型进行回答;将早停的认为困难,全部采样都回答正确的认为简单。基于此构造新的stepy
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional
|
||||
from typing import Dict, Any, List, Optional, Tuple
|
||||
import random
|
||||
|
||||
def convert_to_target_format(source_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
@@ -54,23 +55,166 @@ def convert_to_target_format(source_data: Dict[str, Any]) -> Optional[Dict[str,
|
||||
|
||||
return target_data
|
||||
|
||||
def batch_convert_questions(input_file: str, output_file: str) -> None:
|
||||
def classify_questions_by_difficulty(questions: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """
    Group questions by the difficulty label stored in their sampling summary.

    The label is read from
    ``question["generated_options"]["sampling_summary"]["difficulty_label"]``.

    Args:
        questions: List of question dicts.

    Returns:
        A dict with the keys ``hard_early_stop``, ``easy_all_correct``,
        ``mixed`` and ``unknown``, each mapping to the questions carrying
        that label (input order preserved). Questions whose label is
        missing, unrecognised, or whose intermediate fields are absent,
        null or not dicts are placed under ``unknown``.
    """
    difficulty_groups: Dict[str, List[Dict[str, Any]]] = {
        "hard_early_stop": [],   # hard: sampling stopped early after a wrong answer
        "easy_all_correct": [],  # easy: every sample was answered correctly
        "mixed": [],             # mixed: some samples right, some wrong
        "unknown": [],           # no usable difficulty label
    }

    for question in questions:
        # A key may be present with a null / non-dict value (e.g. a failed
        # generation step), so check the type at each level instead of
        # relying on .get() defaults, which only cover missing keys.
        generated_options = question.get("generated_options")
        sampling_summary = (
            generated_options.get("sampling_summary")
            if isinstance(generated_options, dict)
            else None
        )
        difficulty_label = (
            sampling_summary.get("difficulty_label", "unknown")
            if isinstance(sampling_summary, dict)
            else "unknown"
        )

        # Non-string labels (possibly unhashable) and unrecognised labels
        # both fall back to the "unknown" bucket.
        if not isinstance(difficulty_label, str) or difficulty_label not in difficulty_groups:
            difficulty_label = "unknown"
        difficulty_groups[difficulty_label].append(question)

    return difficulty_groups
|
||||
|
||||
def select_questions_by_ratio(difficulty_groups: Dict[str, List[Dict[str, Any]]],
                              selection_ratios: Dict[str, float],
                              random_seed: Optional[int] = None) -> Tuple[List[Dict[str, Any]], Dict[str, Dict[str, Any]]]:
    """
    Randomly pick a fraction of the questions in each difficulty group.

    Args:
        difficulty_groups: Questions grouped by difficulty label.
        selection_ratios: Fraction (0.0-1.0) to keep per difficulty label.
            Labels without an entry are treated as 0.0; values <= 0 select
            nothing and values >= 1 select the whole group.
        random_seed: Seed for reproducible sampling. When ``None`` the
            module-level ``random`` generator is used as-is.

    Returns:
        A tuple ``(selected_questions, selection_stats)``.
        ``selected_questions`` is the shuffled union of the picks from every
        group. ``selection_stats`` maps each difficulty label to a dict with
        the keys ``total``, ``selected``, ``ratio_target`` and
        ``ratio_actual``.
    """
    # A private generator seeded with random_seed yields the same sequence
    # as random.seed(random_seed) would, without reseeding the process-wide
    # generator as a side effect.
    rng = random.Random(random_seed) if random_seed is not None else random

    selected_questions: List[Dict[str, Any]] = []
    selection_stats: Dict[str, Dict[str, Any]] = {}

    for difficulty, questions in difficulty_groups.items():
        total_count = len(questions)
        ratio = selection_ratios.get(difficulty, 0.0)

        # Work out how many questions to keep from this group.
        if ratio <= 0:
            selected_count = 0
        elif ratio >= 1:
            selected_count = total_count
        else:
            # Floor of total * ratio. The small epsilon absorbs binary
            # float error such as 100 * 0.29 == 28.999999999999996, which
            # would otherwise truncate to 28 instead of 29.
            selected_count = min(total_count, int(total_count * ratio + 1e-9))

        if selected_count >= total_count:
            # Whole group (also covers the empty group); no RNG draw needed.
            selected = list(questions)
        elif selected_count > 0:
            selected = rng.sample(questions, selected_count)
        else:
            selected = []
        selected_questions.extend(selected)

        selection_stats[difficulty] = {
            "total": total_count,
            "selected": len(selected),
            "ratio_target": ratio,
            "ratio_actual": len(selected) / total_count if total_count > 0 else 0,
        }

    # Shuffle so the final list is not ordered by difficulty group.
    rng.shuffle(selected_questions)

    return selected_questions, selection_stats
|
||||
|
||||
def batch_convert_questions_with_difficulty_filter(input_file: str,
|
||||
output_file: str,
|
||||
selection_ratios: Dict[str, float],
|
||||
random_seed: Optional[int] = None) -> None:
|
||||
"""
|
||||
批量转换题目格式,支持按难度筛选
|
||||
|
||||
Args:
|
||||
input_file: 输入文件路径
|
||||
output_file: 输出文件路径
|
||||
selection_ratios: 各难度等级的选择比例
|
||||
random_seed: 随机种子
|
||||
"""
|
||||
print("正在加载数据...")
|
||||
|
||||
# 判断输入文件格式
|
||||
with open(input_file, 'r', encoding='utf-8') as f:
|
||||
source_questions = json.load(f)
|
||||
data = json.load(f)
|
||||
|
||||
# 处理两种可能的输入格式
|
||||
if isinstance(data, dict) and "questions" in data:
|
||||
# 格式:{"questions": [...], "其他字段": ...}
|
||||
source_questions = data["questions"]
|
||||
print(f"检测到完整格式数据,包含其他元数据")
|
||||
elif isinstance(data, list):
|
||||
# 格式:[{题目1}, {题目2}, ...]
|
||||
source_questions = data
|
||||
print(f"检测到题目列表格式")
|
||||
else:
|
||||
raise ValueError("不支持的输入文件格式")
|
||||
|
||||
print(f"加载了 {len(source_questions)} 道题目")
|
||||
|
||||
# 按难度分类题目
|
||||
print("正在按难度分类题目...")
|
||||
difficulty_groups = classify_questions_by_difficulty(source_questions)
|
||||
|
||||
print("题目难度分布:")
|
||||
total_multiple_choice = 0
|
||||
for difficulty, questions in difficulty_groups.items():
|
||||
# 统计该难度下的单选题数量
|
||||
mc_count = sum(1 for q in questions
|
||||
if q.get("generated_options", {}).get("question_type") == "multiple_choice")
|
||||
total_multiple_choice += mc_count
|
||||
print(f" {difficulty}: {len(questions)} 道总题目, {mc_count} 道单选题")
|
||||
|
||||
print(f"可转换的单选题总数: {total_multiple_choice}")
|
||||
|
||||
# 按比例选择题目
|
||||
print("\n正在按比例选择题目...")
|
||||
print("选择比例设置:")
|
||||
for difficulty, ratio in selection_ratios.items():
|
||||
if difficulty in difficulty_groups:
|
||||
print(f" {difficulty}: {ratio*100:.1f}%")
|
||||
|
||||
selected_questions, selection_stats = select_questions_by_ratio(
|
||||
difficulty_groups, selection_ratios, random_seed
|
||||
)
|
||||
|
||||
print(f"\n题目选择结果:")
|
||||
total_selected = 0
|
||||
for difficulty, stats in selection_stats.items():
|
||||
print(f" {difficulty}:")
|
||||
print(f" 总数: {stats['total']}")
|
||||
print(f" 选中: {stats['selected']}")
|
||||
print(f" 目标比例: {stats['ratio_target']*100:.1f}%")
|
||||
print(f" 实际比例: {stats['ratio_actual']*100:.1f}%")
|
||||
total_selected += stats['selected']
|
||||
|
||||
print(f"总共选中: {total_selected} 道题目")
|
||||
|
||||
# 转换选中的题目
|
||||
print("\n正在转换题目格式...")
|
||||
converted_questions = []
|
||||
conversion_stats = {
|
||||
"total": len(source_questions),
|
||||
"selected": total_selected,
|
||||
"multiple_choice": 0,
|
||||
"true_false": 0,
|
||||
"other": 0,
|
||||
@@ -78,7 +222,7 @@ def batch_convert_questions(input_file: str, output_file: str) -> None:
|
||||
"failed": 0
|
||||
}
|
||||
|
||||
for i, question in enumerate(source_questions):
|
||||
for i, question in enumerate(selected_questions):
|
||||
try:
|
||||
# 统计题目类型
|
||||
generated_options = question.get("generated_options", {})
|
||||
@@ -105,18 +249,29 @@ def batch_convert_questions(input_file: str, output_file: str) -> None:
|
||||
|
||||
# 保存结果
|
||||
print("正在保存转换结果...")
|
||||
output_data = {
|
||||
"questions": converted_questions,
|
||||
"metadata": {
|
||||
"total_original_questions": len(source_questions),
|
||||
"selection_ratios": selection_ratios,
|
||||
"selection_stats": selection_stats,
|
||||
"conversion_stats": conversion_stats,
|
||||
"random_seed": random_seed
|
||||
}
|
||||
}
|
||||
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(converted_questions, f, ensure_ascii=False, indent=2)
|
||||
|
||||
# 打印统计信息
|
||||
# 打印最终统计信息
|
||||
print(f"\n转换完成!")
|
||||
print(f"总题目数: {conversion_stats['total']}")
|
||||
print(f"选中题目数: {conversion_stats['selected']}")
|
||||
print(f"单选题: {conversion_stats['multiple_choice']}")
|
||||
print(f"判断题: {conversion_stats['true_false']}")
|
||||
print(f"其他类型: {conversion_stats['other']}")
|
||||
print(f"成功转换: {conversion_stats['converted']}")
|
||||
print(f"转换失败: {conversion_stats['failed']}")
|
||||
print(f"转换率: {conversion_stats['converted']/conversion_stats['total']*100:.1f}%")
|
||||
print(f"最终转换率: {conversion_stats['converted']/conversion_stats['selected']*100:.1f}%")
|
||||
print(f"结果已保存到: {output_file}")
|
||||
|
||||
def validate_converted_questions(questions: List[Dict[str, Any]]) -> Dict[str, int]:
|
||||
@@ -172,24 +327,80 @@ def validate_converted_questions(questions: List[Dict[str, Any]]) -> Dict[str, i
|
||||
|
||||
return stats
|
||||
|
||||
def create_difficulty_config_template() -> Dict[str, float]:
    """
    Print and return an example mapping of difficulty label to selection ratio.

    Returns:
        The template dict, with one ratio per difficulty label.
    """
    template = {
        "hard_early_stop": 1.0,   # keep 100% of hard questions
        "easy_all_correct": 0.1,  # keep 10% of easy questions
        "mixed": 0.5,             # keep 50% of mixed questions
        "unknown": 0.0,           # keep none of the unlabelled questions
    }

    print("难度选择比例配置模板:")
    print(json.dumps(template, indent=2))
    print("\n说明:")
    print("- 1.0 = 100% (全部选择)")
    print("- 0.5 = 50% (选择一半)")
    print("- 0.1 = 10% (选择10%)")
    print("- 0.0 = 0% (不选择)")

    return template
|
||||
|
||||
def main():
|
||||
"""主函数"""
|
||||
# 文件路径配置
|
||||
INPUT_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/stepy_complete_choice_questions.json"
|
||||
OUTPUT_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/stepz_final_choice_questions.json"
|
||||
INPUT_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/stepy_complete_choice_questions_with_sampling.json"
|
||||
OUTPUT_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/stepz_final_choice_questions_filtered.json"
|
||||
|
||||
# 难度选择比例配置
|
||||
# 可以根据需要调整这些比例
|
||||
SELECTION_RATIOS = {
|
||||
"hard_early_stop": 1.0, # 困难题选择100% (全部)
|
||||
"easy_all_correct": 0.0, # 简单题选择10%
|
||||
"mixed": 0.0, # 混合题选择30%
|
||||
"unknown": 0.0 # 未知难度不选择
|
||||
}
|
||||
|
||||
# 随机种子,保证结果可复现
|
||||
RANDOM_SEED = 42
|
||||
|
||||
try:
|
||||
# 批量转换
|
||||
batch_convert_questions(INPUT_FILE, OUTPUT_FILE)
|
||||
# 显示配置信息
|
||||
print("=== 难度筛选配置 ===")
|
||||
print("选择比例:")
|
||||
for difficulty, ratio in SELECTION_RATIOS.items():
|
||||
print(f" {difficulty}: {ratio*100:.1f}%")
|
||||
print(f"随机种子: {RANDOM_SEED}")
|
||||
print()
|
||||
|
||||
# 批量转换(包含难度筛选)
|
||||
batch_convert_questions_with_difficulty_filter(
|
||||
INPUT_FILE,
|
||||
OUTPUT_FILE,
|
||||
SELECTION_RATIOS,
|
||||
RANDOM_SEED
|
||||
)
|
||||
|
||||
# 验证转换结果
|
||||
print("\n正在验证转换结果...")
|
||||
with open(OUTPUT_FILE, 'r', encoding='utf-8') as f:
|
||||
converted_questions = json.load(f)
|
||||
result_data = json.load(f)
|
||||
|
||||
# 检查输出文件格式
|
||||
if "questions" in result_data:
|
||||
converted_questions = result_data["questions"]
|
||||
metadata = result_data.get("metadata", {})
|
||||
|
||||
print("\n=== 元数据信息 ===")
|
||||
if metadata:
|
||||
print(f"原始题目总数: {metadata.get('total_original_questions', 'N/A')}")
|
||||
print(f"随机种子: {metadata.get('random_seed', 'N/A')}")
|
||||
else:
|
||||
converted_questions = result_data
|
||||
|
||||
validation_stats = validate_converted_questions(converted_questions)
|
||||
|
||||
print(f"\n验证结果:")
|
||||
print(f"\n=== 验证结果 ===")
|
||||
print(f"总题目数: {validation_stats['total']}")
|
||||
print(f"格式正确: {validation_stats['valid']}")
|
||||
print(f"格式错误: {validation_stats['invalid']}")
|
||||
@@ -203,42 +414,83 @@ def main():
|
||||
|
||||
except Exception as e:
|
||||
print(f"程序执行失败: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
def test_single_conversion():
|
||||
"""测试单个题目转换"""
|
||||
# 测试数据
|
||||
test_data = {
|
||||
"idx": 3154,
|
||||
"question": "In stable ZrO2 material, cations form an fcc structure, and anions occupy tetrahedral interstitial sites. If 20 mol% CaO is added, calculate the percentage of occupied tetrahedral interstitial sites.",
|
||||
"answer": "Zr4+ and Ca2+ cations occupy the face-centered cubic lattice sites. 100 cations can form 25 unit cells, with a total of 25×8=200 tetrahedral interstitial sites. Therefore, the percentage of occupied tetrahedral interstitial sites is 180÷200=90%.",
|
||||
"question_type": "calculation",
|
||||
"correct_option": "90%",
|
||||
"choice_question": "In stable ZrO2 material, cations form an fcc structure, and anions occupy tetrahedral interstitial sites. If 20 mol% CaO is added, what is the percentage of occupied tetrahedral interstitial sites?",
|
||||
"generated_options": {
|
||||
"question_type": "multiple_choice",
|
||||
"options": {
|
||||
"A": "80%",
|
||||
"B": "90%",
|
||||
"C": "50%",
|
||||
"D": "75%"
|
||||
},
|
||||
"correct_answer": "B",
|
||||
"explanation": "正确答案90%基于:1) fcc中四面体间隙数量是阳离子的2倍;2) 20 mol% CaO掺杂产生20%氧空位;3) 被占据间隙位比例=(原始占据数-空位数)/总间隙位数。"
|
||||
},
|
||||
"generation_status": "success"
|
||||
def interactive_config() -> Dict[str, float]:
    """
    Prompt on stdin for a selection percentage per difficulty label.

    Each prompt is repeated until the user enters a number between 0 and
    100; a trailing ``%`` is accepted and ignored.

    Returns:
        A dict mapping each difficulty label to its ratio in the range
        0.0-1.0 (the entered percentage divided by 100).
    """
    print("=== 交互式难度选择配置 ===")

    # Insertion order of this dict is also the order of the prompts.
    difficulty_names = {
        "hard_early_stop": "困难题(答错早停)",
        "easy_all_correct": "简单题(全部答对)",
        "mixed": "混合题(部分对错)",
        "unknown": "未知难度题",
    }

    ratios: Dict[str, float] = {}

    for diff, display_name in difficulty_names.items():
        while True:
            ratio_input = input(f"请输入{display_name}的选择比例 (0-100%): ").strip()
            if ratio_input.endswith('%'):
                ratio_input = ratio_input[:-1]

            try:
                ratio_percent = float(ratio_input)
            except ValueError:
                print("请输入有效的数值")
                continue

            # NaN fails this comparison too, so it is rejected like any
            # other out-of-range value.
            if 0 <= ratio_percent <= 100:
                ratios[diff] = ratio_percent / 100.0
                break
            print("请输入0-100之间的数值")

    print("\n配置结果:")
    for diff, ratio in ratios.items():
        print(f"  {difficulty_names.get(diff, diff)}: {ratio*100:.1f}%")

    return ratios
|
||||
|
||||
def test_difficulty_distribution(input_file: str) -> None:
    """
    Print the difficulty distribution of the questions stored in a JSON file.

    The file may contain either a dict with a ``"questions"`` list or a bare
    list of questions; any other top-level shape is reported as unsupported
    and the function returns without analysing anything.

    Args:
        input_file: Path of the JSON file to analyse.
    """
    print(f"正在分析文件难度分布: {input_file}")

    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Accept both supported input layouts.
    if isinstance(data, dict) and "questions" in data:
        questions = data["questions"]
    elif isinstance(data, list):
        questions = data
    else:
        print("不支持的文件格式")
        return

    difficulty_groups = classify_questions_by_difficulty(questions)
    total = len(questions)

    print("\n难度分布分析:")
    print(f"总题目数: {total}")

    for difficulty, question_list in difficulty_groups.items():
        mc_count = sum(
            1 for q in question_list
            if q.get("generated_options", {}).get("question_type") == "multiple_choice"
        )
        # Every group is reported even when the file holds no questions,
        # so guard the percentage against a zero total.
        share = len(question_list) / total * 100 if total else 0.0
        print(f"  {difficulty}:")
        print(f"    总数: {len(question_list)}")
        print(f"    单选题: {mc_count}")
        print(f"    占比: {share:.1f}%")
|
||||
|
||||
if __name__ == "__main__":
    # Optional helpers; uncomment the one you need before running.
    #
    # Inspect the difficulty distribution of an input file first:
    # test_difficulty_distribution("/path/to/your/input/file.json")
    #
    # Build the selection ratios interactively:
    # ratios = interactive_config()
    #
    # Show the ratio configuration template:
    # create_difficulty_config_template()

    # Run the main program.
    main()
|
||||
|
||||
Reference in New Issue
Block a user