Filter out questions that are confusing, and keep those that can be converted into short-answer questions
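For reference, a minimal sketch (not part of the commit) of how the new filter classifies records: it keys on the top-level `convertible` flag and the nested `perplexity.has_perplexity` flag, and routes anything missing either field into `missing_fields`. The import path and the sample records below are assumptions for illustration only.

    # Minimal sketch: classify two hand-written records with the QuestionFilter added in
    # this commit. Assumes the script's directory is on PYTHONPATH; field values are
    # illustrative placeholders, not real benchmark data.
    from step7_filter_perplexity_convert import QuestionFilter

    sample_questions = [
        {   # no perplexity flagged and convertible -> goes to no_perp_convertible
            "idx": 1,
            "question": "Example question text A",
            "convertible": True,
            "perplexity": {"has_perplexity": False},
        },
        {   # 'convertible' field is missing -> goes to missing_fields
            "idx": 2,
            "question": "Example question text B",
            "perplexity": {"has_perplexity": True, "perplexity_reason": "ambiguous wording"},
        },
    ]

    filter_obj = QuestionFilter()
    buckets = filter_obj.filter_questions(sample_questions)
    print({k: len(v) for k, v in buckets.items()})
    # expected counts: no_perp_convertible=1, missing_fields=1, all other buckets 0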
276  layer2/PGEE/code/step7_filter_perplexity_convert.py  Normal file
@@ -0,0 +1,276 @@
import json
import logging
from typing import Dict, Any, List
from tqdm import tqdm

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class QuestionFilter:
    def __init__(self):
        """
        Initialize the question filter.
        """
        self.stats = {
            'total_questions': 0,
            'no_perp_convertible': 0,
            'no_perp_no_convertible': 0,
            'has_perp_convertible': 0,
            'has_perp_no_convertible': 0,
            'missing_fields': 0
        }

    def filter_questions(self, questions: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
        """
        Split questions into categories based on their fields.

        Args:
            questions: List of question records.

        Returns:
            Dict: Questions grouped by category.
        """
        # Initialize the result dict
        filtered_questions = {
            'no_perp_convertible': [],      # has_perplexity=False and convertible=True
            'no_perp_no_convertible': [],   # has_perplexity=False and convertible=False
            'has_perp_convertible': [],     # has_perplexity=True and convertible=True
            'has_perp_no_convertible': [],  # has_perplexity=True and convertible=False
            'missing_fields': []            # questions missing required fields
        }

        self.stats['total_questions'] = len(questions)

        logger.info(f"Filtering {len(questions)} questions...")

        # Process questions with a progress bar
        with tqdm(
            total=len(questions),
            desc="Filtering questions",
            ncols=100,
            unit="question",
            bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}] {postfix}"
        ) as pbar:

            for question in questions:
                try:
                    # Fetch the required fields
                    convertible = question.get("convertible")
                    perplexity_info = question.get("perplexity", {})
                    has_perplexity = perplexity_info.get("has_perplexity")

                    # Check that the required fields are present
                    if convertible is None or has_perplexity is None:
                        filtered_questions['missing_fields'].append(question)
                        self.stats['missing_fields'] += 1
                        pbar.set_postfix(status="missing fields")
                    else:
                        # Classify by the two flags
                        if not has_perplexity and convertible:
                            # has_perplexity=False and convertible=True
                            filtered_questions['no_perp_convertible'].append(question)
                            self.stats['no_perp_convertible'] += 1
                            pbar.set_postfix(status="no perplexity + convertible")

                        elif not has_perplexity and not convertible:
                            # has_perplexity=False and convertible=False
                            filtered_questions['no_perp_no_convertible'].append(question)
                            self.stats['no_perp_no_convertible'] += 1
                            pbar.set_postfix(status="no perplexity + not convertible")

                        elif has_perplexity and convertible:
                            # has_perplexity=True and convertible=True
                            filtered_questions['has_perp_convertible'].append(question)
                            self.stats['has_perp_convertible'] += 1
                            pbar.set_postfix(status="perplexity + convertible")

                        elif has_perplexity and not convertible:
                            # has_perplexity=True and convertible=False
                            filtered_questions['has_perp_no_convertible'].append(question)
                            self.stats['has_perp_no_convertible'] += 1
                            pbar.set_postfix(status="perplexity + not convertible")

                    # Advance the progress bar
                    pbar.update(1)

                except Exception as e:
                    logger.error(f"Error while processing a question: {e}")
                    filtered_questions['missing_fields'].append(question)
                    self.stats['missing_fields'] += 1
                    pbar.update(1)

        logger.info("Question filtering finished!")
        return filtered_questions

    def save_filtered_questions(self, filtered_questions: Dict[str, List[Dict[str, Any]]],
                                output_dir: str = "."):
        """
        Save the filtered questions to separate JSON files.

        Args:
            filtered_questions: Dict of questions grouped by category.
            output_dir: Output directory.
        """
        # Map each category to its output file
        file_mappings = {
            'no_perp_convertible': f"{output_dir}/no_perp_convertible.json",
            'no_perp_no_convertible': f"{output_dir}/no_perp_no_convertible.json",
            'has_perp_convertible': f"{output_dir}/has_perp_convertible.json",
            'has_perp_no_convertible': f"{output_dir}/has_perp_no_convertible.json",
            'missing_fields': f"{output_dir}/missing_fields.json"
        }

        logger.info("Saving filtered questions...")

        # Save each category
        for category, questions in filtered_questions.items():
            if questions:  # only save non-empty categories
                output_file = file_mappings[category]
                try:
                    with tqdm(desc=f"Saving {category}", unit="question", total=len(questions)) as pbar:
                        with open(output_file, 'w', encoding='utf-8') as f:
                            json.dump(questions, f, ensure_ascii=False, indent=2)
                        pbar.update(len(questions))

                    logger.info(f"Saved {len(questions)} questions to: {output_file}")

                except Exception as e:
                    logger.error(f"Failed to save file {output_file}: {e}")
            else:
                logger.info(f"Category {category} has no questions, skipping save")

    def print_statistics(self):
        """
        Print summary statistics.
        """
        total = self.stats['total_questions']

        logger.info("=" * 60)
        logger.info("Question filtering statistics:")
        logger.info("=" * 60)
        logger.info(f"Total questions: {total}")
        logger.info("")

        logger.info("Questions per category:")
        logger.info(f"  ✅ no perplexity + convertible:     {self.stats['no_perp_convertible']:>6} ({self.stats['no_perp_convertible']/total*100:.1f}%)")
        logger.info(f"  ❌ no perplexity + not convertible: {self.stats['no_perp_no_convertible']:>6} ({self.stats['no_perp_no_convertible']/total*100:.1f}%)")
        logger.info(f"  ⚠️ perplexity + convertible:        {self.stats['has_perp_convertible']:>6} ({self.stats['has_perp_convertible']/total*100:.1f}%)")
        logger.info(f"  🚫 perplexity + not convertible:    {self.stats['has_perp_no_convertible']:>6} ({self.stats['has_perp_no_convertible']/total*100:.1f}%)")
        logger.info(f"  ❓ missing required fields:         {self.stats['missing_fields']:>6} ({self.stats['missing_fields']/total*100:.1f}%)")
        logger.info("")

        # Sanity check on the totals
        calculated_total = (self.stats['no_perp_convertible'] +
                            self.stats['no_perp_no_convertible'] +
                            self.stats['has_perp_convertible'] +
                            self.stats['has_perp_no_convertible'] +
                            self.stats['missing_fields'])

        logger.info(f"Check: sum of categories = {calculated_total} (should equal {total})")
        logger.info("=" * 60)

        # Categories of primary interest
        logger.info("📋 Key output files:")
        logger.info(f"  • no_perp_convertible.json: {self.stats['no_perp_convertible']} questions (ideal multiple-choice questions)")
        logger.info(f"  • no_perp_no_convertible.json: {self.stats['no_perp_no_convertible']} questions (questions that cannot be converted)")

    def analyze_sample_questions(self, filtered_questions: Dict[str, List[Dict[str, Any]]],
                                 sample_size: int = 3):
        """
        Show a few sample questions for the categories of interest.

        Args:
            filtered_questions: Dict of questions grouped by category.
            sample_size: Number of samples to show per category.
        """
        logger.info("\n📖 Sample questions:")
        logger.info("=" * 60)

        for category, questions in filtered_questions.items():
            if questions and category in ['no_perp_convertible', 'no_perp_no_convertible']:
                logger.info(f"\nSamples for category [{category}]:")

                sample_count = min(sample_size, len(questions))
                for i, question in enumerate(questions[:sample_count]):
                    logger.info(f"  Sample {i + 1}:")
                    logger.info(f"    Question ID: {question.get('idx', 'N/A')}")
                    logger.info(f"    Original question: {question.get('question', '')[:60]}...")

                    if question.get('choice_question'):
                        logger.info(f"    Converted question: {question.get('choice_question', '')[:60]}...")
                        logger.info(f"    Correct option: {question.get('correct_option', '')[:40]}...")

                    logger.info(f"    Convertible: {question.get('convertible', False)}")

                    perplexity_info = question.get('perplexity', {})
                    logger.info(f"    Has perplexity: {perplexity_info.get('has_perplexity', False)}")

                    if perplexity_info.get('has_perplexity', False):
                        logger.info(f"    Perplexity reason: {perplexity_info.get('perplexity_reason', '')[:50]}...")

                    logger.info("    " + "-" * 40)

def load_questions(input_file: str) -> List[Dict[str, Any]]:
    """
    Load question data from a JSON file.

    Args:
        input_file: Path to the input file.

    Returns:
        List: The loaded questions.
    """
    try:
        with tqdm(desc="Loading file", unit="B", unit_scale=True) as pbar:
            with open(input_file, 'r', encoding='utf-8') as f:
                questions = json.load(f)
            pbar.update(1)

        logger.info(f"Loaded {len(questions)} questions")
        return questions
    except Exception as e:
        logger.error(f"Failed to load file: {e}")
        return []

def main():
    """
    Entry point: run the question filtering.
    """
    # ========== Configuration ==========
    # File paths
    INPUT_FILE = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/step6_perplexity_analyzed_questions.json"  # input file
    OUTPUT_DIR = "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code"  # output directory
    # ====================================

    try:
        print("🔄 Starting question filtering...")

        # Load the question data
        questions = load_questions(INPUT_FILE)
        if not questions:
            logger.error("No valid question data was loaded")
            return

        # Initialize the filter
        filter_obj = QuestionFilter()

        # Filter the questions
        filtered_questions = filter_obj.filter_questions(questions)

        # Save the filtered questions
        filter_obj.save_filtered_questions(filtered_questions, OUTPUT_DIR)

        # Print statistics
        filter_obj.print_statistics()

        # Show sample questions
        filter_obj.analyze_sample_questions(filtered_questions)

        print("✅ Question filtering finished!")

    except Exception as e:
        logger.error(f"Program execution failed: {e}")


if __name__ == "__main__":
    main()
7017  layer2/PGEE/code/step7_has_perp_convertible.json  Normal file (diff suppressed: file too large)
76891  layer2/PGEE/code/step7_no_perp_convertible.json  Normal file (diff suppressed: file too large)
21461  layer2/PGEE/code/step7_no_perp_no_convertible.json  Normal file (diff suppressed: one or more lines too long)