layer2 commit
This commit is contained in:
78
layer2/process/step4.py
Normal file
78
layer2/process/step4.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""
|
||||
对821道英文问题进行处理
|
||||
1. 判断是否包含多个子问题,将问题拆分为完整子问题(去掉推理过程,只保留最后结果)
|
||||
2. 判断题目类型
|
||||
3. 将题目做成选择题
|
||||
对计算题,在数值附近随机生成三个相似答案作为错误选项
|
||||
对简答题,与标准答案最相近的其他问题的答案作为三个错误选项
|
||||
4. 将正确和错误选项随机打乱,生成ABCD选择题的模型
|
||||
5. 添加prompt,并将选择题包裹在[ANSWER]<answer>[/ANSWER]标签中
|
||||
6. 模型打分
|
||||
"""
|
||||
import json
|
||||
import random
|
||||
from typing import List, Dict
|
||||
|
||||
# Fixed instruction text wrapped around every question. They live in plain
# module-level constants so the f-string that assembles the prompt needs no
# nested quotes or backslashes inside its replacement fields (both are a
# SyntaxError on Python versions older than 3.12).
# NOTE(review): there is no separator between the prefix and the question, nor
# between the options and the suffix. The emitted text is kept exactly as the
# original produced it; confirm whether a space/newline was intended.
_QUESTION_PREFIX = "The following is a question about Fundamentals of Materials Science"
_ANSWER_FORMAT_SUFFIX = "You MUST include the letter(s) of the correct answer (separated by comma if there are many) within the following tags: [ANSWER] and [/ANSWER].\nFor example, '[ANSWER]<answer>[/ANSWER]', where <answer> is comma- or space-separated list of the correct letters. Always answer in exactly this format of comma-separated letters between the two tags, even if you are unsure. We require this because we use automatic parsing."
_WRONG_ANSWER_KEYS = ('wrong_answers_1', 'wrong_answers_2', 'wrong_answers_3')


def process_json_file(file_path: str) -> List[Dict]:
    """Convert every record of a JSON file into a lettered multiple-choice item.

    The file is expected to contain a list of dicts. For each record the
    correct answer (``answer``) and up to three distractors
    (``wrong_answers_1`` .. ``wrong_answers_3``) are shuffled and rendered as
    ``(A)... (B)...`` options. The record is then updated in place:

    * ``question`` becomes prefix + original question + options + answer-format
      instructions;
    * ``answer`` becomes ``[ANSWER]<letter>[/ANSWER]`` for the correct option;
    * the ``wrong_answers_*`` keys are removed.

    Args:
        file_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        The loaded list, with every record rewritten as described above.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a record lacks the ``answer`` or ``question`` key.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    for item in data:
        correct_answer = item['answer']

        # The correct answer is always kept, even when it is falsy (e.g. the
        # number 0); only missing/empty distractors are discarded. Filtering
        # the correct answer too would make the index lookup below fail.
        distractors = [item.get(key, '') for key in _WRONG_ANSWER_KEYS]
        options = [correct_answer] + [
            opt for opt in distractors if opt is not None and opt != ''
        ]

        # Randomise the order so the correct answer is not always option A.
        random.shuffle(options)

        # 65 is ord('A'): position 0 -> 'A', 1 -> 'B', ...
        correct_answer_letter = chr(65 + options.index(correct_answer))

        options_text = " ".join(
            f"({chr(65 + i)}){option}" for i, option in enumerate(options)
        )

        item['question'] = (
            f"{_QUESTION_PREFIX}{item['question']} "
            f"{options_text}{_ANSWER_FORMAT_SUFFIX}"
        )
        item['answer'] = f"[ANSWER]{correct_answer_letter}[/ANSWER]"

        # The distractors now live inside the question text; drop the raw keys.
        for key in _WRONG_ANSWER_KEYS:
            item.pop(key, None)

    return data
|
||||
|
||||
def save_processed_data(data: List[Dict], output_path: str) -> None:
    """Save the processed records to a JSON file.

    The output is UTF-8 encoded, indented by two spaces, and keeps non-ASCII
    characters as-is instead of ``\\uXXXX`` escapes.

    Args:
        data: The records to serialise.
        output_path: Path of the file to create or overwrite.

    Raises:
        TypeError: If ``data`` contains a value JSON cannot represent. The
            target file is not touched in that case.
        OSError: If the file cannot be opened for writing.
    """
    # Serialise before opening the file: opening with 'w' truncates it at once,
    # so a serialisation error raised mid-stream would otherwise leave a
    # half-written file and destroy any previous output.
    payload = json.dumps(data, ensure_ascii=False, indent=2)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(payload)
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
    input_file = "/home/ubuntu/50T/fsy/5_1.json"  # replace with your input file path
    output_file = "output.json"  # replace with your desired output file path

    try:
        processed_data = process_json_file(input_file)
        save_processed_data(processed_data, output_file)
    except Exception as e:
        # Top-level boundary: report the failure, then exit with a non-zero
        # status so shells and pipelines can tell the run did not succeed
        # (printing alone would leave the exit status at 0).
        print(f"处理过程中出现错误: {e}")
        raise SystemExit(1)
    else:
        print(f"处理完成!结果已保存到 {output_file}")
|
||||
Reference in New Issue
Block a user