second commit
This commit is contained in:
61
layer1/ALL/SciEval-process.py
Normal file
61
layer1/ALL/SciEval-process.py
Normal file
@@ -0,0 +1,61 @@
|
||||
import json
|
||||
|
||||
# Read the JSON file
|
||||
# Marker that separates the question stem from the first option.
_FIRST_OPTION_MARKER = "\n\nA."
# Option labels, in the order they are expected to appear in the prompt.
_OPTION_LABELS = ("A", "B", "C", "D")
# Trailer that follows the last option in the raw prompt; not part of option D.
_ANSWER_TRAILER = "\n\nAnswer:"
# Instruction attached verbatim to every converted record.
_ANSWER_PROMPT = "You MUST include the letter(s) of the correct answer (separated by comma if there are many) within the following tags: [ANSWER] and [/ANSWER]. No explanations and other information. Only return the '[ANSWER]<answer>[/ANSWER]'. We require this because we use automatic parsing."


def _split_question(question):
    """Split a raw prompt into ``(stem, option_texts)``.

    The prompt is expected to look like
    ``"<stem>\\n\\nA. ...\\nB. ...\\nC. ...\\nD. ...\\n\\nAnswer:"``.

    Raises:
        ValueError: if any of the option markers is missing.
    """
    # partition() cuts at the FIRST marker only, so nothing after a repeated
    # marker is lost and label-like text inside an option (e.g. "100 °C.")
    # is left untouched.
    stem, marker, remainder = question.partition(_FIRST_OPTION_MARKER)
    if not marker:
        raise ValueError(f"option marker {_FIRST_OPTION_MARKER!r} not found")

    texts = []
    for next_label in _OPTION_LABELS[1:]:
        next_marker = f"\n{next_label}."
        option, marker, remainder = remainder.partition(next_marker)
        if not marker:
            raise ValueError(f"option marker {next_marker!r} not found")
        texts.append(option.strip())

    # Whatever is left is the last option; drop the "Answer:" trailer first
    # so that whitespace in front of it is stripped as well.
    texts.append(remainder.replace(_ANSWER_TRAILER, "").strip())
    return stem.strip(), texts


def transform_json(file_path):
    """Convert a JSON file of multiple-choice records into the benchmark layout.

    The file must contain a JSON array of objects, each with a ``"question"``
    string (stem followed by options A-D) and an ``"answer"`` whose first
    element is used as the answer (presumably a list of letters; a plain
    string would contribute its first character).

    Args:
        file_path: Path of the UTF-8 encoded input JSON file.

    Returns:
        A list of dicts with the keys ``"question"``, ``"choices"``
        (``{"text": [...], "label": [...]}``), ``"answer"`` (wrapped in
        ``[ANSWER]...[/ANSWER]``) and ``"prompt"``.

    Raises:
        ValueError: if a question does not contain all four option markers;
            the message names the index of the offending record.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    new_json = []
    for index, item in enumerate(data):
        answer = item["answer"][0]
        try:
            new_question, texts = _split_question(item["question"])
        except ValueError as err:
            raise ValueError(f"item {index}: {err}") from err

        new_json.append({
            "question": new_question,
            "choices": {
                "text": texts,
                "label": list(_OPTION_LABELS),
            },
            "answer": f"[ANSWER]{answer}[/ANSWER]",
            "prompt": _ANSWER_PROMPT,
        })

    return new_json
|
||||
|
||||
# Input and output JSON paths for this conversion run.
input_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/SciEval-valid-mat.json'
output_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/13-Scieval-val.json'

# Convert every record, then write the result as indented UTF-8 JSON;
# ensure_ascii=False keeps non-ASCII characters as-is instead of \u escapes.
transformed_data = transform_json(input_path)
with open(output_path, mode='w', encoding='utf-8') as f:
    json.dump(transformed_data, f, indent=2, ensure_ascii=False)
|
||||
Reference in New Issue
Block a user