layer2 commit
This commit is contained in:
42
layer2/rubbish/0Splitting.py
Normal file
42
layer2/rubbish/0Splitting.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# Split each record's "segment" text into a question part and an answer part.
|
||||
|
||||
import json
|
||||
|
||||
input_file_path = '/home/ubuntu/50T/fsy/benchmark/dataset-ours/[Solution]qa_segment_all.json'  # change to the actual path of the input file
output_file_path = '[Solution]qa_segment.json'  # output file path

# Markers that separate the question from the answer, tried in this order:
# "Solution" takes priority over "Answer" when both occur in a segment.
_SPLIT_MARKERS = ("Solution", "Answer")

# Placeholder written to both fields when a segment cannot be split.
_UNSPLIT_PLACEHOLDER = "000"


def split_segment(segment):
    """Split *segment* into a ``(question, answer)`` pair.

    The text is cut at the first occurrence of the first marker from
    ``_SPLIT_MARKERS`` that it contains; the marker itself is dropped and
    both halves are stripped of surrounding whitespace.

    Args:
        segment: The raw text of one record. Anything that is not a ``str``
            (for example a JSON ``null``) is treated as having no marker.

    Returns:
        A ``(question, answer)`` tuple of strings, or
        ``("000", "000")`` when no marker is present.
    """
    if isinstance(segment, str):
        for marker in _SPLIT_MARKERS:
            question, found, answer = segment.partition(marker)
            if found:
                return question.strip(), answer.strip()
    return _UNSPLIT_PLACEHOLDER, _UNSPLIT_PLACEHOLDER


def process_items(data):
    """Convert every record in *data* into an idx/question/answer dict.

    Args:
        data: An iterable of dict-like records; each one is read through
            ``.get("segment", "")`` and ``.get("idx")``.

    Returns:
        A list with one ``{"idx", "question", "answer"}`` dict per input
        record, in the same order as the input.
    """
    processed = []
    for item in data:
        question, answer = split_segment(item.get("segment", ""))
        processed.append({
            "idx": item.get("idx"),
            "question": question,
            "answer": answer,
        })
    return processed


def main(input_path=input_file_path, output_path=output_file_path):
    """Read the segment file, split every record, and write the result.

    Args:
        input_path: Path of the JSON file holding the list of records.
        output_path: Path of the JSON file to write.

    Returns:
        The path the processed data was written to.
    """
    with open(input_path, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    processed_data = process_items(data)

    with open(output_path, 'w', encoding='utf-8') as outfile:
        json.dump(processed_data, outfile, ensure_ascii=False, indent=4)

    return output_path


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user