layer2 commit
This commit is contained in:
91
layer2/rubbish/1.py
Normal file
91
layer2/rubbish/1.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import json
import os
import re
import time

from openai import OpenAI
|
||||
|
||||
# The API key is read from the environment so that no credential is stored in
# the source tree.
# NOTE(review): a key was previously committed in this file; it should be
# treated as compromised and revoked with the provider.
_api_key = os.environ.get("OPENAI_API_KEY")
if not _api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this script."
    )

# Shared chat-completions client used by classify_qa_type().
client = OpenAI(
    api_key=_api_key,
    base_url="https://vip.apiyi.com/v1",
)
|
||||
|
||||
def load_qa_data(file_path):
    """Parse the UTF-8 encoded JSON file at *file_path* and return its content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (whatever top-level type the file holds).
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return json.loads(handle.read())
|
||||
|
||||
# Classify a question as calculation, multiple choice, true/false, or other
|
||||
# Matches a category code (1-4) that is not part of a longer number, so a
# reply such as "12" or "2024" is not mistaken for a valid code.
_TYPE_CODE_RE = re.compile(r"(?<!\d)[1-4](?!\d)")


def _parse_type_code(reply):
    """Return the first standalone category code (1-4) in *reply*, or 4 if none."""
    match = _TYPE_CODE_RE.search(reply)
    return int(match.group()) if match else 4


def classify_qa_type(question, answer):
    """Ask the chat model which category a question/answer pair belongs to.

    Args:
        question: Question text inserted into the prompt.
        answer: Answer text inserted into the prompt.

    Returns:
        1 for calculation, 2 for multiple choice, 3 for true/false and 4 for
        other (4 is also returned when the reply contains no recognizable
        code).  0 is returned when the request fails or the model returns no
        text, so callers can tell failures apart from real labels.
    """
    prompt = (
        "\n"
        "Please analyze the following question and its answer, and classify the question type into one of the following four categories:\n"
        "\n"
        "1. Calculation: A question that requires mathematical operations to derive the result.\n"
        "2. Multiple choice: A question that provides multiple options (e.g., A/B/C/D) for the respondent to choose from.\n"
        "3. True/False: A question that only requires answering true/false, yes/no, or correct/incorrect.\n"
        "4. Other: A question that does not fall under the above three categories.\n"
        "\n"
        "Question:\n"
        f"{question}\n"
        "Answer:\n"
        f"{answer}\n"
        "\n"
        "Please respond with the corresponding numeric code directly (without any explanation):\n"
        "1. For Calculation, respond: 1\n"
        "2. For Multiple choice, respond: 2\n"
        "3. For True/False, respond: 3\n"
        "4. For Other, respond: 4\n"
    )

    try:
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": prompt},
            ],
            stream=False,
        )
        reply = response.choices[0].message.content
        if reply is None:
            # Treat a reply without text as a failed call instead of relying
            # on an incidental AttributeError.
            raise ValueError("model returned no text content")
        result = reply.strip().lower()
        print(result)
        # Use the first standalone code rather than substring tests in a fixed
        # order, so a reply like "2 (not 1)" yields 2 instead of 1.
        return _parse_type_code(result)
    except Exception as e:
        # Deliberate best-effort boundary: one failed request must not abort
        # the whole run, so report it and return the failure sentinel 0.
        print(f"API调用错误: {e}")
        return 0
|
||||
|
||||
# 处理整个数据集并添加标签
|
||||
def process_dataset(data):
    """Label every record in *data* with its question type.

    Each record's "question" and "answer" values are passed to
    classify_qa_type() and the returned code is stored under the record's
    "type" key.  Records are modified in place.

    Args:
        data: Sized iterable of dict-like records with "question" and
            "answer" keys.

    Returns:
        The same *data* object that was passed in.
    """
    total = len(data)
    for i, item in enumerate(data):
        print(f"处理第 {i+1}/{total} 条数据...")
        item["type"] = classify_qa_type(item["question"], item["answer"])

        # Pause for two seconds after every tenth record.
        if not (i + 1) % 10:
            time.sleep(2)

    return data
|
||||
|
||||
# 保存处理后的数据
|
||||
def save_processed_data(data, output_file):
    """Write *data* to *output_file* as UTF-8 JSON indented by two spaces.

    Non-ASCII characters are written as-is rather than as \\uXXXX escapes.

    Args:
        data: JSON-serializable value to store.
        output_file: Destination path; an existing file is overwritten.
    """
    with open(output_file, mode='w', encoding='utf-8') as sink:
        json.dump(obj=data, fp=sink, indent=2, ensure_ascii=False)
|
||||
|
||||
def main(input_file="/home/ubuntu/50T/fsy/benchmark/3single_select.json",
         output_file="4is_type.json"):
    """Load a QA dataset, label every record, and save the labelled copy.

    Args:
        input_file: Path of the JSON dataset to read.  The default is the
            location the script originally used.
        output_file: Path the labelled dataset is written to.
    """
    data = load_qa_data(input_file)
    processed_data = process_dataset(data)
    save_processed_data(processed_data, output_file)
    print(f"处理完成,结果已保存到 {output_file}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user