新增rag eval

This commit is contained in:
2025-02-04 12:48:54 +08:00
parent cc3b28a59a
commit 2c9b1bba3a
5 changed files with 231 additions and 27 deletions

View File

@@ -0,0 +1,127 @@
import requests
import pandas as pd
import json
from openai import OpenAI
from tqdm import tqdm
from eval_prompt import QA_generation_prompt
from datasets import Dataset, DatasetDict
# Constants
# SECURITY NOTE(review): the API keys/tokens below are hard-coded; they should
# be moved to environment variables or a secrets manager before this code is
# shared or deployed.
API_KEY = "dataset-OFxH5fwjOmYnfBsQkSWm8gHs"  # Dify dataset API token (Bearer auth)
DATASETS_NAME = ["2d-mat-new", "eval-paper-new", "gold-nanorod-new", "PSK-new", "phospholipid"]  # knowledge bases to sample chunks from
OPENAI_API_KEY = "sk-urFGAQRThR6pysea0aC93bD27fA34bA69811A9254aAaD8B2"
OPENAI_BASE_URL = "http://8.218.238.241:17935/v1"  # OpenAI-compatible proxy endpoint
MODEL_NAME = "gpt-4o-mini"
DATASETS_URL = 'http://100.85.52.31:7080/v1/datasets?page=1&limit=100'  # NOTE(review): only page 1 is ever fetched
DOCUMENTS_URL = 'http://100.85.52.31:7080/v1/datasets/{}/documents'  # format args: dataset_id
CHUNKS_URL = 'http://100.85.52.31:7080/v1/datasets/{}/documents/{}/segments'  # format args: dataset_id, document_id
N_GENERATIONS = -1  # -1 means "generate one QA pair per available chunk"
def get_all_chunks(datasets_name):
    """Fetch every chunk (segment) of every document in the named knowledge bases.

    Args:
        datasets_name: iterable of dataset names to include; datasets whose
            name is not in this collection are skipped.

    Returns:
        list[dict]: one dict per chunk with keys ``dataset_name``,
        ``dataset_id``, ``document_id``, ``chunk_id`` and ``chunk_text``.

    Raises:
        requests.HTTPError: if any API call returns an error status
            (previously HTTP failures surfaced as confusing KeyErrors
            when indexing the JSON body).
    """
    all_chunks = []
    # Reuse one connection (and the auth header) across the many sequential calls.
    session = requests.Session()
    session.headers.update({'Authorization': f'Bearer {API_KEY}'})
    # List the datasets (knowledge bases).
    datasets_response = session.get(DATASETS_URL)
    datasets_response.raise_for_status()
    datasets = datasets_response.json()['data']
    for dataset in datasets:
        if dataset['name'] not in datasets_name:
            continue
        dataset_id = dataset['id']
        # List the documents of this dataset.
        documents_response = session.get(DOCUMENTS_URL.format(dataset_id))
        documents_response.raise_for_status()
        documents = documents_response.json()['data']
        for document in documents:
            document_id = document['id']
            # List the chunks (segments) of this document.
            chunks_response = session.get(CHUNKS_URL.format(dataset_id, document_id))
            chunks_response.raise_for_status()
            chunks = chunks_response.json()['data']
            for chunk in chunks:
                all_chunks.append({
                    'dataset_name': dataset['name'],
                    'dataset_id': dataset_id,
                    'document_id': document_id,
                    'chunk_id': chunk['id'],
                    'chunk_text': chunk['content']
                })
    # NOTE(review): the documents/segments endpoints appear paginated (the
    # datasets URL uses page/limit) but only the first page is read here —
    # confirm whether pagination handling is needed for large knowledge bases.
    return all_chunks
def get_response_from_llm(messages: list[dict], tools: list = None):
    """Send a chat-completion request and return the assistant's text content.

    Args:
        messages: OpenAI-style chat messages ({"role": ..., "content": ...}).
        tools: optional tool/function definitions; omitted from the request
            entirely when None.

    Returns:
        The ``content`` string of the first choice's message (may be None if
        the model returned no text content).
    """
    client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL)
    # Build the kwargs once instead of duplicating the create() call in
    # two nearly identical branches.
    kwargs = {"model": MODEL_NAME, "messages": messages}
    if tools is not None:
        kwargs["tools"] = tools
    response = client.chat.completions.create(**kwargs)
    return response.choices[0].message.content
def qa_generator(docs_chunks: list):
    """Generate factoid question/answer pairs from document chunks via the LLM.

    Args:
        docs_chunks: chunk dicts as produced by ``get_all_chunks`` (must have
            ``chunk_text``, ``dataset_id`` and ``document_id`` keys).

    Returns:
        list[dict]: entries with ``context``, ``question``, ``answer`` and
        ``source_doc``. Chunks whose LLM output cannot be parsed, or whose
        answer is 300+ characters, are skipped (same as before).
    """
    # N_GENERATIONS == -1 means "use every chunk".
    n_samples = len(docs_chunks) if N_GENERATIONS == -1 else N_GENERATIONS
    assert N_GENERATIONS <= len(docs_chunks), f"N_GENERATIONS MUST LOWER THAN THE LENGTH OF chunks {len(docs_chunks)}"
    print(f"Generating {n_samples} QA couples...")
    outputs = []
    for sampled_context in tqdm(docs_chunks[:n_samples]):
        # Ask the LLM for one QA couple grounded in this chunk.
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": QA_generation_prompt.format(context=sampled_context['chunk_text'])}
        ]
        output_QA_couple = get_response_from_llm(messages)
        try:
            # Expected reply shape: "... Factoid question: <q> Answer: <a>".
            question = output_QA_couple.split("Factoid question: ")[-1].split("Answer: ")[0]
            answer = output_QA_couple.split("Answer: ")[-1]
            if len(answer) >= 300:
                # Overlong answers mean the model ignored the "concise factoid"
                # instruction; drop the sample. (Was an assert caught by a bare
                # except — that check would vanish under `python -O`.)
                continue
            outputs.append(
                {
                    "context": sampled_context['chunk_text'],
                    "question": question,
                    "answer": answer,
                    "source_doc": {"dataset_id": sampled_context["dataset_id"], "document_id": sampled_context["document_id"]}
                }
            )
        except (AttributeError, KeyError, TypeError) as e:
            # AttributeError/TypeError: LLM returned None content;
            # KeyError: malformed chunk dict. The original bare `except:`
            # also swallowed KeyboardInterrupt/SystemExit.
            print(f"Skipping chunk due to parse failure: {e!r}")
            continue
    return outputs
if __name__ == "__main__":
    # Collect every chunk from the configured knowledge bases, then turn
    # them into QA pairs with the LLM.
    chunks = get_all_chunks(DATASETS_NAME)
    qas = qa_generator(chunks)
    # Build a Hugging Face dataset with a single "train" split.
    dataset = Dataset.from_pandas(pd.DataFrame(qas))
    dataset_dict = DatasetDict({"train": dataset})
    # Persist the dataset next to this script.
    import os
    dir_name = os.path.dirname(__file__)
    dataset_dict.save_to_disk(os.path.join(dir_name, "eval_rag_dataset"))
    print(f"数据集已保存至本地 {dir_name}/eval_rag_dataset")
    # To publish to the Hugging Face Hub, uncomment the lines below and
    # supply your username and dataset name.
    # dataset_dict.push_to_hub("your-username/your-dataset-name", private=True)
    # print("数据集已保存至 Hugging Face Hub。要发布数据集请手动更改设置。")

View File

@@ -0,0 +1,43 @@
# Prompt used to synthesize one factoid question/answer pair from a single
# retrieved context chunk. The caller fills in {context} via str.format and
# parses the reply on the literal markers "Factoid question: " / "Answer: ",
# so those marker strings must not be changed independently of the parser.
QA_generation_prompt = """
Your task is to write a factoid question and an answer given a context.
Your factoid question should be answerable with a specific, concise piece of factual information from the context.
Your factoid question should be formulated in the same style as questions users could ask in a search engine.
This means that your factoid question MUST NOT mention something like "according to the passage" or "context".
Provide your answer as follows:
Output:::
Factoid question: (your factoid question)
Answer: (your answer to the factoid question)
Now here is the context.
Context: {context}\n
Output:::"""
# LLM-as-judge prompt: rates a response against a reference answer on a 1-5
# rubric. Placeholders {instruction}, {response} and {reference_answer} are
# filled via str.format; the judge is instructed to reply
# "Feedback: ... [RESULT] <integer 1-5>", which callers parse on "[RESULT]".
EVALUATION_PROMPT = """###Task Description:
An instruction (might include an Input inside it), a response to evaluate, a reference answer that gets a score of 5, and a score rubric representing a evaluation criteria are given.
1. Write a detailed feedback that assess the quality of the response strictly based on the given score rubric, not evaluating in general.
2. After writing a feedback, write a score that is an integer between 1 and 5. You should refer to the score rubric.
3. The output format should look as follows: \"Feedback: {{write a feedback for criteria}} [RESULT] {{an integer number between 1 and 5}}\"
4. Please do not generate any other opening, closing, and explanations. Be sure to include [RESULT] in your output.
###The instruction to evaluate:
{instruction}
###Response to evaluate:
{response}
###Reference Answer (Score 5):
{reference_answer}
###Score Rubrics:
[Is the response correct, accurate, and factual based on the reference answer?]
Score 1: The response is completely incorrect, inaccurate, and/or not factual.
Score 2: The response is mostly incorrect, inaccurate, and/or not factual.
Score 3: The response is somewhat correct, accurate, and/or factual.
Score 4: The response is mostly correct, accurate, and factual.
Score 5: The response is completely correct, accurate, and factual.
###Feedback:"""

View File

@@ -0,0 +1,44 @@
from datasets import load_from_disk, Dataset, DatasetDict
from eval_prompt import EVALUATION_PROMPT
from openai import OpenAI
# SECURITY NOTE(review): hard-coded credentials duplicated from the dataset
# generation script; move to environment variables / a shared config module.
OPENAI_API_KEY = "sk-urFGAQRThR6pysea0aC93bD27fA34bA69811A9254aAaD8B2"
OPENAI_BASE_URL = "http://8.218.238.241:17935/v1"  # OpenAI-compatible proxy endpoint
MODEL_NAME = "gpt-4o-mini"
def load_eval_rag_dataset(dataset_path: str) -> DatasetDict:
    """Read back the RAG-evaluation dataset previously written with save_to_disk.

    Args:
        dataset_path (str): Directory holding the serialized dataset.

    Returns:
        DatasetDict: The dataset splits loaded from disk.
    """
    loaded = load_from_disk(dataset_path)
    return loaded
def get_response_from_llm(messages: list[dict], tools: list = None):
    """Send a chat-completion request and return the assistant's text content.

    NOTE(review): this function is copy-pasted from the dataset generation
    script; consider extracting it into a shared module.

    Args:
        messages: OpenAI-style chat messages ({"role": ..., "content": ...}).
        tools: optional tool/function definitions; omitted from the request
            entirely when None.

    Returns:
        The ``content`` string of the first choice's message (may be None if
        the model returned no text content).
    """
    client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL)
    # Build the kwargs once instead of duplicating the create() call in
    # two nearly identical branches.
    kwargs = {"model": MODEL_NAME, "messages": messages}
    if tools is not None:
        kwargs["tools"] = tools
    response = client.chat.completions.create(**kwargs)
    return response.choices[0].message.content
# Path to the dataset produced by the QA-generation script (save_to_disk output).
DATASET_PATH = "_backend/evaluate/eval_rag_dataset"
eval_dataset = load_eval_rag_dataset(DATASET_PATH)['train']  # only the "train" split is evaluated
for i in eval_dataset:
print()