Add RAG eval

2025-02-04 12:48:54 +08:00
parent cc3b28a59a
commit 2c9b1bba3a
5 changed files with 231 additions and 27 deletions

.gitignore

@@ -1,4 +1,7 @@
 # ---> Python
+_backend/evaluate/eval_rag_dataset/*
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]


@@ -0,0 +1,127 @@
import requests
import pandas as pd
import json
import os
from openai import OpenAI
from tqdm import tqdm
from eval_prompt import QA_generation_prompt
from datasets import Dataset, DatasetDict
# Constants
API_KEY = "dataset-OFxH5fwjOmYnfBsQkSWm8gHs"
DATASETS_NAME = ["2d-mat-new", "eval-paper-new", "gold-nanorod-new", "PSK-new", "phospholipid"]
OPENAI_API_KEY = "sk-urFGAQRThR6pysea0aC93bD27fA34bA69811A9254aAaD8B2"
OPENAI_BASE_URL = "http://8.218.238.241:17935/v1"
MODEL_NAME = "gpt-4o-mini"
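# Knowledge-base service endpoints (list datasets, documents, and chunk segments)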
DATASETS_URL = 'http://100.85.52.31:7080/v1/datasets?page=1&limit=100'
DOCUMENTS_URL = 'http://100.85.52.31:7080/v1/datasets/{}/documents'
CHUNKS_URL = 'http://100.85.52.31:7080/v1/datasets/{}/documents/{}/segments'
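# Number of QA pairs to generate; -1 means one per retrieved chunk.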
N_GENERATIONS = -1
def get_all_chunks(datasets_name):
"""
    Fetch every chunk of every document in the knowledge bases listed in datasets_name.
    Returns:
        A list of dicts, one per chunk.
"""
headers = {'Authorization': f'Bearer {API_KEY}'}
all_chunks = []
    # Fetch all knowledge bases (datasets)
datasets_response = requests.get(DATASETS_URL, headers=headers)
datasets = datasets_response.json()['data']
for dataset in datasets:
dataset_id = dataset['id']
if dataset['name'] not in datasets_name:
continue
        # Fetch the documents in this dataset
documents_response = requests.get(DOCUMENTS_URL.format(dataset_id), headers=headers)
documents = documents_response.json()['data']
for document in documents:
document_id = document['id']
            # Fetch the chunks (segments) of this document
chunks_response = requests.get(CHUNKS_URL.format(dataset_id, document_id), headers=headers)
chunks = chunks_response.json()['data']
for chunk in chunks:
all_chunks.append({
'dataset_name': dataset['name'],
'dataset_id': dataset_id,
'document_id': document_id,
'chunk_id': chunk['id'],
'chunk_text': chunk['content']
})
return all_chunks
def get_response_from_llm(messages: list[dict], tools: list = None):
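    """Call the chat-completions endpoint and return the assistant's reply text."""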
client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL)
if tools is None:
response = client.chat.completions.create(
model=MODEL_NAME,
messages=messages,
)
else:
response = client.chat.completions.create(
model=MODEL_NAME,
messages=messages,
tools=tools
)
content = response.choices[0].message.content
return content
def qa_generator(docs_chunks: list):
    n_samples = len(docs_chunks) if N_GENERATIONS == -1 else N_GENERATIONS
    assert n_samples <= len(docs_chunks), f"N_GENERATIONS must not exceed the number of chunks ({len(docs_chunks)})"
print(f"Generating {n_samples} QA couples...")
outputs = []
for sampled_context in tqdm(docs_chunks[:n_samples]):
# Generate QA couple
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": QA_generation_prompt.format(context=sampled_context['chunk_text'])}
]
output_QA_couple = get_response_from_llm(messages)
try:
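            # The model is asked to reply as "Factoid question: ...\nAnswer: ..."; split on those markers.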
question = output_QA_couple.split("Factoid question: ")[-1].split("Answer: ")[0]
answer = output_QA_couple.split("Answer: ")[-1]
assert len(answer) < 300, "Answer is too long"
outputs.append(
{
"context": sampled_context['chunk_text'],
"question": question,
"answer": answer,
"source_doc": {"dataset_id": sampled_context["dataset_id"], "document_id": sampled_context["document_id"]}
}
)
        except Exception:
            # Skip malformed generations (unexpected output format or over-long answer).
            continue
return outputs
if __name__ == "__main__":
chunks = get_all_chunks(DATASETS_NAME)
qas = qa_generator(chunks)
    # Build a Hugging Face dataset from the generated QA pairs
dataset = Dataset.from_pandas(pd.DataFrame(qas))
dataset_dict = DatasetDict({"train": dataset})
    # Save the dataset to disk next to this script
    dir_name = os.path.dirname(__file__)
    dataset_dict.save_to_disk(os.path.join(dir_name, "eval_rag_dataset"))
    print(f"Dataset saved locally to {dir_name}/eval_rag_dataset")
    # To publish to the Hugging Face Hub, uncomment the lines below and provide your username and dataset name
    # dataset_dict.push_to_hub("your-username/your-dataset-name", private=True)
    # print("Dataset pushed to the Hugging Face Hub. To publish it, adjust the visibility settings manually.")

eval_prompt.py

@@ -0,0 +1,43 @@
QA_generation_prompt = """
Your task is to write a factoid question and an answer given a context.
Your factoid question should be answerable with a specific, concise piece of factual information from the context.
Your factoid question should be formulated in the same style as questions users could ask in a search engine.
This means that your factoid question MUST NOT mention something like "according to the passage" or "context".
Provide your answer as follows:
Output:::
Factoid question: (your factoid question)
Answer: (your answer to the factoid question)
Now here is the context.
Context: {context}\n
Output:::"""
EVALUATION_PROMPT = """###Task Description:
An instruction (might include an Input inside it), a response to evaluate, a reference answer that gets a score of 5, and a score rubric representing the evaluation criteria are given.
1. Write detailed feedback that assesses the quality of the response strictly based on the given score rubric, not evaluating in general.
2. After writing a feedback, write a score that is an integer between 1 and 5. You should refer to the score rubric.
3. The output format should look as follows: \"Feedback: {{write a feedback for criteria}} [RESULT] {{an integer number between 1 and 5}}\"
4. Please do not generate any other opening, closing, and explanations. Be sure to include [RESULT] in your output.
###The instruction to evaluate:
{instruction}
###Response to evaluate:
{response}
###Reference Answer (Score 5):
{reference_answer}
###Score Rubrics:
[Is the response correct, accurate, and factual based on the reference answer?]
Score 1: The response is completely incorrect, inaccurate, and/or not factual.
Score 2: The response is mostly incorrect, inaccurate, and/or not factual.
Score 3: The response is somewhat correct, accurate, and/or factual.
Score 4: The response is mostly correct, accurate, and factual.
Score 5: The response is completely correct, accurate, and factual.
###Feedback:"""


@@ -0,0 +1,44 @@
from datasets import load_from_disk, Dataset, DatasetDict
from eval_prompt import EVALUATION_PROMPT
from openai import OpenAI
OPENAI_API_KEY = "sk-urFGAQRThR6pysea0aC93bD27fA34bA69811A9254aAaD8B2"
OPENAI_BASE_URL = "http://8.218.238.241:17935/v1"
MODEL_NAME = "gpt-4o-mini"
def load_eval_rag_dataset(dataset_path: str) -> DatasetDict:
"""Loads the eval_rag_dataset from disk.
Args:
dataset_path (str): The path to the dataset.
Returns:
DatasetDict: The loaded dataset.
"""
return load_from_disk(dataset_path)
def get_response_from_llm(messages: list[dict], tools: list = None):
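    """Call the chat-completions endpoint and return the assistant's reply text."""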
client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL)
if tools is None:
response = client.chat.completions.create(
model=MODEL_NAME,
messages=messages,
)
else:
response = client.chat.completions.create(
model=MODEL_NAME,
messages=messages,
tools=tools
)
content = response.choices[0].message.content
return content
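
# Hypothetical helper (a sketch, not in the original commit): grade one RAG answer
# against its reference with EVALUATION_PROMPT. It assumes the RAG system's answer
# is produced elsewhere and passed in as `rag_answer`.
def score_response(question: str, rag_answer: str, reference_answer: str) -> int:
    messages = [
        # This system message is an assumed choice, not specified by the repo.
        {"role": "system", "content": "You are a fair evaluator language model."},
        {"role": "user", "content": EVALUATION_PROMPT.format(
            instruction=question,
            response=rag_answer,
            reference_answer=reference_answer,
        )},
    ]
    feedback = get_response_from_llm(messages)
    # The prompt instructs the grader to end with "[RESULT] <integer 1-5>".
    return int(feedback.split("[RESULT]")[-1].strip())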
DATASET_PATH = "_backend/evaluate/eval_rag_dataset"
eval_dataset = load_eval_rag_dataset(DATASET_PATH)["train"]
for sample in eval_dataset:
    # Placeholder loop: a full evaluation would obtain the RAG system's answer for
    # sample["question"] and score it (e.g. with score_response above); for now,
    # just print each sample.
    print(sample)


@@ -1,5 +1,6 @@
 import asyncio
 from typing import Sequence
+from autogen_core import CancellationToken
 from autogen_agentchat.agents import AssistantAgent, SocietyOfMindAgent, UserProxyAgent
 from autogen_agentchat.conditions import MaxMessageTermination, TextMentionTermination, HandoffTermination
 from autogen_agentchat.messages import AgentEvent, ChatMessage, TextMessage, ToolCallExecutionEvent
@@ -23,36 +24,22 @@ model_client = OpenAIChatCompletionClient(
     },
 )
-async def main(task: str = ""):
-    user = UserProxyAgent("user_agent", input_func=input)
-    rag_agent = AssistantAgent(
-        "RAGAgent",
-        description="An expert agent in the field of materials science",
+async def main():
+    assistant = AssistantAgent(
+        name="assistant",
+        system_message="""You are a helpful assistant. You can call tools to help user.""",
         model_client=model_client,
-        system_message="""
-You are a professional scientist in materials science.
-You solve material science problems together by talking to users, and you can invoke tools to retrieve information from the knowledge base to implement RAG.
-Always handoff back to user_agent when response is complete.
-""",
-        handoffs=["user_agent"],
-        reflect_on_tool_use=True,
-        tools=[vector_retrieval_from_knowledge_base]
+        tools=[vector_retrieval_from_knowledge_base],
+        reflect_on_tool_use=True,  # Set to True to have the model reflect on the tool use, set to False to return the tool call result directly.
     )
-    # handoff_termination = HandoffTermination("DataAnalyst_PlanningAgent")
-    text_mention_termination = TextMentionTermination("APPROVE")
-    max_messages_termination = MaxMessageTermination(max_messages=50)
-    termination = text_mention_termination | max_messages_termination  # | handoff_termination
-    # termination = max_messages_termination
-    team = Swarm(
-        participants=[rag_agent, user],
-        termination_condition=termination
-    )
-    await Console(team.run_stream(task=task))
+    while True:
+        user_input = input("User: ")
+        if user_input == "exit":
+            break
+        response = await assistant.on_messages([TextMessage(content=user_input, source="user")], CancellationToken())
+        print("Assistant:", response.chat_message.content)
 if __name__ == "__main__":
-    asyncio.run(main("Let the robot synthesize CsPbBr3 nanocubes at room temperature"))
+    asyncio.run(main())