diff --git a/.gitignore b/.gitignore
index d122292..6a99ccf 100755
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,7 @@
 # ---> Python
+
+_backend/evaluate/eval_rag_dataset/*
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/_backend/evaluate/construct_rag_eval_dataset.py b/_backend/evaluate/construct_rag_eval_dataset.py
new file mode 100644
index 0000000..47d09f0
--- /dev/null
+++ b/_backend/evaluate/construct_rag_eval_dataset.py
@@ -0,0 +1,127 @@
+import os
+
+import requests
+import pandas as pd
+from openai import OpenAI
+from tqdm import tqdm
+from eval_prompt import QA_generation_prompt
+from datasets import Dataset, DatasetDict
+
+# Constants
+API_KEY = "dataset-OFxH5fwjOmYnfBsQkSWm8gHs"
+DATASETS_NAME = ["2d-mat-new", "eval-paper-new", "gold-nanorod-new", "PSK-new", "phospholipid"]
+OPENAI_API_KEY = "sk-urFGAQRThR6pysea0aC93bD27fA34bA69811A9254aAaD8B2"
+OPENAI_BASE_URL = "http://8.218.238.241:17935/v1"
+MODEL_NAME = "gpt-4o-mini"
+DATASETS_URL = 'http://100.85.52.31:7080/v1/datasets?page=1&limit=100'
+DOCUMENTS_URL = 'http://100.85.52.31:7080/v1/datasets/{}/documents'
+CHUNKS_URL = 'http://100.85.52.31:7080/v1/datasets/{}/documents/{}/segments'
+N_GENERATIONS = -1  # -1 means: generate a QA pair for every chunk
+
+
+def get_all_chunks(datasets_name):
+    """
+    Fetch every chunk of every document in the named knowledge bases.
+
+    Returns:
+        A list of dicts, one per chunk.
+    """
+
+    headers = {'Authorization': f'Bearer {API_KEY}'}
+    all_chunks = []
+
+    # Fetch the knowledge bases (datasets)
+    datasets_response = requests.get(DATASETS_URL, headers=headers)
+    datasets = datasets_response.json()['data']
+
+    for dataset in datasets:
+        dataset_id = dataset['id']
+        if dataset['name'] not in datasets_name:
+            continue
+
+        # Fetch the documents in this dataset
+        documents_response = requests.get(DOCUMENTS_URL.format(dataset_id), headers=headers)
+        documents = documents_response.json()['data']
+
+        for document in documents:
+            document_id = document['id']
+
+            # Fetch the chunks (segments) of this document
+            chunks_response = requests.get(CHUNKS_URL.format(dataset_id, document_id), headers=headers)
+            chunks = chunks_response.json()['data']
+
+            for chunk in chunks:
+                all_chunks.append({
+                    'dataset_name': dataset['name'],
+                    'dataset_id': dataset_id,
+                    'document_id': document_id,
+                    'chunk_id': chunk['id'],
+                    'chunk_text': chunk['content']
+                })
+
+    return all_chunks
+
+
+def get_response_from_llm(messages: list[dict], tools: list = None):
+    client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL)
+    if tools is None:
+        response = client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=messages,
+        )
+    else:
+        response = client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=messages,
+            tools=tools
+        )
+    content = response.choices[0].message.content
+    return content
+
+
+def qa_generator(docs_chunks: list):
+    n_samples = len(docs_chunks) if N_GENERATIONS == -1 else N_GENERATIONS
+    assert n_samples <= len(docs_chunks), f"N_GENERATIONS must not exceed the number of chunks ({len(docs_chunks)})"
+    print(f"Generating {n_samples} QA couples...")
+
+    outputs = []
+    for sampled_context in tqdm(docs_chunks[:n_samples]):
+        # Generate a QA couple for this chunk
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": QA_generation_prompt.format(context=sampled_context['chunk_text'])}
+        ]
+        output_QA_couple = get_response_from_llm(messages)
+        try:
+            question = output_QA_couple.split("Factoid question: ")[-1].split("Answer: ")[0].strip()
+            answer = output_QA_couple.split("Answer: ")[-1].strip()
+            assert len(answer) < 300, "Answer is too long"
+            outputs.append(
+                {
+                    "context": sampled_context['chunk_text'],
+                    "question": question,
+                    "answer": answer,
{"dataset_id": sampled_context["dataset_id"], "document_id": sampled_context["document_id"]} + } + ) + except: + continue + return outputs + + +if __name__ == "__main__": + chunks = get_all_chunks(DATASETS_NAME) + qas = qa_generator(chunks) + + # 创建 Hugging Face 数据集 + dataset = Dataset.from_pandas(pd.DataFrame(qas)) + dataset_dict = DatasetDict({"train": dataset}) + + # 保存数据集 + import os + dir_name = os.path.dirname(__file__) + dataset_dict.save_to_disk(os.path.join(dir_name, "eval_rag_dataset")) + print(f"数据集已保存至本地 {dir_name}/eval_rag_dataset") + + # 如果要发布到 Hugging Face Hub,请取消注释以下行并提供您的用户名和数据集名称 + # dataset_dict.push_to_hub("your-username/your-dataset-name", private=True) + # print("数据集已保存至 Hugging Face Hub。要发布数据集,请手动更改设置。") diff --git a/_backend/evaluate/eval_prompt.py b/_backend/evaluate/eval_prompt.py new file mode 100644 index 0000000..9e8c2d1 --- /dev/null +++ b/_backend/evaluate/eval_prompt.py @@ -0,0 +1,43 @@ +QA_generation_prompt = """ +Your task is to write a factoid question and an answer given a context. +Your factoid question should be answerable with a specific, concise piece of factual information from the context. +Your factoid question should be formulated in the same style as questions users could ask in a search engine. +This means that your factoid question MUST NOT mention something like "according to the passage" or "context". + +Provide your answer as follows: + +Output::: +Factoid question: (your factoid question) +Answer: (your answer to the factoid question) + +Now here is the context. + +Context: {context}\n +Output:::""" + + +EVALUATION_PROMPT = """###Task Description: +An instruction (might include an Input inside it), a response to evaluate, a reference answer that gets a score of 5, and a score rubric representing a evaluation criteria are given. +1. Write a detailed feedback that assess the quality of the response strictly based on the given score rubric, not evaluating in general. +2. After writing a feedback, write a score that is an integer between 1 and 5. You should refer to the score rubric. +3. The output format should look as follows: \"Feedback: {{write a feedback for criteria}} [RESULT] {{an integer number between 1 and 5}}\" +4. Please do not generate any other opening, closing, and explanations. Be sure to include [RESULT] in your output. + +###The instruction to evaluate: +{instruction} + +###Response to evaluate: +{response} + +###Reference Answer (Score 5): +{reference_answer} + +###Score Rubrics: +[Is the response correct, accurate, and factual based on the reference answer?] +Score 1: The response is completely incorrect, inaccurate, and/or not factual. +Score 2: The response is mostly incorrect, inaccurate, and/or not factual. +Score 3: The response is somewhat correct, accurate, and/or factual. +Score 4: The response is mostly correct, accurate, and factual. +Score 5: The response is completely correct, accurate, and factual. 
+
+###Feedback:"""
\ No newline at end of file
diff --git a/_backend/evaluate/rag_eval.py b/_backend/evaluate/rag_eval.py
new file mode 100644
index 0000000..0c88e63
--- /dev/null
+++ b/_backend/evaluate/rag_eval.py
@@ -0,0 +1,44 @@
+from datasets import load_from_disk, DatasetDict
+from eval_prompt import EVALUATION_PROMPT
+from openai import OpenAI
+
+
+OPENAI_API_KEY = "sk-urFGAQRThR6pysea0aC93bD27fA34bA69811A9254aAaD8B2"
+OPENAI_BASE_URL = "http://8.218.238.241:17935/v1"
+MODEL_NAME = "gpt-4o-mini"
+
+
+def load_eval_rag_dataset(dataset_path: str) -> DatasetDict:
+    """Loads the eval_rag_dataset from disk.
+
+    Args:
+        dataset_path (str): The path to the dataset.
+
+    Returns:
+        DatasetDict: The loaded dataset.
+    """
+    return load_from_disk(dataset_path)
+
+
+def get_response_from_llm(messages: list[dict], tools: list = None):
+    client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL)
+    if tools is None:
+        response = client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=messages,
+        )
+    else:
+        response = client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=messages,
+            tools=tools
+        )
+    content = response.choices[0].message.content
+    return content
+
+
+DATASET_PATH = "_backend/evaluate/eval_rag_dataset"
+eval_dataset = load_eval_rag_dataset(DATASET_PATH)['train']
+# Sanity check: print the generated QA pairs.
+for example in eval_dataset:
+    print(example['question'], '->', example['answer'])
\ No newline at end of file
diff --git a/_backend/single_agent_with_rag.py b/_backend/single_agent_with_rag.py
index db1bae1..d82a5e2 100644
--- a/_backend/single_agent_with_rag.py
+++ b/_backend/single_agent_with_rag.py
@@ -1,5 +1,6 @@
 import asyncio
 from typing import Sequence
+from autogen_core import CancellationToken
 from autogen_agentchat.agents import AssistantAgent, SocietyOfMindAgent, UserProxyAgent
 from autogen_agentchat.conditions import MaxMessageTermination, TextMentionTermination, HandoffTermination
 from autogen_agentchat.messages import AgentEvent, ChatMessage, TextMessage, ToolCallExecutionEvent
@@ -23,36 +24,22 @@ model_client = OpenAIChatCompletionClient(
     },
 )
 
-async def main(task: str = ""):
-    user = UserProxyAgent("user_agent", input_func=input)
-    rag_agent = AssistantAgent(
-        "RAGAgent",
-        description="An expert agent in the field of materials science",
+async def main():
+    assistant = AssistantAgent(
+        name="assistant",
+        system_message="""You are a helpful assistant. You can call tools to help the user.""",
         model_client=model_client,
-        system_message="""
-        You are a professional scientist in materials science.
-        You solve material science problems together by talking to users, and you can invoke tools to retrieve information from the knowledge base to implement RAG.
-
-        Always handoff back to user_agent when response is complete.
-        """,
-        handoffs=["user_agent"],
-        reflect_on_tool_use=True,
-        tools=[vector_retrieval_from_knowledge_base]
+        tools=[vector_retrieval_from_knowledge_base],
+        reflect_on_tool_use=True,  # True: the model reflects on the tool result; False: the raw tool call result is returned directly.
     )
-    # handoff_termination = HandoffTermination("DataAnalyst_PlanningAgent")
-    text_mention_termination = TextMentionTermination("APPROVE")
-    max_messages_termination = MaxMessageTermination(max_messages=50)
-    termination = text_mention_termination | max_messages_termination #| handoff_termination
-    # termination = max_messages_termination
-
-    team = Swarm(
-        participants=[rag_agent, user],
-        termination_condition=termination
-    )
-
-    await Console(team.run_stream(task=task))
+    while True:
+        user_input = input("User: ")
+        if user_input == "exit":
+            break
+        response = await assistant.on_messages([TextMessage(content=user_input, source="user")], CancellationToken())
+        print("Assistant:", response.chat_message.content)
 
 if __name__ == "__main__":
-    asyncio.run(main("Let the robot synthesize CsPbBr3 nanocubes at room temperature"))
\ No newline at end of file
+    asyncio.run(main())
\ No newline at end of file
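
Note on rag_eval.py: the script currently stops after loading the dataset. Below is a minimal sketch of how the scoring loop might continue, reusing EVALUATION_PROMPT and get_response_from_llm from that file. `answer_with_rag` is a hypothetical stand-in for the actual RAG pipeline call (e.g. the assistant in single_agent_with_rag.py) and is not defined anywhere in this diff.

import re

def evaluate_example(example: dict, answer_with_rag) -> dict:
    # `answer_with_rag` is hypothetical: question in, RAG answer out.
    response = answer_with_rag(example["question"])
    prompt = EVALUATION_PROMPT.format(
        instruction=example["question"],
        response=response,
        reference_answer=example["answer"],
    )
    feedback = get_response_from_llm([
        {"role": "system", "content": "You are a fair evaluator language model."},
        {"role": "user", "content": prompt},
    ])
    # EVALUATION_PROMPT asks the judge to emit "Feedback: ... [RESULT] <1-5>",
    # so parse the integer that follows [RESULT].
    match = re.search(r"\[RESULT\]\s*(\d)", feedback)
    return {"feedback": feedback, "score": int(match.group(1)) if match else None}

# Example driver: average the judge's scores over the evaluation split.
scores = []
for example in eval_dataset:
    result = evaluate_example(example, answer_with_rag)
    if result["score"] is not None:
        scores.append(result["score"])
print(f"Mean correctness: {sum(scores) / len(scores):.2f} over {len(scores)} scored examples")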
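
Both new scripts also hardcode API keys and endpoints as module constants. A common alternative is to read them from the environment; a sketch, where the variable names are illustrative assumptions rather than anything this diff defines:

import os

# Illustrative environment variable names; match them to your deployment.
API_KEY = os.environ["DIFY_DATASET_API_KEY"]  # knowledge-base API key
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]  # key for the OpenAI-compatible gateway
OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "http://8.218.238.241:17935/v1")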