78 lines
2.4 KiB
Python
78 lines
2.4 KiB
Python
"""
|
|
Web Search Module
|
|
|
|
This module provides functions for searching information on the web.
|
|
"""
|
|
|
|
import asyncio
|
|
from typing import Annotated, Dict, Any, List
|
|
|
|
from langchain_community.utilities import SearxSearchWrapper
|
|
|
|
from mars_toolkit.core.llm_tools import llm_tool
|
|
from mars_toolkit.core.config import config
|
|
|
|
@llm_tool(name="search_online", description="Search scientific information online and return results as a string")
async def search_online(
    query: Annotated[str, "Search term"],
    num_results: Annotated[int, "Number of results (1-20)"] = 5
) -> str:
    """
    Search for scientific information online and return the hits as text.

    The query is sent to the SearxNG instance configured in
    ``config.SEARXNG_HOST`` (restricted to the "science" category) and each
    hit is rendered as a numbered block of title, summary, link and source.

    Args:
        query: Search term for scientific content.
        num_results: Number of results to return. Values that cannot be
            converted to an integer fall back to 5; the value is then
            clamped to the range 1-20.

    Returns:
        Formatted string with a header line followed by one block per
        search result (title, snippet, link, source). When no usable
        result is found, only the header (reporting 0 items) is returned.
    """
    # Make sure num_results is an integer; anything that cannot be coerced
    # (None, non-numeric text, infinity) falls back to the default of 5.
    try:
        num_results = int(num_results)
    except (TypeError, ValueError, OverflowError):
        num_results = 5

    # Clamp to the advertised 1-20 range.
    num_results = max(1, min(20, num_results))

    # Initialize search wrapper
    search = SearxSearchWrapper(
        searx_host=config.SEARXNG_HOST,
        categories=["science"],
        k=num_results,
    )

    # SearxSearchWrapper.results is a blocking call, so run it in the default
    # executor to keep the event loop responsive. We are inside a coroutine,
    # hence get_running_loop() rather than get_event_loop().
    loop = asyncio.get_running_loop()
    raw_results = await loop.run_in_executor(
        None,
        lambda: search.results(query, language=['en', 'zh'], num_results=num_results),
    )

    # Keep only entries that carry at least a title, snippet or link.
    # NOTE(review): SearxSearchWrapper.results appears to return a single
    # placeholder dict without these keys when nothing is found; without this
    # filter that placeholder would be reported as one blank result. Confirm
    # against the installed langchain_community version.
    hits = [
        item for item in raw_results
        if any(item.get(key) for key in ("title", "snippet", "link"))
    ]

    # NOTE(review): "source" may never be populated by the wrapper (it seems
    # to expose "engines"/"category" instead); kept as-is to preserve the
    # output format.
    header = f"Search Results for '{query}' ({len(hits)} items):\n\n"
    entries = [
        f"Result {index}:\n"
        f"Title: {item.get('title', '')}\n"
        f"Summary: {item.get('snippet', '')}\n"
        f"Link: {item.get('link', '')}\n"
        f"Source: {item.get('source', '')}\n\n"
        for index, item in enumerate(hits, 1)
    ]

    return header + "".join(entries)
|