Files
mars-mcp/mars_toolkit/query/web_search.py

78 lines
2.4 KiB
Python

"""
Web Search Module
This module provides functions for searching information on the web.
"""
import asyncio
from typing import Annotated, Dict, Any, List
from langchain_community.utilities import SearxSearchWrapper
from mars_toolkit.core.llm_tools import llm_tool
from mars_toolkit.core.config import config
@llm_tool(name="search_online", description="Search scientific information online and return results as a string")
async def search_online(
    query: Annotated[str, "Search term"],
    num_results: Annotated[int, "Number of results (1-20)"] = 5
) -> str:
    """
    Search for scientific information online and return a formatted string.

    The query is sent to the SearxNG instance configured in
    ``config.SEARXNG_HOST``, restricted to the "science" category.

    Args:
        query: Search term for scientific content.
        num_results: Number of results to return. Values that cannot be
            converted to an integer fall back to 5; the value is then
            clamped to the range 1-20.

    Returns:
        A header line ``Search Results for '<query>' (<n> items):`` followed
        by one block per result with its title, summary, link and source.
        When the search yields no usable results the header reports 0 items
        and no result blocks follow.
    """
    # Ensure num_results is an integer; fall back to the default otherwise.
    try:
        num_results = int(num_results)
    except (TypeError, ValueError, OverflowError):
        num_results = 5
    # Clamp to the supported range.
    num_results = max(1, min(num_results, 20))

    # Initialize search wrapper
    search = SearxSearchWrapper(
        searx_host=config.SEARXNG_HOST,
        categories=["science"],
        k=num_results,
    )

    # Run the blocking search call in the default executor so the event loop
    # is not blocked. We are inside a coroutine, so the running loop is the
    # correct one to use.
    loop = asyncio.get_running_loop()
    raw_results = await loop.run_in_executor(
        None,
        lambda: search.results(query, language=['en', 'zh'], num_results=num_results)
    )

    # When nothing is found the wrapper returns a single placeholder dict
    # without title/link; drop such entries so they are not counted or
    # rendered as an empty result.
    hits = [
        result for result in (raw_results or [])
        if result.get("title") or result.get("link")
    ]

    # Build the output with a list + join rather than repeated "+=".
    parts = [f"Search Results for '{query}' ({len(hits)} items):\n\n"]
    for i, result in enumerate(hits, 1):
        source = result.get("source", "")
        if not source:
            # Result dicts from the wrapper expose the originating search
            # engines under "engines" rather than "source"; use that so the
            # "Source" line is not always blank.
            engines = result.get("engines") or []
            if isinstance(engines, str):
                source = engines
            else:
                source = ", ".join(str(engine) for engine in engines)
        parts.append(
            f"Result {i}:\n"
            f"Title: {result.get('title', '')}\n"
            f"Summary: {result.get('snippet', '')}\n"
            f"Link: {result.get('link', '')}\n"
            f"Source: {source}\n\n"
        )
    return "".join(parts)