Files
mars-mcp/mars_toolkit/query/web_search.py
2025-04-16 11:15:01 +08:00

81 lines
2.5 KiB
Python
Executable File

"""
Web Search Module
This module provides functions for searching information on the web.
"""
import asyncio
import os
from typing import Annotated, Dict, Any, List
from langchain_community.utilities import SearxSearchWrapper
from mars_toolkit.core.llm_tools import llm_tool
from mars_toolkit.core.config import config
def _format_search_results(query: str, results: List[Dict[str, Any]]) -> str:
    """Render raw search hits as the plain-text report returned by ``search_online``."""
    # Keep only entries that carry at least one displayable field. An entry
    # with no title, snippet or link would otherwise be printed as an empty
    # "Result N" and inflate the item count in the header.
    # NOTE(review): SearxSearchWrapper.results appears to signal "nothing
    # found" with a single placeholder dict that has none of these keys —
    # confirm against the installed langchain_community version.
    hits = [
        item for item in results
        if any(item.get(key) for key in ("title", "snippet", "link"))
    ]

    sections = [f"Search Results for '{query}' ({len(hits)} items):\n\n"]
    for index, item in enumerate(hits, 1):
        sections.append(
            f"Result {index}:\n"
            f"Title: {item.get('title', '')}\n"
            f"Summary: {item.get('snippet', '')}\n"
            f"Link: {item.get('link', '')}\n"
            f"Source: {item.get('source', '')}\n\n"
        )
    # Join once instead of growing a string with repeated "+=".
    return "".join(sections)


@llm_tool(name="search_online", description="Search scientific information online and return results as a string")
async def search_online(
    query: Annotated[str, "Search term"],
    num_results: Annotated[int, "Number of results (1-20)"] = 5
) -> str:
    """
    Search for scientific information online and return a formatted string.

    Args:
        query: Search term for scientific content.
        num_results: Number of results to return. Values that cannot be
            converted to an integer fall back to 5; the value is then
            clamped to the range 1-20.

    Returns:
        A header line followed by one section per hit listing its title,
        summary (snippet), link and source. Missing fields are rendered as
        empty strings.

    Exceptions raised by the underlying search call are not caught here and
    propagate to the caller.
    """
    # Blank the proxy variables for the whole process before searching.
    # NOTE(review): presumably so the request to the SearXNG host is not
    # routed through a configured proxy — confirm. This mutates os.environ
    # and therefore affects every later request made by this process.
    os.environ['HTTP_PROXY'] = ''
    os.environ['HTTPS_PROXY'] = ''

    # Make sure num_results is an integer (the value may arrive as a string
    # or float), then clamp it to the supported range.
    try:
        num_results = int(num_results)
    except (TypeError, ValueError, OverflowError):
        num_results = 5
    num_results = max(1, min(num_results, 20))

    # Initialize the search wrapper, restricted to the "science" category.
    search = SearxSearchWrapper(
        searx_host=config.SEARXNG_HOST,
        categories=["science"],
        k=num_results,
    )

    # SearxSearchWrapper.results is a blocking call, so run it in the default
    # executor to avoid stalling the event loop. get_running_loop() is the
    # supported way to obtain the loop from inside a coroutine.
    loop = asyncio.get_running_loop()
    raw_results = await loop.run_in_executor(
        None,
        lambda: search.results(query, language=['en', 'zh'], num_results=num_results),
    )

    return _format_search_results(query, raw_results)