""" Web Search Module This module provides functions for searching information on the web. """ import asyncio from typing import Annotated, Dict, Any, List from langchain_community.utilities import SearxSearchWrapper from mars_toolkit.core.llm_tools import llm_tool from mars_toolkit.core.config import config @llm_tool(name="search_online", description="Search scientific information online and return results as a string") async def search_online( query: Annotated[str, "Search term"], num_results: Annotated[int, "Number of results (1-20)"] = 5 ) -> str: """ Searches for scientific information online and returns results as a formatted string. Args: query: Search term for scientific content num_results: Number of results to return (1-20) Returns: Formatted string with search results (titles, snippets, links) """ # 确保 num_results 是整数 try: num_results = int(num_results) except (TypeError, ValueError): num_results = 5 # Parameter validation if num_results < 1: num_results = 1 elif num_results > 20: num_results = 20 # Initialize search wrapper search = SearxSearchWrapper( searx_host=config.SEARXNG_HOST, categories=["science",], k=num_results ) # Execute search in a separate thread to avoid blocking the event loop # since SearxSearchWrapper doesn't have native async support loop = asyncio.get_event_loop() raw_results = await loop.run_in_executor( None, lambda: search.results(query, language=['en','zh'], num_results=num_results) ) # Transform results into structured format formatted_results = [] for result in raw_results: formatted_results.append({ "title": result.get("title", ""), "snippet": result.get("snippet", ""), "link": result.get("link", ""), "source": result.get("source", "") }) # Convert the results to a formatted string result_str = f"Search Results for '{query}' ({len(formatted_results)} items):\n\n" for i, result in enumerate(formatted_results, 1): result_str += f"Result {i}:\n" result_str += f"Title: {result['title']}\n" result_str += f"Summary: {result['snippet']}\n" result_str += f"Link: {result['link']}\n" result_str += f"Source: {result['source']}\n\n" return result_str