import logging
from io import StringIO
from typing import Annotated
from urllib.parse import quote, urljoin

import httpx
import pandas as pd
from bs4 import BeautifulSoup

from mars_toolkit.core.llm_tools import llm_tool

logger = logging.getLogger(__name__)

# Composition pages live under this prefix; the formula is appended as a
# single, percent-encoded path segment.
_OQMD_COMPOSITION_URL = "https://www.oqmd.org/materials/composition/"

# Response bodies shorter than this many characters are rejected as invalid.
_MIN_RESPONSE_LENGTH = 100


def _render_paragraph(paragraph, page_url: str) -> str:
    """Flatten one ``<p>`` element into a single line of text.

    Anchors that carry an ``href`` are rendered as Markdown links; every
    other child contributes its stripped text.

    Args:
        paragraph: The parsed ``<p>`` element whose direct children are rendered.
        page_url: URL of the page the paragraph came from, used to resolve
            relative link targets.

    Returns:
        The space-joined, stripped text of the paragraph (may be empty).
    """
    parts = []
    for child in paragraph.contents:
        if getattr(child, "name", None) == "a" and "href" in child.attrs:
            # urljoin handles root-relative, page-relative and absolute hrefs,
            # whereas plain concatenation is only right for root-relative ones.
            link = urljoin(page_url, child["href"])
            parts.append(f"[{child.text.strip()}]({link})")
        elif hasattr(child, "text"):
            parts.append(child.text.strip())
        else:
            parts.append(str(child).strip())
    return " ".join(parts).strip()


def _table_to_markdown(table) -> str:
    """Convert a parsed ``<table>`` element to a Markdown table.

    Table conversion is best-effort: any failure is logged and replaced by a
    short placeholder so the rest of the page can still be returned.

    Args:
        table: The parsed ``<table>`` element.

    Returns:
        The Markdown rendering, or ``"Error parsing table data"`` on failure.
    """
    try:
        frame = pd.read_html(StringIO(str(table)))[0]
        # Blank out NaN and infinite cells so they do not appear in the output.
        frame = frame.fillna("")
        frame = frame.replace([float("inf"), float("-inf")], "")
        return frame.to_markdown(index=False)
    except Exception as exc:
        logger.error("Error parsing table: %s", exc)
        return "Error parsing table data"


def _format_oqmd_page(html: str, composition: str, page_url: str) -> str:
    """Build the text report from the HTML of a composition page.

    Args:
        html: Raw HTML of the page.
        composition: The formula as supplied by the caller; used as the
            heading when the page has no ``<h1>``.
        page_url: URL the HTML was fetched from, used to resolve links.

    Returns:
        Heading and paragraphs separated by blank lines, followed by a
        "Material Properties Table" section when the page contains a table.
    """
    soup = BeautifulSoup(html, "html.parser")

    heading = soup.find("h1")
    sections = [heading.text.strip() if heading else f"Material: {composition}"]

    for paragraph in soup.find_all("p"):
        text = _render_paragraph(paragraph, page_url)
        # Empty paragraphs would only add blank entries to the report.
        if text:
            sections.append(text)

    report = "\n\n".join(sections)

    # Only the first table on the page is rendered.
    table = soup.find("table")
    if table:
        table_markdown = _table_to_markdown(table)
        if table_markdown:
            report += "\n\n## Material Properties Table\n\n" + table_markdown
    return report


@llm_tool(name="fetch_chemical_composition_from_OQMD", description="Fetch material data for a chemical composition from OQMD database")
async def fetch_chemical_composition_from_OQMD(
    composition: Annotated[str, "Chemical formula (e.g., Fe2O3, LiFePO4)"]
) -> str:
    """
    Fetch material data for a chemical composition from OQMD database.

    The composition page is downloaded, its heading and paragraphs are
    extracted (links become Markdown links), and the first table on the page
    is converted to a Markdown table.

    Args:
        composition: Chemical formula (e.g., Fe2O3, LiFePO4)

    Returns:
        Formatted text with material information and property tables, or a
        string starting with "Error:" when the input is unusable, the request
        fails, or the response cannot be processed. This function does not
        raise for those conditions.
    """
    if not isinstance(composition, str) or not composition.strip():
        logger.error("Invalid composition: %r", composition)
        return "Error: Invalid composition - a non-empty chemical formula is required"

    formula = composition.strip()
    # Encode the formula as one path segment so characters such as "/", "?"
    # or "#" cannot alter the structure of the request URL.
    page_url = _OQMD_COMPOSITION_URL + quote(formula, safe="")

    try:
        async with httpx.AsyncClient(timeout=100.0) as client:
            response = await client.get(page_url)
            response.raise_for_status()

            html = response.text
            # Also covers an empty body.
            if len(html) < _MIN_RESPONSE_LENGTH:
                raise ValueError("Invalid response content from OQMD API")

            return _format_oqmd_page(html, formula, page_url)

    except httpx.HTTPStatusError as e:
        logger.error("OQMD API request failed: %s", e)
        return f"Error: OQMD API request failed - {str(e)}"
    except httpx.TimeoutException:
        logger.error("OQMD API request timed out")
        return "Error: OQMD API request timed out"
    except httpx.NetworkError as e:
        logger.error("Network error occurred: %s", e)
        return f"Error: Network error occurred - {str(e)}"
    except ValueError as e:
        logger.error("Invalid response content: %s", e)
        return f"Error: Invalid response content - {str(e)}"
    except Exception as e:
        # Top-level boundary: the tool always answers with a string.
        logger.error("Unexpected error: %s", e)
        return f"Error: Unexpected error occurred - {str(e)}"