Files
mars-mcp/mars_toolkit/query/oqmd_query.py
2025-04-02 16:24:50 +08:00

96 lines
3.7 KiB
Python

import logging
from io import StringIO
from typing import Annotated
from urllib.parse import quote, urljoin

import httpx
import pandas as pd
from bs4 import BeautifulSoup

from mars_toolkit.core.llm_tools import llm_tool
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
_OQMD_BASE_URL = "https://www.oqmd.org"
# Responses shorter than this are treated as invalid rather than parsed.
_MIN_HTML_LENGTH = 100


def _render_paragraph(paragraph) -> str:
    """Flatten one ``<p>`` element into text, rendering links as Markdown.

    Args:
        paragraph: A BeautifulSoup tag whose ``contents`` are walked in order.

    Returns:
        The paragraph's fragments joined by single spaces; an empty string
        when the paragraph holds no visible text.
    """
    fragments = []
    for node in paragraph.contents:
        if getattr(node, "name", None) == "a" and "href" in node.attrs:
            # urljoin resolves site-relative hrefs against the OQMD host and
            # leaves already-absolute hrefs untouched (plain concatenation
            # would produce "https://www.oqmd.orghttps://...").
            link = urljoin(_OQMD_BASE_URL, node["href"])
            fragments.append(f"[{node.text.strip()}]({link})")
        elif hasattr(node, "text"):
            fragments.append(node.text.strip())
        else:
            fragments.append(str(node).strip())
    # Drop empty fragments so whitespace-only nodes do not leave double spaces.
    return " ".join(fragment for fragment in fragments if fragment)


def _render_table(table) -> str:
    """Convert an HTML ``<table>`` tag into a Markdown table.

    Args:
        table: A BeautifulSoup tag for the table to convert.

    Returns:
        The Markdown table, or the literal text ``"Error parsing table data"``
        when conversion fails.
    """
    try:
        frame = pd.read_html(StringIO(str(table)))[0]
        # Blank out NaN and infinities so they do not clutter the output.
        frame = frame.fillna("").replace([float("inf"), float("-inf")], "")
        return frame.to_markdown(index=False)
    except Exception as exc:
        # Deliberately broad: the table is best-effort and must not discard
        # the text sections that were already extracted.
        logger.error("Error parsing table: %s", exc)
        return "Error parsing table data"


def _render_page(html: str, formula: str) -> str:
    """Build the text report from an OQMD composition page.

    Args:
        html: Raw HTML of the page.
        formula: Formula used for the fallback title when the page has no
            ``<h1>`` element.

    Returns:
        Title, paragraphs and (if a table is present) a Markdown table
        section, separated by blank lines.
    """
    soup = BeautifulSoup(html, "html.parser")

    heading = soup.find("h1")
    sections = [heading.text.strip() if heading else f"Material: {formula}"]

    for paragraph in soup.find_all("p"):
        text = _render_paragraph(paragraph)
        if text:  # skip empty paragraphs instead of emitting blank sections
            sections.append(text)

    report = "\n\n".join(sections)

    table = soup.find("table")
    if table:
        table_markdown = _render_table(table)
        if table_markdown:
            report += "\n\n## Material Properties Table\n\n" + table_markdown
    return report


@llm_tool(name="fetch_chemical_composition_from_OQMD", description="Fetch material data for a chemical composition from OQMD database")
async def fetch_chemical_composition_from_OQMD(
    composition: Annotated[str, "Chemical formula (e.g., Fe2O3, LiFePO4)"]
) -> str:
    """
    Fetch material data for a chemical composition from OQMD database.

    Failures are never raised to the caller; they are logged and reported as
    a string starting with ``"Error:"``.

    Args:
        composition: Chemical formula (e.g., Fe2O3, LiFePO4)

    Returns:
        Formatted text with material information and property tables, or an
        ``"Error: ..."`` message when the input is empty, the request fails,
        or the response cannot be processed.
    """
    formula = composition.strip() if composition else ""
    if not formula:
        logger.error("Invalid input: empty chemical composition")
        return "Error: Invalid input - chemical composition must not be empty"

    # Percent-encode the formula (including "/") so caller-supplied text
    # cannot alter the request path or inject a query string / fragment.
    url = f"{_OQMD_BASE_URL}/materials/composition/{quote(formula, safe='')}"

    try:
        # Hold the connection only for the request; the body is fully read
        # by the time ``get`` returns, so parsing can happen afterwards.
        async with httpx.AsyncClient(timeout=100.0) as client:
            response = await client.get(url)
            response.raise_for_status()

        html = response.text
        if len(html) < _MIN_HTML_LENGTH:
            raise ValueError("Invalid response content from OQMD API")

        return _render_page(html, formula)
    except httpx.HTTPStatusError as exc:
        logger.error("OQMD API request failed: %s", exc)
        return f"Error: OQMD API request failed - {exc}"
    except httpx.TimeoutException:
        logger.error("OQMD API request timed out")
        return "Error: OQMD API request timed out"
    except httpx.NetworkError as exc:
        logger.error("Network error occurred: %s", exc)
        return f"Error: Network error occurred - {exc}"
    except ValueError as exc:
        logger.error("Invalid response content: %s", exc)
        return f"Error: Invalid response content - {exc}"
    except Exception as exc:
        # Top-level tool boundary: report the failure as text, never raise.
        logger.error("Unexpected error: %s", exc)
        return f"Error: Unexpected error occurred - {exc}"