Files
mars-mcp/mars_toolkit/query/oqmd_query.py

100 lines
3.8 KiB
Python
Executable File

import logging
import os
from io import StringIO
from typing import Annotated
from urllib.parse import quote, urljoin

import httpx
import pandas as pd
from bs4 import BeautifulSoup

from ..core import config
from ..core.llm_tools import llm_tool

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Root of the OQMD web site; the composition page lives underneath it.
_OQMD_BASE_URL = "https://www.oqmd.org"


def _paragraph_to_markdown(paragraph, page_url: str) -> str:
    """Flatten one parsed ``<p>`` element into a single line of text.

    Args:
        paragraph: A BeautifulSoup element whose ``contents`` are walked.
        page_url: URL of the page the paragraph came from; link targets are
            resolved against it.

    Returns:
        The children's stripped text joined by single spaces. Anchors that
        carry an ``href`` are rendered as ``[text](absolute-url)``.
    """
    parts = []
    for child in paragraph.contents:
        if getattr(child, 'name', None) == 'a' and 'href' in child.attrs:
            # urljoin copes with absolute, root-relative and page-relative
            # hrefs alike; plain string concatenation only worked for the
            # root-relative form.
            link = urljoin(page_url, child['href'])
            parts.append(f"[{child.text.strip()}]({link})")
        elif hasattr(child, 'text'):
            parts.append(child.text.strip())
        else:
            parts.append(str(child).strip())
    return " ".join(parts).strip()


def _table_to_markdown(table) -> str:
    """Render a parsed ``<table>`` element as a Markdown table.

    Parsing is best-effort: any failure is logged and reported in-band so the
    caller can still return the rest of the page.

    Args:
        table: A BeautifulSoup element for the table.

    Returns:
        The Markdown table, or ``"Error parsing table data"`` on failure.
    """
    try:
        df = pd.read_html(StringIO(str(table)))[0]
        # Blank out NaN and infinities so they do not appear in the output.
        df = df.fillna('')
        df = df.replace([float('inf'), float('-inf')], '')
        return df.to_markdown(index=False)
    except Exception as e:
        logger.error("Error parsing table: %s", e)
        return "Error parsing table data"


@llm_tool(name="fetch_chemical_composition_from_OQMD", description="Fetch material data for a chemical composition from OQMD database")
async def fetch_chemical_composition_from_OQMD(
    composition: Annotated[str, "Chemical formula (e.g., Fe2O3, LiFePO4)"]
) -> str:
    """
    Fetch material data for a chemical composition from OQMD database.

    The composition page is downloaded, its heading and paragraphs are
    flattened to text (links become Markdown links), and the first table on
    the page is rendered as a Markdown table.

    Args:
        composition: Chemical formula (e.g., Fe2O3, LiFePO4)

    Returns:
        Formatted text with material information and property tables. On
        failure a string starting with "Error:" is returned instead of an
        exception being raised.
    """
    formula = (composition or "").strip()
    if not formula:
        # Nothing to look up; avoid requesting a meaningless URL.
        logger.error("Empty chemical composition supplied")
        return "Error: Chemical composition must be a non-empty formula"

    # NOTE: this sets the proxy variables for the whole process on every
    # call; an unset config value is written as an empty string.
    os.environ['HTTP_PROXY'] = config.HTTP_PROXY or ''
    os.environ['HTTPS_PROXY'] = config.HTTPS_PROXY or ''

    # Percent-encode the formula so characters such as '/', '?' or '#'
    # cannot alter the request path or query.
    request_url = f"{_OQMD_BASE_URL}/materials/composition/{quote(formula, safe='')}"

    try:
        async with httpx.AsyncClient(timeout=100.0) as client:
            response = await client.get(request_url)
            response.raise_for_status()

        # Validate response content
        html = response.text
        if not html or len(html) < 100:
            raise ValueError("Invalid response content from OQMD API")

        # Parse HTML data
        soup = BeautifulSoup(html, 'html.parser')
        page_url = str(response.url)

        # Heading first, falling back to the requested formula.
        basic_data = []
        h1_element = soup.find('h1')
        if h1_element:
            basic_data.append(h1_element.text.strip())
        else:
            basic_data.append(f"Material: {formula}")

        # Paragraphs, skipping those that flatten to nothing.
        for paragraph in soup.find_all('p'):
            paragraph_text = _paragraph_to_markdown(paragraph, page_url)
            if paragraph_text:
                basic_data.append(paragraph_text)

        # Only the first table on the page is rendered.
        table = soup.find('table')
        table_data = _table_to_markdown(table) if table else ""

        # Integrate data into a single text
        combined_text = "\n\n".join(basic_data)
        if table_data:
            combined_text += "\n\n## Material Properties Table\n\n" + table_data
        return combined_text

    except httpx.HTTPStatusError as e:
        logger.error("OQMD API request failed: %s", e)
        return f"Error: OQMD API request failed - {str(e)}"
    except httpx.TimeoutException:
        logger.error("OQMD API request timed out")
        return "Error: OQMD API request timed out"
    except httpx.NetworkError as e:
        logger.error("Network error occurred: %s", e)
        return f"Error: Network error occurred - {str(e)}"
    except ValueError as e:
        logger.error("Invalid response content: %s", e)
        return f"Error: Invalid response content - {str(e)}"
    except Exception as e:
        # Top-level boundary for the tool: report rather than raise.
        logger.error("Unexpected error: %s", e)
        return f"Error: Unexpected error occurred - {str(e)}"