100 lines
3.8 KiB
Python
Executable File
100 lines
3.8 KiB
Python
Executable File
import logging
|
|
import os
|
|
import httpx
|
|
import pandas as pd
|
|
from bs4 import BeautifulSoup
|
|
from io import StringIO
|
|
from typing import Annotated
|
|
|
|
from ..core import config
|
|
from ..core.llm_tools import llm_tool
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
# Root of the OQMD web site; composition pages and page links live under it.
_OQMD_BASE_URL = "https://www.oqmd.org"
# Overall timeout (seconds) handed to the httpx client.
_OQMD_TIMEOUT_SECONDS = 100.0
# Response bodies shorter than this many characters are rejected as invalid.
_OQMD_MIN_RESPONSE_LENGTH = 100


def _render_oqmd_paragraph(paragraph, page_url: str) -> str:
    """Flatten one ``<p>`` element into text, rendering links as Markdown.

    Args:
        paragraph: The parsed ``<p>`` element whose ``contents`` are walked.
        page_url: URL of the page the paragraph came from; hrefs are
            resolved against it.

    Returns:
        The paragraph's text fragments joined by single spaces. Anchors that
        carry an ``href`` become ``[text](absolute-url)``. Empty fragments
        are dropped, so an empty paragraph yields ``""``.
    """
    from urllib.parse import urljoin

    pieces = []
    for node in paragraph.contents:
        if getattr(node, "name", None) == "a" and "href" in node.attrs:
            # urljoin copes with root-relative, relative and absolute hrefs;
            # plain concatenation with the host only works for the first.
            link = urljoin(page_url, node["href"])
            pieces.append(f"[{node.text.strip()}]({link})")
        elif hasattr(node, "text"):
            pieces.append(node.text.strip())
        else:
            pieces.append(str(node).strip())
    return " ".join(piece for piece in pieces if piece)


def _oqmd_table_to_markdown(table) -> str:
    """Convert a parsed ``<table>`` element to a Markdown table.

    Best effort: any failure is logged and reported in-band as the string
    ``"Error parsing table data"`` so the rest of the page is still returned.
    """
    try:
        frame = pd.read_html(StringIO(str(table)))[0]
        # Blank out missing and infinite cells so they do not show up as
        # "nan"/"inf" in the rendered table.
        frame = frame.fillna('')
        frame = frame.replace([float('inf'), float('-inf')], '')
        return frame.to_markdown(index=False)
    except Exception as e:
        logger.error("Error parsing table: %s", e)
        return "Error parsing table data"


def _parse_oqmd_page(html: str, composition: str, page_url: str) -> str:
    """Build the text report for one OQMD composition page.

    The report is the first ``<h1>`` (or ``Material: <composition>`` when the
    page has none), followed by every non-empty ``<p>``, followed by the
    first ``<table>`` rendered as Markdown when one exists.
    """
    soup = BeautifulSoup(html, 'html.parser')

    heading = soup.find('h1')
    sections = [heading.text.strip() if heading else f"Material: {composition}"]

    for paragraph in soup.find_all('p'):
        rendered = _render_oqmd_paragraph(paragraph, page_url)
        # Empty paragraphs would only add blank lines to the report.
        if rendered:
            sections.append(rendered)

    report = "\n\n".join(sections)

    table = soup.find('table')
    if table:
        table_markdown = _oqmd_table_to_markdown(table)
        if table_markdown:
            report += "\n\n## Material Properties Table\n\n" + table_markdown

    return report


@llm_tool(name="fetch_chemical_composition_from_OQMD", description="Fetch material data for a chemical composition from OQMD database")
async def fetch_chemical_composition_from_OQMD(
    composition: Annotated[str, "Chemical formula (e.g., Fe2O3, LiFePO4)"]
) -> str:
    """
    Fetch material data for a chemical composition from OQMD database.

    Downloads ``https://www.oqmd.org/materials/composition/<composition>``
    and converts the page heading, paragraphs and first table to text.

    Args:
        composition: Chemical formula (e.g., Fe2O3, LiFePO4). Surrounding
            whitespace is ignored and the value is percent-encoded before it
            is placed in the URL path.

    Returns:
        Formatted text with material information and property tables.
        Failures are not raised; they are logged and returned as a string
        starting with ``"Error: "``.
    """
    from urllib.parse import quote

    formula = (composition or "").strip()
    if not formula:
        logger.error("Empty chemical composition supplied")
        return "Error: Chemical composition must not be empty"

    # NOTE: this rewrites the proxy variables for the whole process on every
    # call (an unset config value clears them); kept for compatibility with
    # the existing behaviour.
    os.environ['HTTP_PROXY'] = config.HTTP_PROXY or ''
    os.environ['HTTPS_PROXY'] = config.HTTPS_PROXY or ''

    # safe="" also encodes "/", so the formula can never change the request
    # path, query string or fragment.
    page_url = f"{_OQMD_BASE_URL}/materials/composition/{quote(formula, safe='')}"

    try:
        async with httpx.AsyncClient(timeout=_OQMD_TIMEOUT_SECONDS) as client:
            response = await client.get(page_url)
            response.raise_for_status()

        html = response.text
        if not html or len(html) < _OQMD_MIN_RESPONSE_LENGTH:
            raise ValueError("Invalid response content from OQMD API")

        return _parse_oqmd_page(html, formula, page_url)

    except httpx.HTTPStatusError as e:
        logger.error("OQMD API request failed: %s", e)
        return f"Error: OQMD API request failed - {str(e)}"
    except httpx.TimeoutException:
        logger.error("OQMD API request timed out")
        return "Error: OQMD API request timed out"
    except httpx.NetworkError as e:
        logger.error("Network error occurred: %s", e)
        return f"Error: Network error occurred - {str(e)}"
    except ValueError as e:
        logger.error("Invalid response content: %s", e)
        return f"Error: Invalid response content - {str(e)}"
    except Exception as e:
        # Tool boundary: report the failure to the caller as text, but keep
        # the traceback in the log.
        logger.exception("Unexpected error: %s", e)
        return f"Error: Unexpected error occurred - {str(e)}"
|