# mars-mcp/mars_toolkit/core/cif_utils.py

"""
CIF Utilities Module

This module provides basic functions for handling CIF (Crystallographic Information File) files,
which are commonly used in materials science for representing crystal structures.
"""

import json
import logging

logger = logging.getLogger(__name__)

def read_cif_txt_file(file_path):
    """
    Load a CIF file from disk as text.

    The file is opened in text mode and decoded as UTF-8. Any exception raised
    while opening or reading it is logged through the module logger and
    swallowed, so callers must check for a ``None`` result.

    Args:
        file_path: Path to the CIF file

    Returns:
        The full text of the file, or None if it could not be read
    """
    content = None
    try:
        with open(file_path, mode='r', encoding='utf-8') as cif_file:
            content = cif_file.read()
    except Exception as err:
        # Best-effort read: report the failure and fall through to return None.
        logger.error(f"Error reading file {file_path}: {err}")
    return content

def extract_cif_info(path: str, fields_name: list):
    """
    Extract selected fields from a CIF description JSON file.

    Fields are requested by group name. The available groups are:

    * ``'basic_fields'``: formula_pretty, chemsys, composition, elements,
      symmetry, nsites, volume, density
    * ``'energy_electronic_fields'``: formation_energy_per_atom,
      energy_above_hull, is_stable, efermi, cbm, vbm, band_gap, is_gap_direct
    * ``'metal_magentic_fields'`` (historical spelling, kept for backward
      compatibility) or ``'metal_magnetic_fields'``: is_metal, is_magnetic,
      ordering, total_magnetization, num_magnetic_sites
    * ``'all_fields'``: every field of the three groups above

    Args:
        path: Path to the JSON file containing CIF information. The file is
            read as UTF-8 and is expected to hold a JSON object.
        fields_name: List of group names to extract. A single group name given
            as a plain string is also accepted. Unknown group names are
            ignored; an empty list selects nothing.

    Returns:
        Dictionary mapping each selected field name to its value in the JSON
        document, or to an empty string when the document lacks that field.
        Errors from opening or parsing the file are not caught here.
    """
    basic = ['formula_pretty', 'chemsys', 'composition', 'elements', 'symmetry', 'nsites', 'volume', 'density']
    energy_electronic = ['formation_energy_per_atom', 'energy_above_hull', 'is_stable', 'efermi', 'cbm', 'vbm', 'band_gap', 'is_gap_direct']
    metal_magnetic = ['is_metal', 'is_magnetic', "ordering", 'total_magnetization', 'num_magnetic_sites']

    # Explicit name -> fields table. Looking names up in locals() would let a
    # request such as 'path' or 'fields_name' resolve to an unrelated local
    # variable and inject garbage field names.
    field_groups = {
        'basic_fields': basic,
        'energy_electronic_fields': energy_electronic,
        'metal_magentic_fields': metal_magnetic,  # misspelling used by existing callers
        'metal_magnetic_fields': metal_magnetic,
    }

    # A bare string would otherwise be iterated character by character.
    if isinstance(fields_name, str):
        requested = [fields_name]
    else:
        requested = list(fields_name)

    if 'all_fields' in requested:
        selected_fields = basic + energy_electronic + metal_magnetic
    else:
        selected_fields = []
        for group_name in requested:
            selected_fields.extend(field_groups.get(group_name, []))

    with open(path, 'r', encoding='utf-8') as f:
        docs = json.load(f)

    # Missing fields are reported as '' rather than omitted, so the result
    # always has one key per selected field.
    return {field_name: docs.get(field_name, '') for field_name in selected_fields}

def remove_symmetry_equiv_xyz(cif_content):
    """
    Remove the symmetry-operations loop from CIF file content.

    A loop is treated as the symmetry loop when one of the data-name lines
    directly following its ``loop_`` keyword contains
    ``_symmetry_equiv_pos_as_xyz``. The ``loop_`` line, its data names and its
    value rows are dropped. Removal stops at the first following line that
    starts a new item (a data name beginning with ``_``, another ``loop_``, or
    a ``data_`` block header), so data items that come after the symmetry loop,
    such as cell parameters, are preserved. Blank or comment lines between the
    loop's rows and that next item are removed together with the loop.

    Keywords and data names are matched case-insensitively. All other lines
    are returned unchanged.

    Args:
        cif_content: CIF file content string

    Returns:
        CIF content string with the symmetry-operations loop removed
    """
    # Line prefixes that mark the start of a new CIF item and therefore the
    # end of the value rows of the loop being skipped.
    item_starts = ('_', 'loop_', 'data_')

    lines = cif_content.split('\n')
    total = len(lines)
    kept_lines = []

    i = 0
    while i < total:
        # Anything that is not a loop header is copied through untouched.
        if lines[i].strip().lower() != 'loop_':
            kept_lines.append(lines[i])
            i += 1
            continue

        # Gather the data-name lines that form the loop header.
        header_end = i + 1
        while header_end < total and lines[header_end].strip().startswith('_'):
            header_end += 1
        header_tags = [lines[k].strip().lower() for k in range(i + 1, header_end)]

        if not any('_symmetry_equiv_pos_as_xyz' in tag for tag in header_tags):
            # Not the symmetry loop: keep the loop_ line; the header and rows
            # that follow are copied by the subsequent iterations.
            kept_lines.append(lines[i])
            i += 1
            continue

        # Symmetry loop: jump past the header, then drop value rows until the
        # next item begins (or the content ends).
        i = header_end
        while i < total and not lines[i].strip().lower().startswith(item_starts):
            i += 1

    return '\n'.join(kept_lines)