fix: Enhance error handling and logging across multiple evaluators

- Added logging for file retrieval and error handling in file.py, improving robustness during file operations.
- Implemented checks for file existence and parsing errors in general.py, enhancing reliability in JSON/YAML processing.
- Improved table comparison logic in table.py with detailed error logging for sheet loading and cell value reading.
- Enhanced metrics evaluation in slides.py with additional checks for paragraph and run counts, ensuring thorough comparison.
- Updated utils.py to include file existence checks and detailed error logging during cell value reading.
This commit is contained in:
yuanmengqi
2025-07-14 05:43:17 +00:00
parent 349f2fd9fe
commit 0651495d88
5 changed files with 226 additions and 48 deletions

View File

@@ -4,6 +4,7 @@ import functools
import itertools
import logging
import operator
import os
import re
import zipfile
#import pandas as pd
@@ -33,10 +34,11 @@ V = TypeVar("Value")
logger = logging.getLogger("desktopenv.metrics.utils")
_xlsx_namespaces = [("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")
, ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
]
_xlsx_namespaces = [
("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main"),
("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
("xm", "http://schemas.microsoft.com/office/excel/2006/main")
]
_xlsx_ns_mapping = dict(_xlsx_namespaces)
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
_xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None
@@ -282,6 +284,13 @@ _shared_str_value_selector = lxml.cssselect.CSSSelector("oo|t", namespaces=_xlsx
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
# read_cell_value {{{ #
logger.debug(f"Reading cell value from {xlsx_file}, sheet: {sheet_name}, coordinate: {coordinate}")
# Check if file exists
if not os.path.exists(xlsx_file):
logger.error(f"Excel file not found: {xlsx_file}")
return None
try:
with zipfile.ZipFile(xlsx_file, "r") as z_f:
try:
@@ -308,9 +317,17 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
, namespaces=_xlsx_ns_mapping
)(sheet)
if len(cells) == 0:
logger.debug(f"Cell {coordinate} not found in sheet {sheet_name}")
return None
cell: _Element = cells[0]
except zipfile.BadZipFile:
except zipfile.BadZipFile as e:
logger.error(f"Bad zip file {xlsx_file}: {e}")
return None
except KeyError as e:
logger.error(f"Sheet {sheet_name} not found in {xlsx_file}: {e}")
return None
except Exception as e:
logger.error(f"Error reading {xlsx_file}: {e}")
return None
cell: Dict[str, str] = xmltodict.parse(lxml.etree.tostring(cell, encoding="unicode")