fix: Enhance error handling and logging across multiple evaluators

- Added logging for file retrieval and error handling in file.py, improving robustness during file operations.
- Implemented checks for file existence and parsing errors in general.py and switched YAML parsing from yaml.load (full Loader) to yaml.safe_load, enhancing reliability in JSON/YAML processing.
- Improved table comparison logic in table.py with detailed error logging for sheet loading and cell value reading.
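- Enhanced metrics evaluation in slides.py with additional checks for table dimensions and for paragraph and run counts, so that structures of different lengths fail the comparison instead of being silently truncated by zip(); also added a guard for slides with no images.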
- Updated utils.py to include file existence checks and detailed error logging during cell value reading.
This commit is contained in:
yuanmengqi
2025-07-14 05:43:17 +00:00
parent 349f2fd9fe
commit 0651495d88
5 changed files with 226 additions and 48 deletions

View File

@@ -1,10 +1,13 @@
import os import os
import logging
from typing import Dict, List, Set from typing import Dict, List, Set
from typing import Optional, Any, Union from typing import Optional, Any, Union
from datetime import datetime from datetime import datetime
import requests import requests
import pandas as pd import pandas as pd
logger = logging.getLogger("desktopenv.getter.file")
def get_content_from_vm_file(env, config: Dict[str, Any]) -> Any: def get_content_from_vm_file(env, config: Dict[str, Any]) -> Any:
""" """
@@ -101,16 +104,42 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
for i, (p, d) in enumerate(zip(paths, dests)): for i, (p, d) in enumerate(zip(paths, dests)):
_path = os.path.join(env.cache_dir, d) _path = os.path.join(env.cache_dir, d)
file = env.controller.get_file(p)
if file is None: try:
# Try to get file from VM
file = env.controller.get_file(p)
if file is None:
logger.warning(f"Failed to get file from VM: {p}")
if i in gives:
cache_paths.append(None)
continue
if i in gives:
cache_paths.append(_path)
# Write file with robust error handling
try:
# Ensure cache directory exists
os.makedirs(env.cache_dir, exist_ok=True)
with open(_path, "wb") as f:
f.write(file)
logger.info(f"Successfully saved file: {_path} ({len(file)} bytes)")
except IOError as e:
logger.error(f"IO error writing file {_path}: {e}")
if i in gives:
cache_paths[-1] = None # Replace the path we just added with None
except Exception as e:
logger.error(f"Unexpected error writing file {_path}: {e}")
if i in gives:
cache_paths[-1] = None
except Exception as e:
logger.error(f"Error processing file {p}: {e}")
if i in gives: if i in gives:
cache_paths.append(None) cache_paths.append(None)
continue
if i in gives:
cache_paths.append(_path)
with open(_path, "wb") as f:
f.write(file)
return cache_paths[0] if len(cache_paths)==1 else cache_paths return cache_paths[0] if len(cache_paths)==1 else cache_paths

View File

@@ -298,34 +298,84 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
""" """
if result is None: if result is None:
logger.warning("Result file path is None, returning 0.0")
return 0.
# Check if file exists
if not os.path.exists(result):
logger.warning(f"Result file does not exist: {result}, returning 0.0")
return 0.
try:
with open(result, 'r', encoding='utf-8') as f:
if is_yaml:
try:
# Use SafeLoader instead of Loader for better security and error handling
result_data: Dict[str, Any] = yaml.safe_load(f)
if result_data is None:
logger.warning(f"YAML file {result} is empty or contains only null values, returning 0.0")
return 0.
except yaml.YAMLError as e:
logger.error(f"YAML parsing error in file {result}: {e}")
logger.error(f"File content might be corrupted or have invalid YAML syntax")
return 0.
except Exception as e:
logger.error(f"Unexpected error parsing YAML file {result}: {e}")
return 0.
else:
try:
result_data: Dict[str, Any] = json.load(f)
except json.JSONDecodeError as e:
logger.error(f"JSON parsing error in file {result}: {e}")
return 0.
except Exception as e:
logger.error(f"Unexpected error parsing JSON file {result}: {e}")
return 0.
except IOError as e:
logger.error(f"IO error reading file {result}: {e}")
return 0.
except Exception as e:
logger.error(f"Unexpected error reading file {result}: {e}")
return 0. return 0.
with open(result) as f:
if is_yaml:
result: Dict[str, Any] = yaml.load(f, Loader=yaml.Loader)
else:
result: Dict[str, Any] = json.load(f)
expect_rules = rules.get("expect", {}) expect_rules = rules.get("expect", {})
unexpect_rules = rules.get("unexpect", {}) unexpect_rules = rules.get("unexpect", {})
metric = True metric = True
for r in expect_rules: for r in expect_rules:
value = result value = result_data
for k in r["key"]: try:
try: for k in r["key"]:
value = value[k] try:
except KeyError: value = value[k]
return 0. except KeyError:
metric = metric and _match_value_to_rule(value, r) logger.debug(f"Key '{k}' not found in result data, returning 0.0")
return 0.
except TypeError:
logger.debug(f"Cannot access key '{k}' - value is not a dictionary, returning 0.0")
return 0.
metric = metric and _match_value_to_rule(value, r)
except Exception as e:
logger.error(f"Error processing expect rule {r}: {e}")
return 0.
for r in unexpect_rules: for r in unexpect_rules:
value = result value = result_data
for k in r["key"]: try:
try: for k in r["key"]:
value = value[k] try:
except KeyError: value = value[k]
value = None except KeyError:
break value = None
metric = metric and not _match_value_to_rule(value, r) break
except TypeError:
value = None
break
metric = metric and not _match_value_to_rule(value, r)
except Exception as e:
logger.error(f"Error processing unexpect rule {r}: {e}")
return 0.
return float(metric) return float(metric)

View File

@@ -73,6 +73,9 @@ def check_image_stretch_and_center(modified_ppt, original_ppt):
original_slide_images = [shape for shape in original_slide.shapes if shape.shape_type == 13] original_slide_images = [shape for shape in original_slide.shapes if shape.shape_type == 13]
modified_slide_images = [shape for shape in modified_slide.shapes if shape.shape_type == 13] modified_slide_images = [shape for shape in modified_slide.shapes if shape.shape_type == 13]
if not original_slide_images:
return 0.
the_image = original_slide_images[0] the_image = original_slide_images[0]
the_modified_image = None the_modified_image = None
@@ -395,12 +398,38 @@ def compare_pptx_files(file1_path, file2_path, **options):
table2 = shape2.table table2 = shape2.table
if enable_debug: if enable_debug:
debug_logger.debug(f" Shape {shape_idx} - Comparing TABLE with {len(table1.rows)} rows and {len(table1.columns)} columns") debug_logger.debug(f" Shape {shape_idx} - Comparing TABLE with {len(table1.rows)} rows and {len(table1.columns)} columns")
debug_logger.debug(f" Shape {shape_idx} - Table2 has {len(table2.rows)} rows and {len(table2.columns)} columns")
# Check if tables have the same dimensions
if len(table1.rows) != len(table2.rows) or len(table1.columns) != len(table2.columns):
if enable_debug:
debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Table dimensions differ:")
debug_logger.debug(f" Table1: {len(table1.rows)} rows x {len(table1.columns)} columns")
debug_logger.debug(f" Table2: {len(table2.rows)} rows x {len(table2.columns)} columns")
return 0
for row_idx in range(len(table1.rows)): for row_idx in range(len(table1.rows)):
for col_idx in range(len(table1.columns)): for col_idx in range(len(table1.columns)):
cell1 = table1.cell(row_idx, col_idx) cell1 = table1.cell(row_idx, col_idx)
cell2 = table2.cell(row_idx, col_idx) cell2 = table2.cell(row_idx, col_idx)
# Check if cells have the same number of paragraphs
if len(cell1.text_frame.paragraphs) != len(cell2.text_frame.paragraphs):
if enable_debug:
debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Cell [{row_idx},{col_idx}] - Different number of paragraphs:")
debug_logger.debug(f" Cell1 paragraphs: {len(cell1.text_frame.paragraphs)}")
debug_logger.debug(f" Cell2 paragraphs: {len(cell2.text_frame.paragraphs)}")
return 0
for para_idx, (para1, para2) in enumerate(zip(cell1.text_frame.paragraphs, cell2.text_frame.paragraphs)): for para_idx, (para1, para2) in enumerate(zip(cell1.text_frame.paragraphs, cell2.text_frame.paragraphs)):
# Check if paragraphs have the same number of runs
if len(para1.runs) != len(para2.runs):
if enable_debug:
debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Cell [{row_idx},{col_idx}], Para {para_idx} - Different number of runs:")
debug_logger.debug(f" Para1 runs: {len(para1.runs)}")
debug_logger.debug(f" Para2 runs: {len(para2.runs)}")
return 0
for run_idx, (run1, run2) in enumerate(zip(para1.runs, para2.runs)): for run_idx, (run1, run2) in enumerate(zip(para1.runs, para2.runs)):
# Check font color # Check font color
if hasattr(run1.font.color, "rgb") and hasattr(run2.font.color, "rgb"): if hasattr(run1.font.color, "rgb") and hasattr(run2.font.color, "rgb"):
@@ -451,6 +480,14 @@ def compare_pptx_files(file1_path, file2_path, **options):
if shape1.text.strip() != shape2.text.strip() and examine_text: if shape1.text.strip() != shape2.text.strip() and examine_text:
return 0 return 0
# check if the number of paragraphs are the same
if len(shape1.text_frame.paragraphs) != len(shape2.text_frame.paragraphs):
if enable_debug:
debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} - Different number of paragraphs:")
debug_logger.debug(f" Shape1 paragraphs: {len(shape1.text_frame.paragraphs)}")
debug_logger.debug(f" Shape2 paragraphs: {len(shape2.text_frame.paragraphs)}")
return 0
# check if the paragraphs are the same # check if the paragraphs are the same
para_idx = 0 para_idx = 0
for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs): for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs):
@@ -487,6 +524,14 @@ def compare_pptx_files(file1_path, file2_path, **options):
if para1.level != para2.level and examine_indent: if para1.level != para2.level and examine_indent:
return 0 return 0
# check if the number of runs are the same
if len(para1.runs) != len(para2.runs):
if enable_debug:
debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Different number of runs:")
debug_logger.debug(f" Para1 runs: {len(para1.runs)}")
debug_logger.debug(f" Para2 runs: {len(para2.runs)}")
return 0
for run1, run2 in zip(para1.runs, para2.runs): for run1, run2 in zip(para1.runs, para2.runs):
# check if the font properties are the same # check if the font properties are the same
@@ -634,6 +679,12 @@ def compare_pptx_files(file1_path, file2_path, **options):
debug_logger.debug(f" MISMATCH: Text differs - '{tshape1.text.strip()}' vs '{tshape2.text.strip()}'") debug_logger.debug(f" MISMATCH: Text differs - '{tshape1.text.strip()}' vs '{tshape2.text.strip()}'")
return 0 return 0
# Check if text shapes have the same number of paragraphs
if len(tshape1.text_frame.paragraphs) != len(tshape2.text_frame.paragraphs):
if enable_debug:
debug_logger.debug(f" MISMATCH: Different number of paragraphs - {len(tshape1.text_frame.paragraphs)} vs {len(tshape2.text_frame.paragraphs)}")
return 0
# Compare alignment of each paragraph # Compare alignment of each paragraph
for para_idx, (para1, para2) in enumerate(zip(tshape1.text_frame.paragraphs, tshape2.text_frame.paragraphs)): for para_idx, (para1, para2) in enumerate(zip(tshape1.text_frame.paragraphs, tshape2.text_frame.paragraphs)):
from pptx.enum.text import PP_ALIGN from pptx.enum.text import PP_ALIGN

View File

@@ -36,8 +36,14 @@ def _parse_sheet_idx(sheet_idx: Union[int, str]
# function _parse_sheet_idx {{{ # # function _parse_sheet_idx {{{ #
if isinstance(sheet_idx, int): if isinstance(sheet_idx, int):
try: try:
index: str = result_sheet_names[sheet_idx] if not result_sheet_names or sheet_idx >= len(result_sheet_names):
except: logger.error(f"Sheet index {sheet_idx} out of range. Available sheets: {result_sheet_names}")
index = ""
else:
index: str = result_sheet_names[sheet_idx]
logger.debug(f"Sheet index {sheet_idx} resolved to sheet: {index}")
except Exception as e:
logger.error(f"Error resolving sheet index {sheet_idx}: {e}")
index = "" index = ""
book: BOOK = result book: BOOK = result
elif sheet_idx.startswith("RI"): elif sheet_idx.startswith("RI"):
@@ -114,12 +120,21 @@ def compare_table(result: str, expected: str = None, **options) -> float:
""" """
if result is None: if result is None:
logger.error("Result file path is None")
return 0.
# Check if result file exists
if not os.path.exists(result):
logger.error(f"Result file not found: {result}")
return 0. return 0.
try: try:
logger.info(f"Loading result file: {result}")
xlworkbookr: Workbook = openpyxl.load_workbook(filename=result) xlworkbookr: Workbook = openpyxl.load_workbook(filename=result)
pdworkbookr = pd.ExcelFile(result) pdworkbookr = pd.ExcelFile(result)
except: logger.info(f"Successfully loaded result file with sheets: {pdworkbookr.sheet_names}")
except Exception as e:
logger.error(f"Failed to load result file {result}: {e}")
return 0. return 0.
worksheetr_names: List[str] = pdworkbookr.sheet_names worksheetr_names: List[str] = pdworkbookr.sheet_names
@@ -432,19 +447,35 @@ def compare_table(result: str, expected: str = None, **options) -> float:
# props: dict like {attribute: {"method": str, "ref": anything}} # props: dict like {attribute: {"method": str, "ref": anything}}
# supported attributes: value & those supported by utils._read_cell_style # supported attributes: value & those supported by utils._read_cell_style
sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke)) try:
if sheet is None: sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
return 0. if sheet is None:
# data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke)) logger.error(f"Failed to load sheet for sheet_idx: {r['sheet_idx']}")
cell: Cell = sheet[r["coordinate"]] return 0.
metric: bool = True # data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke))
for prpt, rule in r["props"].items(): cell: Cell = sheet[r["coordinate"]]
if prpt == "value": metric: bool = True
val = read_cell_value(*parse_idx(r["sheet_idx"], result, expected), r["coordinate"]) for prpt, rule in r["props"].items():
else: if prpt == "value":
val = _read_cell_style(prpt, cell) try:
parsed_result = parse_idx(r["sheet_idx"], result, expected)
logger.debug(f"parse_idx result: {parsed_result}")
val = read_cell_value(*parsed_result, r["coordinate"])
logger.debug(f"Cell {r['coordinate']} value: {val}")
except Exception as e:
logger.error(f"Failed to read cell value at {r['coordinate']}: {e}")
val = None
else:
try:
val = _read_cell_style(prpt, cell)
except Exception as e:
logger.error(f"Failed to read cell style {prpt} at {r['coordinate']}: {e}")
val = None
metric = metric and _match_value_to_rule(val, rule) metric = metric and _match_value_to_rule(val, rule)
except Exception as e:
logger.error(f"Error in check_cell processing: {e}")
return 0.
logger.debug("Assertion: %s[%s] :%s - %s" logger.debug("Assertion: %s[%s] :%s - %s"
, r["sheet_idx"], r["coordinate"] , r["sheet_idx"], r["coordinate"]

View File

@@ -4,6 +4,7 @@ import functools
import itertools import itertools
import logging import logging
import operator import operator
import os
import re import re
import zipfile import zipfile
#import pandas as pd #import pandas as pd
@@ -33,10 +34,11 @@ V = TypeVar("Value")
logger = logging.getLogger("desktopenv.metrics.utils") logger = logging.getLogger("desktopenv.metrics.utils")
_xlsx_namespaces = [("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main") _xlsx_namespaces = [
, ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main") ("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main"),
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main") ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
] ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
]
_xlsx_ns_mapping = dict(_xlsx_namespaces) _xlsx_ns_mapping = dict(_xlsx_namespaces)
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces)) _xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
_xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None _xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None
@@ -282,6 +284,13 @@ _shared_str_value_selector = lxml.cssselect.CSSSelector("oo|t", namespaces=_xlsx
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
# read_cell_value {{{ # # read_cell_value {{{ #
logger.debug(f"Reading cell value from {xlsx_file}, sheet: {sheet_name}, coordinate: {coordinate}")
# Check if file exists
if not os.path.exists(xlsx_file):
logger.error(f"Excel file not found: {xlsx_file}")
return None
try: try:
with zipfile.ZipFile(xlsx_file, "r") as z_f: with zipfile.ZipFile(xlsx_file, "r") as z_f:
try: try:
@@ -308,9 +317,17 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
, namespaces=_xlsx_ns_mapping , namespaces=_xlsx_ns_mapping
)(sheet) )(sheet)
if len(cells) == 0: if len(cells) == 0:
logger.debug(f"Cell {coordinate} not found in sheet {sheet_name}")
return None return None
cell: _Element = cells[0] cell: _Element = cells[0]
except zipfile.BadZipFile: except zipfile.BadZipFile as e:
logger.error(f"Bad zip file {xlsx_file}: {e}")
return None
except KeyError as e:
logger.error(f"Sheet {sheet_name} not found in {xlsx_file}: {e}")
return None
except Exception as e:
logger.error(f"Error reading {xlsx_file}: {e}")
return None return None
cell: Dict[str, str] = xmltodict.parse(lxml.etree.tostring(cell, encoding="unicode") cell: Dict[str, str] = xmltodict.parse(lxml.etree.tostring(cell, encoding="unicode")