fix: Enhance error handling and logging across multiple evaluators
- Added logging for file retrieval and error handling in file.py, improving robustness during file operations.
- Implemented checks for file existence and parsing errors in general.py, enhancing reliability in JSON/YAML processing.
- Improved table comparison logic in table.py with detailed error logging for sheet loading and cell value reading.
- Enhanced metrics evaluation in slides.py with additional checks for paragraph and run counts, ensuring thorough comparison.
- Updated utils.py to include file existence checks and detailed error logging during cell value reading.
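The changes share one defensive pattern: validate the path, parse with the narrowest exception types first, log the failure, and return a sentinel instead of raising. A minimal sketch of that pattern (illustrative only; `load_result` and the logger name are not part of this commit):

```python
import json
import logging
import os
from typing import Any, Optional

logger = logging.getLogger("desktopenv.example")


def load_result(path: Optional[str]) -> Optional[Any]:
    """Parse a JSON result file, returning None (with a logged reason) on any failure."""
    if path is None:
        logger.warning("Result file path is None")
        return None
    if not os.path.exists(path):
        logger.warning(f"Result file does not exist: {path}")
        return None
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except json.JSONDecodeError as e:  # most specific failure first
        logger.error(f"JSON parsing error in {path}: {e}")
    except IOError as e:
        logger.error(f"IO error reading {path}: {e}")
    return None
```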
file.py

@@ -1,10 +1,13 @@
 import os
+import logging
 from typing import Dict, List, Set
 from typing import Optional, Any, Union
 from datetime import datetime
 import requests
 import pandas as pd
 
+logger = logging.getLogger("desktopenv.getter.file")
+
 
 def get_content_from_vm_file(env, config: Dict[str, Any]) -> Any:
     """
@@ -101,16 +104,42 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
 
     for i, (p, d) in enumerate(zip(paths, dests)):
         _path = os.path.join(env.cache_dir, d)
-        file = env.controller.get_file(p)
-        if file is None:
-            if i in gives:
-                cache_paths.append(None)
-            continue
-
-        if i in gives:
-            cache_paths.append(_path)
-        with open(_path, "wb") as f:
-            f.write(file)
+
+        try:
+            # Try to get file from VM
+            file = env.controller.get_file(p)
+            if file is None:
+                logger.warning(f"Failed to get file from VM: {p}")
+                if i in gives:
+                    cache_paths.append(None)
+                continue
+
+            if i in gives:
+                cache_paths.append(_path)
+
+            # Write file with robust error handling
+            try:
+                # Ensure cache directory exists
+                os.makedirs(env.cache_dir, exist_ok=True)
+
+                with open(_path, "wb") as f:
+                    f.write(file)
+                logger.info(f"Successfully saved file: {_path} ({len(file)} bytes)")
+
+            except IOError as e:
+                logger.error(f"IO error writing file {_path}: {e}")
+                if i in gives:
+                    cache_paths[-1] = None  # Replace the path we just added with None
+            except Exception as e:
+                logger.error(f"Unexpected error writing file {_path}: {e}")
+                if i in gives:
+                    cache_paths[-1] = None
+
+        except Exception as e:
+            logger.error(f"Error processing file {p}: {e}")
+            if i in gives:
+                cache_paths.append(None)
+            continue
 
     return cache_paths[0] if len(cache_paths)==1 else cache_paths
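With this change, callers of get_vm_file receive None entries instead of propagated exceptions when retrieval or writing fails. A hedged usage sketch (the config keys are inferred from the surrounding code, not spelled out in this hunk):

```python
# Hypothetical evaluator-side call; "path"/"dest"/"gives" are assumed
# to be the config keys that produce paths, dests, and gives above.
config = {
    "path": ["/home/user/report.xlsx"],
    "dest": ["report.xlsx"],
    "gives": [0],
}
cached = get_vm_file(env, config)  # cache path on success, None on failure
if cached is None:
    score = 0.0  # downstream metrics treat a missing file as a failed check
```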
general.py

@@ -298,34 +298,84 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
     """
 
     if result is None:
+        logger.warning("Result file path is None, returning 0.0")
         return 0.
-    with open(result) as f:
-        if is_yaml:
-            result: Dict[str, Any] = yaml.load(f, Loader=yaml.Loader)
-        else:
-            result: Dict[str, Any] = json.load(f)
+
+    # Check if file exists
+    if not os.path.exists(result):
+        logger.warning(f"Result file does not exist: {result}, returning 0.0")
+        return 0.
+
+    try:
+        with open(result, 'r', encoding='utf-8') as f:
+            if is_yaml:
+                try:
+                    # Use SafeLoader instead of Loader for better security and error handling
+                    result_data: Dict[str, Any] = yaml.safe_load(f)
+                    if result_data is None:
+                        logger.warning(f"YAML file {result} is empty or contains only null values, returning 0.0")
+                        return 0.
+                except yaml.YAMLError as e:
+                    logger.error(f"YAML parsing error in file {result}: {e}")
+                    logger.error(f"File content might be corrupted or have invalid YAML syntax")
+                    return 0.
+                except Exception as e:
+                    logger.error(f"Unexpected error parsing YAML file {result}: {e}")
+                    return 0.
+            else:
+                try:
+                    result_data: Dict[str, Any] = json.load(f)
+                except json.JSONDecodeError as e:
+                    logger.error(f"JSON parsing error in file {result}: {e}")
+                    return 0.
+                except Exception as e:
+                    logger.error(f"Unexpected error parsing JSON file {result}: {e}")
+                    return 0.
+    except IOError as e:
+        logger.error(f"IO error reading file {result}: {e}")
+        return 0.
+    except Exception as e:
+        logger.error(f"Unexpected error reading file {result}: {e}")
+        return 0.
 
     expect_rules = rules.get("expect", {})
     unexpect_rules = rules.get("unexpect", {})
 
     metric = True
     for r in expect_rules:
-        value = result
-        for k in r["key"]:
-            try:
-                value = value[k]
-            except KeyError:
-                return 0.
-        metric = metric and _match_value_to_rule(value, r)
+        value = result_data
+        try:
+            for k in r["key"]:
+                try:
+                    value = value[k]
+                except KeyError:
+                    logger.debug(f"Key '{k}' not found in result data, returning 0.0")
+                    return 0.
+                except TypeError:
+                    logger.debug(f"Cannot access key '{k}' - value is not a dictionary, returning 0.0")
+                    return 0.
+            metric = metric and _match_value_to_rule(value, r)
+        except Exception as e:
+            logger.error(f"Error processing expect rule {r}: {e}")
+            return 0.
 
     for r in unexpect_rules:
-        value = result
-        for k in r["key"]:
-            try:
-                value = value[k]
-            except KeyError:
-                value = None
-                break
-        metric = metric and not _match_value_to_rule(value, r)
+        value = result_data
+        try:
+            for k in r["key"]:
+                try:
+                    value = value[k]
+                except KeyError:
+                    value = None
+                    break
+                except TypeError:
+                    value = None
+                    break
+            metric = metric and not _match_value_to_rule(value, r)
+        except Exception as e:
+            logger.error(f"Error processing unexpect rule {r}: {e}")
+            return 0.
 
     return float(metric)
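For context, the expect/unexpect walk above consumes rule entries shaped roughly as follows (an illustrative dict; the "method"/"ref" semantics live in _match_value_to_rule and are assumed here):

```python
rules = {
    "expect": [
        # Walk result_data["settings"]["theme"]; the rule must match.
        {"key": ["settings", "theme"], "method": "eq", "ref": "dark"},
    ],
    "unexpect": [
        # Score drops to 0.0 if this key path *does* match its rule.
        {"key": ["settings", "telemetry"], "method": "eq", "ref": True},
    ],
}
# score = check_json("/tmp/result.json", rules)  # hypothetical call
```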
slides.py

@@ -73,6 +73,9 @@ def check_image_stretch_and_center(modified_ppt, original_ppt):
     original_slide_images = [shape for shape in original_slide.shapes if shape.shape_type == 13]
     modified_slide_images = [shape for shape in modified_slide.shapes if shape.shape_type == 13]
 
+    if not original_slide_images:
+        return 0.
+
     the_image = original_slide_images[0]
 
     the_modified_image = None
@@ -395,12 +398,38 @@ def compare_pptx_files(file1_path, file2_path, **options):
             table2 = shape2.table
             if enable_debug:
                 debug_logger.debug(f" Shape {shape_idx} - Comparing TABLE with {len(table1.rows)} rows and {len(table1.columns)} columns")
                 debug_logger.debug(f" Shape {shape_idx} - Table2 has {len(table2.rows)} rows and {len(table2.columns)} columns")
 
+            # Check if tables have the same dimensions
+            if len(table1.rows) != len(table2.rows) or len(table1.columns) != len(table2.columns):
+                if enable_debug:
+                    debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Table dimensions differ:")
+                    debug_logger.debug(f"   Table1: {len(table1.rows)} rows x {len(table1.columns)} columns")
+                    debug_logger.debug(f"   Table2: {len(table2.rows)} rows x {len(table2.columns)} columns")
+                return 0
+
             for row_idx in range(len(table1.rows)):
                 for col_idx in range(len(table1.columns)):
                     cell1 = table1.cell(row_idx, col_idx)
                     cell2 = table2.cell(row_idx, col_idx)
 
+                    # Check if cells have the same number of paragraphs
+                    if len(cell1.text_frame.paragraphs) != len(cell2.text_frame.paragraphs):
+                        if enable_debug:
+                            debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Cell [{row_idx},{col_idx}] - Different number of paragraphs:")
+                            debug_logger.debug(f"   Cell1 paragraphs: {len(cell1.text_frame.paragraphs)}")
+                            debug_logger.debug(f"   Cell2 paragraphs: {len(cell2.text_frame.paragraphs)}")
+                        return 0
+
                     for para_idx, (para1, para2) in enumerate(zip(cell1.text_frame.paragraphs, cell2.text_frame.paragraphs)):
+                        # Check if paragraphs have the same number of runs
+                        if len(para1.runs) != len(para2.runs):
+                            if enable_debug:
+                                debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Cell [{row_idx},{col_idx}], Para {para_idx} - Different number of runs:")
+                                debug_logger.debug(f"   Para1 runs: {len(para1.runs)}")
+                                debug_logger.debug(f"   Para2 runs: {len(para2.runs)}")
+                            return 0
+
                         for run_idx, (run1, run2) in enumerate(zip(para1.runs, para2.runs)):
                             # Check font color
                             if hasattr(run1.font.color, "rgb") and hasattr(run2.font.color, "rgb"):
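The explicit count checks added here matter because zip() silently truncates to the shorter sequence, so a table cell with extra paragraphs or runs could previously compare as equal. A minimal demonstration:

```python
a = ["one", "two"]
b = ["one", "two", "extra"]

# zip() never yields the unmatched "extra", so pairwise comparison passes:
all(x == y for x, y in zip(a, b))  # True, despite the extra element

# Comparing lengths first, as the new code does, exposes the difference:
len(a) == len(b)  # False -> mismatch detected, comparison returns 0
```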
@@ -451,6 +480,14 @@ def compare_pptx_files(file1_path, file2_path, **options):
             if shape1.text.strip() != shape2.text.strip() and examine_text:
                 return 0
 
+            # check if the number of paragraphs are the same
+            if len(shape1.text_frame.paragraphs) != len(shape2.text_frame.paragraphs):
+                if enable_debug:
+                    debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} - Different number of paragraphs:")
+                    debug_logger.debug(f"   Shape1 paragraphs: {len(shape1.text_frame.paragraphs)}")
+                    debug_logger.debug(f"   Shape2 paragraphs: {len(shape2.text_frame.paragraphs)}")
+                return 0
+
             # check if the paragraphs are the same
             para_idx = 0
             for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs):
@@ -487,6 +524,14 @@ def compare_pptx_files(file1_path, file2_path, **options):
                 if para1.level != para2.level and examine_indent:
                     return 0
 
+                # check if the number of runs are the same
+                if len(para1.runs) != len(para2.runs):
+                    if enable_debug:
+                        debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Different number of runs:")
+                        debug_logger.debug(f"   Para1 runs: {len(para1.runs)}")
+                        debug_logger.debug(f"   Para2 runs: {len(para2.runs)}")
+                    return 0
+
                 for run1, run2 in zip(para1.runs, para2.runs):
 
                     # check if the font properties are the same
@@ -634,6 +679,12 @@ def compare_pptx_files(file1_path, file2_path, **options):
                     debug_logger.debug(f" MISMATCH: Text differs - '{tshape1.text.strip()}' vs '{tshape2.text.strip()}'")
                 return 0
 
+            # Check if text shapes have the same number of paragraphs
+            if len(tshape1.text_frame.paragraphs) != len(tshape2.text_frame.paragraphs):
+                if enable_debug:
+                    debug_logger.debug(f" MISMATCH: Different number of paragraphs - {len(tshape1.text_frame.paragraphs)} vs {len(tshape2.text_frame.paragraphs)}")
+                return 0
+
             # Compare alignment of each paragraph
             for para_idx, (para1, para2) in enumerate(zip(tshape1.text_frame.paragraphs, tshape2.text_frame.paragraphs)):
                 from pptx.enum.text import PP_ALIGN
table.py

@@ -36,8 +36,14 @@ def _parse_sheet_idx(sheet_idx: Union[int, str]
     # function _parse_sheet_idx {{{ #
     if isinstance(sheet_idx, int):
         try:
-            index: str = result_sheet_names[sheet_idx]
-        except:
+            if not result_sheet_names or sheet_idx >= len(result_sheet_names):
+                logger.error(f"Sheet index {sheet_idx} out of range. Available sheets: {result_sheet_names}")
+                index = ""
+            else:
+                index: str = result_sheet_names[sheet_idx]
+                logger.debug(f"Sheet index {sheet_idx} resolved to sheet: {index}")
+        except Exception as e:
+            logger.error(f"Error resolving sheet index {sheet_idx}: {e}")
             index = ""
         book: BOOK = result
     elif sheet_idx.startswith("RI"):
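The bare except around the index lookup becomes an explicit bounds check. Restated as a standalone helper (a sketch; the function name and logger name are illustrative, not from table.py):

```python
import logging

logger = logging.getLogger("desktopenv.metrics.table")


def resolve_sheet_name(sheet_idx: int, sheet_names: list) -> str:
    """Bounds-checked version of the integer branch in _parse_sheet_idx."""
    if not sheet_names or sheet_idx >= len(sheet_names):
        logger.error(f"Sheet index {sheet_idx} out of range. Available sheets: {sheet_names}")
        return ""
    return sheet_names[sheet_idx]


resolve_sheet_name(0, ["Sheet1", "Summary"])  # -> "Sheet1"
resolve_sheet_name(5, ["Sheet1", "Summary"])  # -> "" (logged, no exception)
```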
@@ -114,12 +120,21 @@ def compare_table(result: str, expected: str = None, **options) -> float:
     """
 
     if result is None:
+        logger.error("Result file path is None")
         return 0.
+
+    # Check if result file exists
+    if not os.path.exists(result):
+        logger.error(f"Result file not found: {result}")
+        return 0.
+
     try:
+        logger.info(f"Loading result file: {result}")
         xlworkbookr: Workbook = openpyxl.load_workbook(filename=result)
         pdworkbookr = pd.ExcelFile(result)
-    except:
+        logger.info(f"Successfully loaded result file with sheets: {pdworkbookr.sheet_names}")
+    except Exception as e:
+        logger.error(f"Failed to load result file {result}: {e}")
         return 0.
     worksheetr_names: List[str] = pdworkbookr.sheet_names
@@ -432,19 +447,35 @@ def compare_table(result: str, expected: str = None, **options) -> float:
             # props: dict like {attribute: {"method": str, "ref": anything}}
             # supported attributes: value & those supported by utils._read_cell_style
 
-            sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
-            if sheet is None:
-                return 0.
-            # data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke))
-            cell: Cell = sheet[r["coordinate"]]
-            metric: bool = True
-            for prpt, rule in r["props"].items():
-                if prpt == "value":
-                    val = read_cell_value(*parse_idx(r["sheet_idx"], result, expected), r["coordinate"])
-                else:
-                    val = _read_cell_style(prpt, cell)
-                metric = metric and _match_value_to_rule(val, rule)
+            try:
+                sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
+                if sheet is None:
+                    logger.error(f"Failed to load sheet for sheet_idx: {r['sheet_idx']}")
+                    return 0.
+                # data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke))
+                cell: Cell = sheet[r["coordinate"]]
+                metric: bool = True
+                for prpt, rule in r["props"].items():
+                    if prpt == "value":
+                        try:
+                            parsed_result = parse_idx(r["sheet_idx"], result, expected)
+                            logger.debug(f"parse_idx result: {parsed_result}")
+                            val = read_cell_value(*parsed_result, r["coordinate"])
+                            logger.debug(f"Cell {r['coordinate']} value: {val}")
+                        except Exception as e:
+                            logger.error(f"Failed to read cell value at {r['coordinate']}: {e}")
+                            val = None
+                    else:
+                        try:
+                            val = _read_cell_style(prpt, cell)
+                        except Exception as e:
+                            logger.error(f"Failed to read cell style {prpt} at {r['coordinate']}: {e}")
+                            val = None
+
+                    metric = metric and _match_value_to_rule(val, rule)
+            except Exception as e:
+                logger.error(f"Error in check_cell processing: {e}")
+                return 0.
 
             logger.debug("Assertion: %s[%s] :%s - %s"
                 , r["sheet_idx"], r["coordinate"]
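The comment at the top of this hunk documents the props shape; an illustrative check_cell rule under that shape (the coordinate, values, and style attribute are made up for the example):

```python
r = {
    "sheet_idx": 0,          # resolved through _parse_sheet_idx
    "coordinate": "B2",
    "props": {
        # "value" is read via read_cell_value; any other key goes
        # through _read_cell_style, with failures now logged as None.
        "value": {"method": "eq", "ref": 42},
        "font_color": {"method": "eq", "ref": "FF0000"},  # hypothetical attribute
    },
}
```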
utils.py

@@ -4,6 +4,7 @@ import functools
 import itertools
+import logging
 import operator
 import os
 import re
 import zipfile
 #import pandas as pd
@@ -33,10 +34,11 @@ V = TypeVar("Value")
 
+logger = logging.getLogger("desktopenv.metrics.utils")
+
-_xlsx_namespaces = [("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")
-                    , ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
-                    , ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
-                   ]
+_xlsx_namespaces = [
+    ("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main"),
+    ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
+    ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
+]
 _xlsx_ns_mapping = dict(_xlsx_namespaces)
 _xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
 _xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None
@@ -282,6 +284,13 @@ _shared_str_value_selector = lxml.cssselect.CSSSelector("oo|t", namespaces=_xlsx
 
 def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
     # read_cell_value {{{ #
+    logger.debug(f"Reading cell value from {xlsx_file}, sheet: {sheet_name}, coordinate: {coordinate}")
+
+    # Check if file exists
+    if not os.path.exists(xlsx_file):
+        logger.error(f"Excel file not found: {xlsx_file}")
+        return None
+
     try:
         with zipfile.ZipFile(xlsx_file, "r") as z_f:
             try:
@@ -308,9 +317,17 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
                                                    , namespaces=_xlsx_ns_mapping
                                                    )(sheet)
             if len(cells) == 0:
+                logger.debug(f"Cell {coordinate} not found in sheet {sheet_name}")
                 return None
             cell: _Element = cells[0]
-    except zipfile.BadZipFile:
+    except zipfile.BadZipFile as e:
+        logger.error(f"Bad zip file {xlsx_file}: {e}")
+        return None
+    except KeyError as e:
+        logger.error(f"Sheet {sheet_name} not found in {xlsx_file}: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"Error reading {xlsx_file}: {e}")
         return None
 
     cell: Dict[str, str] = xmltodict.parse(lxml.etree.tostring(cell, encoding="unicode")
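With these guards, read_cell_value degrades to None on a missing file, a corrupt archive, or an absent sheet or cell, rather than raising. A short usage sketch (paths are illustrative):

```python
val = read_cell_value("/tmp/result.xlsx", "Sheet1", "A1")
if val is None:
    # Missing file, bad zip, unknown sheet, or empty cell: all already
    # logged inside read_cell_value; the caller just fails the check.
    score = 0.0
```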