From 0651495d8801ec3ae5b0bcf32d362a1c5a440b40 Mon Sep 17 00:00:00 2001 From: yuanmengqi Date: Mon, 14 Jul 2025 05:43:17 +0000 Subject: [PATCH] fix: Enhance error handling and logging across multiple evaluators - Added logging for file retrieval and error handling in file.py, improving robustness during file operations. - Implemented checks for file existence and parsing errors in general.py, enhancing reliability in JSON/YAML processing. - Improved table comparison logic in table.py with detailed error logging for sheet loading and cell value reading. - Enhanced metrics evaluation in slides.py with additional checks for paragraph and run counts, ensuring thorough comparison. - Updated utils.py to include file existence checks and detailed error logging during cell value reading. --- desktop_env/evaluators/getters/file.py | 45 ++++++++++-- desktop_env/evaluators/metrics/general.py | 90 ++++++++++++++++++----- desktop_env/evaluators/metrics/slides.py | 51 +++++++++++++ desktop_env/evaluators/metrics/table.py | 61 +++++++++++---- desktop_env/evaluators/metrics/utils.py | 27 +++++-- 5 files changed, 226 insertions(+), 48 deletions(-) diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py index f4ab03a..f329d10 100644 --- a/desktop_env/evaluators/getters/file.py +++ b/desktop_env/evaluators/getters/file.py @@ -1,10 +1,13 @@ import os +import logging from typing import Dict, List, Set from typing import Optional, Any, Union from datetime import datetime import requests import pandas as pd +logger = logging.getLogger("desktopenv.getter.file") + def get_content_from_vm_file(env, config: Dict[str, Any]) -> Any: """ @@ -101,16 +104,42 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option for i, (p, d) in enumerate(zip(paths, dests)): _path = os.path.join(env.cache_dir, d) - file = env.controller.get_file(p) - if file is None: + + try: + # Try to get file from VM + file = env.controller.get_file(p) + if file is None: + logger.warning(f"Failed to get file from VM: {p}") + if i in gives: + cache_paths.append(None) + continue + + if i in gives: + cache_paths.append(_path) + + # Write file with robust error handling + try: + # Ensure cache directory exists + os.makedirs(env.cache_dir, exist_ok=True) + + with open(_path, "wb") as f: + f.write(file) + logger.info(f"Successfully saved file: {_path} ({len(file)} bytes)") + + except IOError as e: + logger.error(f"IO error writing file {_path}: {e}") + if i in gives: + cache_paths[-1] = None # Replace the path we just added with None + except Exception as e: + logger.error(f"Unexpected error writing file {_path}: {e}") + if i in gives: + cache_paths[-1] = None + + except Exception as e: + logger.error(f"Error processing file {p}: {e}") if i in gives: cache_paths.append(None) - continue - - if i in gives: - cache_paths.append(_path) - with open(_path, "wb") as f: - f.write(file) + return cache_paths[0] if len(cache_paths)==1 else cache_paths diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index d0f6195..27356a2 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -298,34 +298,84 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str """ if result is None: + logger.warning("Result file path is None, returning 0.0") + return 0. 
+
+    # Check if file exists
+    if not os.path.exists(result):
+        logger.warning(f"Result file does not exist: {result}, returning 0.0")
+        return 0.
+
+    try:
+        with open(result, 'r', encoding='utf-8') as f:
+            if is_yaml:
+                try:
+                    # Use SafeLoader instead of Loader for better security and error handling
+                    result_data: Dict[str, Any] = yaml.safe_load(f)
+                    if result_data is None:
+                        logger.warning(f"YAML file {result} is empty or contains only null values, returning 0.0")
+                        return 0.
+                except yaml.YAMLError as e:
+                    logger.error(f"YAML parsing error in file {result}: {e}")
+                    logger.error("File content might be corrupted or have invalid YAML syntax")
+                    return 0.
+                except Exception as e:
+                    logger.error(f"Unexpected error parsing YAML file {result}: {e}")
+                    return 0.
+            else:
+                try:
+                    result_data: Dict[str, Any] = json.load(f)
+                except json.JSONDecodeError as e:
+                    logger.error(f"JSON parsing error in file {result}: {e}")
+                    return 0.
+                except Exception as e:
+                    logger.error(f"Unexpected error parsing JSON file {result}: {e}")
+                    return 0.
+    except IOError as e:
+        logger.error(f"IO error reading file {result}: {e}")
+        return 0.
+    except Exception as e:
+        logger.error(f"Unexpected error reading file {result}: {e}")
         return 0.
-    with open(result) as f:
-        if is_yaml:
-            result: Dict[str, Any] = yaml.load(f, Loader=yaml.Loader)
-        else:
-            result: Dict[str, Any] = json.load(f)
 
     expect_rules = rules.get("expect", {})
     unexpect_rules = rules.get("unexpect", {})
     metric = True
     for r in expect_rules:
-        value = result
-        for k in r["key"]:
-            try:
-                value = value[k]
-            except KeyError:
-                return 0.
-        metric = metric and _match_value_to_rule(value, r)
+        value = result_data
+        try:
+            for k in r["key"]:
+                try:
+                    value = value[k]
+                except KeyError:
+                    logger.debug(f"Key '{k}' not found in result data, returning 0.0")
+                    return 0.
+                except TypeError:
+                    logger.debug(f"Cannot access key '{k}' - value is not a dictionary, returning 0.0")
+                    return 0.
+            metric = metric and _match_value_to_rule(value, r)
+        except Exception as e:
+            logger.error(f"Error processing expect rule {r}: {e}")
+            return 0.
+
     for r in unexpect_rules:
-        value = result
-        for k in r["key"]:
-            try:
-                value = value[k]
-            except KeyError:
-                value = None
-                break
-        metric = metric and not _match_value_to_rule(value, r)
+        value = result_data
+        try:
+            for k in r["key"]:
+                try:
+                    value = value[k]
+                except KeyError:
+                    value = None
+                    break
+                except TypeError:
+                    value = None
+                    break
+            metric = metric and not _match_value_to_rule(value, r)
+        except Exception as e:
+            logger.error(f"Error processing unexpect rule {r}: {e}")
+            return 0.
+
     return float(metric)
diff --git a/desktop_env/evaluators/metrics/slides.py b/desktop_env/evaluators/metrics/slides.py
index bee4f1a..81c4af9 100644
--- a/desktop_env/evaluators/metrics/slides.py
+++ b/desktop_env/evaluators/metrics/slides.py
@@ -73,6 +73,9 @@ def check_image_stretch_and_center(modified_ppt, original_ppt):
     original_slide_images = [shape for shape in original_slide.shapes if shape.shape_type == 13]
     modified_slide_images = [shape for shape in modified_slide.shapes if shape.shape_type == 13]
 
+    if not original_slide_images:
+        return 0.
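+    # (without this guard, original_slide_images[0] below would raise IndexError)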
+
     the_image = original_slide_images[0]
     the_modified_image = None
 
@@ -395,12 +398,38 @@ def compare_pptx_files(file1_path, file2_path, **options):
                     table2 = shape2.table
                     if enable_debug:
                         debug_logger.debug(f"  Shape {shape_idx} - Comparing TABLE with {len(table1.rows)} rows and {len(table1.columns)} columns")
+                        debug_logger.debug(f"  Shape {shape_idx} - Table2 has {len(table2.rows)} rows and {len(table2.columns)} columns")
+
+                    # Check if tables have the same dimensions
+                    if len(table1.rows) != len(table2.rows) or len(table1.columns) != len(table2.columns):
+                        if enable_debug:
+                            debug_logger.debug(f"  MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Table dimensions differ:")
+                            debug_logger.debug(f"    Table1: {len(table1.rows)} rows x {len(table1.columns)} columns")
+                            debug_logger.debug(f"    Table2: {len(table2.rows)} rows x {len(table2.columns)} columns")
+                        return 0
+
                     for row_idx in range(len(table1.rows)):
                         for col_idx in range(len(table1.columns)):
                             cell1 = table1.cell(row_idx, col_idx)
                             cell2 = table2.cell(row_idx, col_idx)
 
+                            # Check if cells have the same number of paragraphs
+                            if len(cell1.text_frame.paragraphs) != len(cell2.text_frame.paragraphs):
+                                if enable_debug:
+                                    debug_logger.debug(f"  MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Cell [{row_idx},{col_idx}] - Different number of paragraphs:")
+                                    debug_logger.debug(f"    Cell1 paragraphs: {len(cell1.text_frame.paragraphs)}")
+                                    debug_logger.debug(f"    Cell2 paragraphs: {len(cell2.text_frame.paragraphs)}")
+                                return 0
+
                             for para_idx, (para1, para2) in enumerate(zip(cell1.text_frame.paragraphs, cell2.text_frame.paragraphs)):
+                                # Check if paragraphs have the same number of runs
+                                if len(para1.runs) != len(para2.runs):
+                                    if enable_debug:
+                                        debug_logger.debug(f"  MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Cell [{row_idx},{col_idx}], Para {para_idx} - Different number of runs:")
+                                        debug_logger.debug(f"    Para1 runs: {len(para1.runs)}")
+                                        debug_logger.debug(f"    Para2 runs: {len(para2.runs)}")
+                                    return 0
+
                                 for run_idx, (run1, run2) in enumerate(zip(para1.runs, para2.runs)):
                                     # Check font color
                                     if hasattr(run1.font.color, "rgb") and hasattr(run2.font.color, "rgb"):
@@ -451,6 +480,14 @@ def compare_pptx_files(file1_path, file2_path, **options):
             if shape1.text.strip() != shape2.text.strip() and examine_text:
                 return 0
 
+            # check if the number of paragraphs is the same
+            if len(shape1.text_frame.paragraphs) != len(shape2.text_frame.paragraphs):
+                if enable_debug:
+                    debug_logger.debug(f"  MISMATCH: Slide {slide_idx}, Shape {shape_idx} - Different number of paragraphs:")
+                    debug_logger.debug(f"    Shape1 paragraphs: {len(shape1.text_frame.paragraphs)}")
+                    debug_logger.debug(f"    Shape2 paragraphs: {len(shape2.text_frame.paragraphs)}")
+                return 0
+
             # check if the paragraphs are the same
             para_idx = 0
             for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs):
@@ -487,6 +524,14 @@ def compare_pptx_files(file1_path, file2_path, **options):
                 if para1.level != para2.level and examine_indent:
                     return 0
 
+                # check if the number of runs is the same
+                if len(para1.runs) != len(para2.runs):
+                    if enable_debug:
+                        debug_logger.debug(f"  MISMATCH: Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Different number of runs:")
+                        debug_logger.debug(f"    Para1 runs: {len(para1.runs)}")
+                        debug_logger.debug(f"    Para2 runs: {len(para2.runs)}")
+                    return 0
+
                 for run1, run2 in zip(para1.runs, para2.runs):
 
                     # check if the font properties are the same
@@ -634,6 +679,12 @@
debug_logger.debug(f" MISMATCH: Text differs - '{tshape1.text.strip()}' vs '{tshape2.text.strip()}'") return 0 + # Check if text shapes have the same number of paragraphs + if len(tshape1.text_frame.paragraphs) != len(tshape2.text_frame.paragraphs): + if enable_debug: + debug_logger.debug(f" MISMATCH: Different number of paragraphs - {len(tshape1.text_frame.paragraphs)} vs {len(tshape2.text_frame.paragraphs)}") + return 0 + # Compare alignment of each paragraph for para_idx, (para1, para2) in enumerate(zip(tshape1.text_frame.paragraphs, tshape2.text_frame.paragraphs)): from pptx.enum.text import PP_ALIGN diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index db51850..5a0f79d 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -36,8 +36,14 @@ def _parse_sheet_idx(sheet_idx: Union[int, str] # function _parse_sheet_idx {{{ # if isinstance(sheet_idx, int): try: - index: str = result_sheet_names[sheet_idx] - except: + if not result_sheet_names or sheet_idx >= len(result_sheet_names): + logger.error(f"Sheet index {sheet_idx} out of range. Available sheets: {result_sheet_names}") + index = "" + else: + index: str = result_sheet_names[sheet_idx] + logger.debug(f"Sheet index {sheet_idx} resolved to sheet: {index}") + except Exception as e: + logger.error(f"Error resolving sheet index {sheet_idx}: {e}") index = "" book: BOOK = result elif sheet_idx.startswith("RI"): @@ -114,12 +120,21 @@ def compare_table(result: str, expected: str = None, **options) -> float: """ if result is None: + logger.error("Result file path is None") + return 0. + + # Check if result file exists + if not os.path.exists(result): + logger.error(f"Result file not found: {result}") return 0. try: + logger.info(f"Loading result file: {result}") xlworkbookr: Workbook = openpyxl.load_workbook(filename=result) pdworkbookr = pd.ExcelFile(result) - except: + logger.info(f"Successfully loaded result file with sheets: {pdworkbookr.sheet_names}") + except Exception as e: + logger.error(f"Failed to load result file {result}: {e}") return 0. worksheetr_names: List[str] = pdworkbookr.sheet_names @@ -432,19 +447,35 @@ def compare_table(result: str, expected: str = None, **options) -> float: # props: dict like {attribute: {"method": str, "ref": anything}} # supported attributes: value & those supported by utils._read_cell_style - sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke)) - if sheet is None: - return 0. - # data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke)) - cell: Cell = sheet[r["coordinate"]] - metric: bool = True - for prpt, rule in r["props"].items(): - if prpt == "value": - val = read_cell_value(*parse_idx(r["sheet_idx"], result, expected), r["coordinate"]) - else: - val = _read_cell_style(prpt, cell) + try: + sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke)) + if sheet is None: + logger.error(f"Failed to load sheet for sheet_idx: {r['sheet_idx']}") + return 0. 
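+                # The property reads below fail soft: on any error, val falls back
+                # to None and the rule comparison proceeds instead of raising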
+ # data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke)) + cell: Cell = sheet[r["coordinate"]] + metric: bool = True + for prpt, rule in r["props"].items(): + if prpt == "value": + try: + parsed_result = parse_idx(r["sheet_idx"], result, expected) + logger.debug(f"parse_idx result: {parsed_result}") + val = read_cell_value(*parsed_result, r["coordinate"]) + logger.debug(f"Cell {r['coordinate']} value: {val}") + except Exception as e: + logger.error(f"Failed to read cell value at {r['coordinate']}: {e}") + val = None + else: + try: + val = _read_cell_style(prpt, cell) + except Exception as e: + logger.error(f"Failed to read cell style {prpt} at {r['coordinate']}: {e}") + val = None - metric = metric and _match_value_to_rule(val, rule) + metric = metric and _match_value_to_rule(val, rule) + except Exception as e: + logger.error(f"Error in check_cell processing: {e}") + return 0. logger.debug("Assertion: %s[%s] :%s - %s" , r["sheet_idx"], r["coordinate"] diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index e512a26..1136655 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -4,6 +4,7 @@ import functools import itertools import logging import operator +import os import re import zipfile #import pandas as pd @@ -33,10 +34,11 @@ V = TypeVar("Value") logger = logging.getLogger("desktopenv.metrics.utils") -_xlsx_namespaces = [("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main") - , ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main") - , ("xm", "http://schemas.microsoft.com/office/excel/2006/main") - ] +_xlsx_namespaces = [ + ("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main"), + ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"), + ("xm", "http://schemas.microsoft.com/office/excel/2006/main") +] _xlsx_ns_mapping = dict(_xlsx_namespaces) _xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces)) _xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None @@ -282,6 +284,13 @@ _shared_str_value_selector = lxml.cssselect.CSSSelector("oo|t", namespaces=_xlsx def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: # read_cell_value {{{ # + logger.debug(f"Reading cell value from {xlsx_file}, sheet: {sheet_name}, coordinate: {coordinate}") + + # Check if file exists + if not os.path.exists(xlsx_file): + logger.error(f"Excel file not found: {xlsx_file}") + return None + try: with zipfile.ZipFile(xlsx_file, "r") as z_f: try: @@ -308,9 +317,17 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: , namespaces=_xlsx_ns_mapping )(sheet) if len(cells) == 0: + logger.debug(f"Cell {coordinate} not found in sheet {sheet_name}") return None cell: _Element = cells[0] - except zipfile.BadZipFile: + except zipfile.BadZipFile as e: + logger.error(f"Bad zip file {xlsx_file}: {e}") + return None + except KeyError as e: + logger.error(f"Sheet {sheet_name} not found in {xlsx_file}: {e}") + return None + except Exception as e: + logger.error(f"Error reading {xlsx_file}: {e}") return None cell: Dict[str, str] = xmltodict.parse(lxml.etree.tostring(cell, encoding="unicode")
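
--
Illustration (appended note, not part of the diff): the intent across these hunks
is that evaluators fail soft on broken input - a missing or unparsable file now
logs and scores 0.0 (or yields None for a single cell) instead of raising. A
minimal sketch of that behaviour for check_json, assuming is_yaml defaults to
False and that the "method"/"ref" rule shape below matches _match_value_to_rule:

    from desktop_env.evaluators.metrics.general import check_json

    # Hypothetical rule set: expect a top-level "status" key equal to "ok"
    rules = {
        "expect": [{"key": ["status"], "method": "eq", "ref": "ok"}],
        "unexpect": [],
    }

    # Before this patch, a missing result file raised FileNotFoundError from
    # open(); now check_json logs a warning and returns 0.0
    assert check_json("/tmp/nonexistent_result.json", rules) == 0.0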