diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index 5c2dcdf..a17da39 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -13,7 +13,7 @@ from odf.text import P from odf.text import Span from skimage.color import deltaE_ciede2000 from skimage.color import rgb2lab -from fuzzywuzzy import fuzz +from rapidfuzz import fuzz logger = logging.getLogger("desktopenv.metric.docs") diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index f593524..a35c13b 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -5,19 +5,20 @@ import os.path # import operator from numbers import Number from typing import Any, Union, cast, Callable, Iterable -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Set import openpyxl import pandas as pd from openpyxl import Workbook from openpyxl.cell.cell import Cell -# from openpyxl.worksheet.cell_range import MultiCellRange +from openpyxl.worksheet.cell_range import MultiCellRange from openpyxl.worksheet.datavalidation import DataValidation from openpyxl.worksheet.worksheet import Worksheet from .utils import _match_value_to_rule, _read_cell_style, read_cell_value from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles\ , load_filters, load_pivot_tables +from rapidfuzz import fuzz # from openpyxl.utils import coordinate_to_tuple @@ -157,8 +158,8 @@ def compare_table(result: str, expected: str = None, **options) -> float: return 0. sheet2: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke)) - sheet1 = sheet1.round() - sheet2 = sheet2.round() + sheet1 = sheet1.round(error_limit) + sheet2 = sheet2.round(error_limit) metric: bool = sheet1.equals(sheet2) logger.debug("Sheet1: \n%s", str(sheet1)) logger.debug("Sheet2: \n%s", str(sheet2)) @@ -186,6 +187,61 @@ def compare_table(result: str, expected: str = None, **options) -> float: logger.debug("Assertion: %s =p= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric) # }}} Compare Sheet Data by Printed Value # + elif r["type"] == "sheet_fuzzy": + # Fuzzy Match for Ranges {{{ # + # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1" + # sheet_idx1: as sheet_idx0 + # rules: list of dict, each dict is like + # { "range": ["A1:B6", "C2:E5"], + # "type": "includes" | "includes_by" | "fuzzy_match" | "exact_match", # 0 includes 1, 0 includes_by 1 + # "threshold": 85, // for fuzzy match + # "ignore_case": true | false, + # "ignore_chars": " ()", # filtered out + # "trim_leadings": "+ ", # filtered by lstrip + # "trim_trailings": "", # filtered by rstrip + # "normalization": [["Rd", "Road"]], # filtered by replace + # } + + sheet1: Tuple[BOOK, str] = parse_idx(r["sheet_idx0"], result, expected) + sheet2: Tuple[BOOK, str] = parse_idx(r["sheet_idx1"], result, expected) + total_metric = True + for rl in r["rules"]: + for rng in MultiCellRange(rl["range"]): + for cdn in rng.cells: + value1: str = str(read_cell_value(*sheet1, cdn)) + value2: str = str(read_cell_value(*sheet2, cdn)) + + for rplc in rl.get("normalization", []): + value1 = value1.replace(rplc[0], rplc[1]) + value2 = value2.replace(rplc[0], rplc[1]) + if "trim_leadings" in rl: + value1 = value1.lstrip(rl["trim_leadings"]) + value2 = value2.lstrip(rl["trim_leadings"]) + if "trim_trailings" in rl: + value1 = value1.rstrip(rl["trim_trailings"]) + value2 = value2.rstrip(rl["trim_trailings"]) + if "ignore_chars" in rl: + ignore_chars: Set[str] = set(rl["ignore_chars"]) + value1 = "".join(filter(lambda ch: ch not in ignore_chars, value1)) + value2 = "".join(filter(lambda ch: ch not in ignore_chars, value2)) + if rl.get("ignore_case", False): + value1 = value1.lower() + value2 = value2.lower() + + if rl["type"]=="includes": + metric: bool = value1 in value2 + if rl["type"]=="includes_by": + metric: bool = value2 in value1 + if rl["type"]=="fuzzy_match": + metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.) + if rl["type"]=="exact_match": + metric: bool = value1==value2 + total_metric = total_metric and metric + + metric: bool = total_metric + logger.debug("Assertion: %s =~= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric) + # }}} Fuzzy Match for Ranges # + elif r["type"] == "sparkline": # Compare Sparklines {{{ # # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1" diff --git a/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json b/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json index 67d82b5..989bc34 100644 --- a/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json +++ b/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json @@ -53,51 +53,75 @@ ], "evaluator": { "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "MUST_VISIT.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, { "type": "execute", "parameters": { "command": [ - "libreoffice", - "--convert-to", - "csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1", - "--outdir", - "/home/user/Desktop", - "/home/user/Desktop/MUST_VISIT.xlsx" + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } } ], "func": "compare_table", "result": { "type": "vm_file", - "path": [ - "/home/user/Desktop/MUST_VISIT.xlsx", - "/home/user/Desktop/MUST_VISIT-Sheet1.csv" - ], - "dest": [ - "MUST_VISIT.xlsx", - "MUST_VISIT-Sheet1.csv" - ], - "multi": true + "path": "/home/user/Desktop/MUST_VISIT.xlsx", + "dest": "MUST_VISIT.xlsx" }, "expected": { "type": "cloud_file", - "path": [ - "https://drive.google.com/uc?id=1MV6jBvRbbYwPqeFTd_nX40xzyltNhphl&export=download", - "https://drive.google.com/uc?id=1CGoRQDLw9-Ai7daq3qCz0o9kYSZB2WNn&export=download" - ], - "dest": [ - "MUST_VISIT-gt.xlsx", - "MUST_VISIT-gt-Sheet1.csv" - ], - "multi": true + "path": "https://drive.google.com/uc?id=1MV6jBvRbbYwPqeFTd_nX40xzyltNhphl&export=download", + "dest": "MUST_VISIT-gt.xlsx" }, "options": { "rules": [ { - "type": "sheet_print", + "type": "sheet_fuzzy", "sheet_idx0": "RNSheet1", - "sheet_idx1": "ENSheet1" + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": ["A1:A6", "D1:D6"], + "type": "exact_match" + }, + { + "range": ["B1:B6"], + "type": "fuzzy_match", + "threshold": 85, + "normalization": [ + ["Rd", "Road"], + ["St", "Street"] + ], + "ignore_case": true + }, + { + "range": ["C1:C6"], + "type": "includes", + "trim_leadings": "+ ", + "ignore_chars": " ()-" + } + ] } ] }