diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index 5c2dcdf..a17da39 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -13,7 +13,7 @@ from odf.text import P from odf.text import Span from skimage.color import deltaE_ciede2000 from skimage.color import rgb2lab -from fuzzywuzzy import fuzz +from rapidfuzz import fuzz logger = logging.getLogger("desktopenv.metric.docs") diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index f593524..a35c13b 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -5,19 +5,20 @@ import os.path # import operator from numbers import Number from typing import Any, Union, cast, Callable, Iterable -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Set import openpyxl import pandas as pd from openpyxl import Workbook from openpyxl.cell.cell import Cell -# from openpyxl.worksheet.cell_range import MultiCellRange +from openpyxl.worksheet.cell_range import MultiCellRange from openpyxl.worksheet.datavalidation import DataValidation from openpyxl.worksheet.worksheet import Worksheet from .utils import _match_value_to_rule, _read_cell_style, read_cell_value from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles\ , load_filters, load_pivot_tables +from rapidfuzz import fuzz # from openpyxl.utils import coordinate_to_tuple @@ -157,8 +158,8 @@ def compare_table(result: str, expected: str = None, **options) -> float: return 0. sheet2: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke)) - sheet1 = sheet1.round() - sheet2 = sheet2.round() + sheet1 = sheet1.round(error_limit) + sheet2 = sheet2.round(error_limit) metric: bool = sheet1.equals(sheet2) logger.debug("Sheet1: \n%s", str(sheet1)) logger.debug("Sheet2: \n%s", str(sheet2)) @@ -186,6 +187,61 @@ def compare_table(result: str, expected: str = None, **options) -> float: logger.debug("Assertion: %s =p= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric) # }}} Compare Sheet Data by Printed Value # + elif r["type"] == "sheet_fuzzy": + # Fuzzy Match for Ranges {{{ # + # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1" + # sheet_idx1: as sheet_idx0 + # rules: list of dict, each dict is like + # { "range": ["A1:B6", "C2:E5"], + # "type": "includes" | "includes_by" | "fuzzy_match" | "exact_match", # 0 includes 1, 0 includes_by 1 + # "threshold": 85, // for fuzzy match + # "ignore_case": true | false, + # "ignore_chars": " ()", # filtered out + # "trim_leadings": "+ ", # filtered by lstrip + # "trim_trailings": "", # filtered by rstrip + # "normalization": [["Rd", "Road"]], # filtered by replace + # } + + sheet1: Tuple[BOOK, str] = parse_idx(r["sheet_idx0"], result, expected) + sheet2: Tuple[BOOK, str] = parse_idx(r["sheet_idx1"], result, expected) + total_metric = True + for rl in r["rules"]: + for rng in MultiCellRange(rl["range"]): + for cdn in rng.cells: + value1: str = str(read_cell_value(*sheet1, cdn)) + value2: str = str(read_cell_value(*sheet2, cdn)) + + for rplc in rl.get("normalization", []): + value1 = value1.replace(rplc[0], rplc[1]) + value2 = value2.replace(rplc[0], rplc[1]) + if "trim_leadings" in rl: + value1 = value1.lstrip(rl["trim_leadings"]) + value2 = value2.lstrip(rl["trim_leadings"]) + if "trim_trailings" in rl: + value1 = value1.rstrip(rl["trim_trailings"]) + value2 = value2.rstrip(rl["trim_trailings"]) + if "ignore_chars" in rl: + ignore_chars: Set[str] = set(rl["ignore_chars"]) + value1 = "".join(filter(lambda ch: ch not in ignore_chars, value1)) + value2 = "".join(filter(lambda ch: ch not in ignore_chars, value2)) + if rl.get("ignore_case", False): + value1 = value1.lower() + value2 = value2.lower() + + if rl["type"]=="includes": + metric: bool = value1 in value2 + if rl["type"]=="includes_by": + metric: bool = value2 in value1 + if rl["type"]=="fuzzy_match": + metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.) + if rl["type"]=="exact_match": + metric: bool = value1==value2 + total_metric = total_metric and metric + + metric: bool = total_metric + logger.debug("Assertion: %s =~= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric) + # }}} Fuzzy Match for Ranges # + elif r["type"] == "sparkline": # Compare Sparklines {{{ # # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1" diff --git a/evaluation_examples/examples/multi_apps/26660ad1-6ebb-4f59-8cba-a8432dfe8d38.json b/evaluation_examples/examples/multi_apps/26660ad1-6ebb-4f59-8cba-a8432dfe8d38.json new file mode 100644 index 0000000..d09222a --- /dev/null +++ b/evaluation_examples/examples/multi_apps/26660ad1-6ebb-4f59-8cba-a8432dfe8d38.json @@ -0,0 +1,25 @@ +{ + "id": "26660ad1-6ebb-4f59-8cba-a8432dfe8d38", + "snapshot": "libreoffice_calc", + "instruction": "I want to test the quality of the network environment my laptop is currently in. Please measure my network situation through speedtest.net, export the measurement results, and save them to ~/Test/Speed.", + "source": "authors", + "config": [ + ], + "trajectory": "trajectories/26660ad1-6ebb-4f59-8cba-a8432dfe8d38", + "related_apps": [ + + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/examples/multi_apps/36037439-2044-4b50-b9d1-875b5a332143.json b/evaluation_examples/examples/multi_apps/36037439-2044-4b50-b9d1-875b5a332143.json new file mode 100644 index 0000000..a6174f7 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/36037439-2044-4b50-b9d1-875b5a332143.json @@ -0,0 +1,64 @@ +{ + "id": "36037439-2044-4b50-b9d1-875b5a332143", + "snapshot": "libreoffice_calc", + "instruction": "Could you please pull up the Google Scholar page of the corresponding author for me in Chrome?", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1iTjv59rn8vcvUsh2-M7T5BLyNoutCwYo&export=download&authuser=0&confirm=t&uuid=cc13ea06-8d21-4d15-adb4-8fec94304bde&at=APZUnTX9ydwrAV0UPVKzYS9-LBlH:1709520068240", + "path": "/home/user/Desktop/shi17a.pdf" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/shi17a.pdf" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('f11'); time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.scroll(-40)" + ] + } + } + ], + "trajectory": "trajectories/36037439-2044-4b50-b9d1-875b5a332143", + "related_apps": [ + "OS", + "Chrome" + ], + "evaluator": { + "func": "is_expected_url_pattern_match", + "result": { + "type": "active_url_from_accessTree", + "goto_prefix": "https://" + }, + "expected": { + "type": "rule", + "rules": { + "type": "url", + "url": "?hl=en&", + "expected": [ + "https://scholar.google.com/citations", + "user=qRAQ5BsAAAAJ" + ] + } + } + } +} diff --git a/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json b/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json new file mode 100644 index 0000000..63cd0d5 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json @@ -0,0 +1,54 @@ +{ + "id": "a82b78bb-7fde-4cb3-94a4-035baf10bcf0", + "snapshot": "libreoffice_calc", + "instruction": "I'm really enjoying this paper. Could you please find the personal webpages of the first author and the last three authors, and add them to a browser bookmark folder named 'Liked Authors'?", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1IlJ8kU5MlR6OqJHchsSUJzLCmcrG-8N7&export=download&authuser=0&confirm=t&uuid=d2a1810f-edea-4bfd-9d79-e668b9f11876&at=APZUnTVv_eqtC86YzkEU8_jIhC9W:1709522229162", + "path": "/home/user/Desktop/2206.08853.pdf" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/2206.08853.pdf" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('f11'); time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.scroll(-20)" + ] + } + } + ], + "trajectory": "trajectories/a82b78bb-7fde-4cb3-94a4-035baf10bcf0", + "related_apps": [ + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } +} diff --git a/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json b/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json index 67d82b5..989bc34 100644 --- a/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json +++ b/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json @@ -53,51 +53,75 @@ ], "evaluator": { "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "MUST_VISIT.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, { "type": "execute", "parameters": { "command": [ - "libreoffice", - "--convert-to", - "csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1", - "--outdir", - "/home/user/Desktop", - "/home/user/Desktop/MUST_VISIT.xlsx" + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } } ], "func": "compare_table", "result": { "type": "vm_file", - "path": [ - "/home/user/Desktop/MUST_VISIT.xlsx", - "/home/user/Desktop/MUST_VISIT-Sheet1.csv" - ], - "dest": [ - "MUST_VISIT.xlsx", - "MUST_VISIT-Sheet1.csv" - ], - "multi": true + "path": "/home/user/Desktop/MUST_VISIT.xlsx", + "dest": "MUST_VISIT.xlsx" }, "expected": { "type": "cloud_file", - "path": [ - "https://drive.google.com/uc?id=1MV6jBvRbbYwPqeFTd_nX40xzyltNhphl&export=download", - "https://drive.google.com/uc?id=1CGoRQDLw9-Ai7daq3qCz0o9kYSZB2WNn&export=download" - ], - "dest": [ - "MUST_VISIT-gt.xlsx", - "MUST_VISIT-gt-Sheet1.csv" - ], - "multi": true + "path": "https://drive.google.com/uc?id=1MV6jBvRbbYwPqeFTd_nX40xzyltNhphl&export=download", + "dest": "MUST_VISIT-gt.xlsx" }, "options": { "rules": [ { - "type": "sheet_print", + "type": "sheet_fuzzy", "sheet_idx0": "RNSheet1", - "sheet_idx1": "ENSheet1" + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": ["A1:A6", "D1:D6"], + "type": "exact_match" + }, + { + "range": ["B1:B6"], + "type": "fuzzy_match", + "threshold": 85, + "normalization": [ + ["Rd", "Road"], + ["St", "Street"] + ], + "ignore_case": true + }, + { + "range": ["C1:C6"], + "type": "includes", + "trim_leadings": "+ ", + "ignore_chars": " ()-" + } + ] } ] }