This commit is contained in:
tsuky_chen
2024-03-04 21:44:17 +08:00
6 changed files with 254 additions and 31 deletions

View File

@@ -13,7 +13,7 @@ from odf.text import P
from odf.text import Span
from skimage.color import deltaE_ciede2000
from skimage.color import rgb2lab
from fuzzywuzzy import fuzz
from rapidfuzz import fuzz
logger = logging.getLogger("desktopenv.metric.docs")

View File

@@ -5,19 +5,20 @@ import os.path
# import operator
from numbers import Number
from typing import Any, Union, cast, Callable, Iterable
from typing import Dict, List, Tuple
from typing import Dict, List, Tuple, Set
import openpyxl
import pandas as pd
from openpyxl import Workbook
from openpyxl.cell.cell import Cell
# from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl.worksheet.worksheet import Worksheet
from .utils import _match_value_to_rule, _read_cell_style, read_cell_value
from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles\
, load_filters, load_pivot_tables
from rapidfuzz import fuzz
# from openpyxl.utils import coordinate_to_tuple
@@ -157,8 +158,8 @@ def compare_table(result: str, expected: str = None, **options) -> float:
return 0.
sheet2: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke))
sheet1 = sheet1.round()
sheet2 = sheet2.round()
sheet1 = sheet1.round(error_limit)
sheet2 = sheet2.round(error_limit)
metric: bool = sheet1.equals(sheet2)
logger.debug("Sheet1: \n%s", str(sheet1))
logger.debug("Sheet2: \n%s", str(sheet2))
@@ -186,6 +187,61 @@ def compare_table(result: str, expected: str = None, **options) -> float:
logger.debug("Assertion: %s =p= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Compare Sheet Data by Printed Value #
elif r["type"] == "sheet_fuzzy":
# Fuzzy Match for Ranges {{{ #
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
# sheet_idx1: as sheet_idx0
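# rules: list of dict, each dict is like
# { "range": ["A1:B6", "C2:E5"], # every cell of every listed range is compared
# "type": "includes" | "includes_by" | "fuzzy_match" | "exact_match", # 0 includes 1, 0 includes_by 1
# NOTE(review): the code below evaluates "includes" as `value1 in value2` (0 is contained in 1) and "includes_by" as `value2 in value1`, the reverse of the description above - confirm which is intended
# "threshold": 85, # used by "fuzzy_match" only; defaults to 85
# "ignore_case": true | false, # defaults to false; both values are lower-cased when true
# "ignore_chars": " ()", # every listed character is removed from both values
# "trim_leadings": "+ ", # listed characters are stripped from the left via lstrip
# "trim_trailings": "", # listed characters are stripped from the right via rstrip
# "normalization": [["Rd", "Road"]], # each [old, new] pair is applied via str.replace
# }
# the options are applied in this order: normalization, trim_leadings, trim_trailings, ignore_chars, ignore_case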
sheet1: Tuple[BOOK, str] = parse_idx(r["sheet_idx0"], result, expected)
sheet2: Tuple[BOOK, str] = parse_idx(r["sheet_idx1"], result, expected)
total_metric = True
for rl in r["rules"]:
for rng in MultiCellRange(rl["range"]):
for cdn in rng.cells:
value1: str = str(read_cell_value(*sheet1, cdn))
value2: str = str(read_cell_value(*sheet2, cdn))
for rplc in rl.get("normalization", []):
value1 = value1.replace(rplc[0], rplc[1])
value2 = value2.replace(rplc[0], rplc[1])
if "trim_leadings" in rl:
value1 = value1.lstrip(rl["trim_leadings"])
value2 = value2.lstrip(rl["trim_leadings"])
if "trim_trailings" in rl:
value1 = value1.rstrip(rl["trim_trailings"])
value2 = value2.rstrip(rl["trim_trailings"])
if "ignore_chars" in rl:
ignore_chars: Set[str] = set(rl["ignore_chars"])
value1 = "".join(filter(lambda ch: ch not in ignore_chars, value1))
value2 = "".join(filter(lambda ch: ch not in ignore_chars, value2))
if rl.get("ignore_case", False):
value1 = value1.lower()
value2 = value2.lower()
if rl["type"]=="includes":
metric: bool = value1 in value2
if rl["type"]=="includes_by":
metric: bool = value2 in value1
if rl["type"]=="fuzzy_match":
metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.)
if rl["type"]=="exact_match":
metric: bool = value1==value2
total_metric = total_metric and metric
metric: bool = total_metric
logger.debug("Assertion: %s =~= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Fuzzy Match for Ranges #
elif r["type"] == "sparkline":
# Compare Sparklines {{{ #
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"

View File

@@ -0,0 +1,25 @@
{
"id": "26660ad1-6ebb-4f59-8cba-a8432dfe8d38",
"snapshot": "libreoffice_calc",
"instruction": "I want to test the quality of the network environment my laptop is currently in. Please measure my network situation through speedtest.net, export the measurement results, and save them to ~/Test/Speed.",
"source": "authors",
"config": [
],
"trajectory": "trajectories/26660ad1-6ebb-4f59-8cba-a8432dfe8d38",
"related_apps": [
],
"evaluator": {
"postconfig": [],
"func": "",
"result": {
},
"expected": {
},
"options": {
}
}
}

View File

@@ -0,0 +1,64 @@
{
"id": "36037439-2044-4b50-b9d1-875b5a332143",
"snapshot": "libreoffice_calc",
"instruction": "Could you please pull up the Google Scholar page of the corresponding author for me in Chrome?",
"source": "authors",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1iTjv59rn8vcvUsh2-M7T5BLyNoutCwYo&export=download&authuser=0&confirm=t&uuid=cc13ea06-8d21-4d15-adb4-8fec94304bde&at=APZUnTX9ydwrAV0UPVKzYS9-LBlH:1709520068240",
"path": "/home/user/Desktop/shi17a.pdf"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "/home/user/Desktop/shi17a.pdf"
}
},
{
"type": "sleep",
"parameters": {
"seconds": 2
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('f11'); time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.scroll(-40)"
]
}
}
],
"trajectory": "trajectories/36037439-2044-4b50-b9d1-875b5a332143",
"related_apps": [
"OS",
"Chrome"
],
"evaluator": {
"func": "is_expected_url_pattern_match",
"result": {
"type": "active_url_from_accessTree",
"goto_prefix": "https://"
},
"expected": {
"type": "rule",
"rules": {
"type": "url",
"url": "?hl=en&",
"expected": [
"https://scholar.google.com/citations",
"user=qRAQ5BsAAAAJ"
]
}
}
}
}

View File

@@ -0,0 +1,54 @@
{
"id": "a82b78bb-7fde-4cb3-94a4-035baf10bcf0",
"snapshot": "libreoffice_calc",
"instruction": "I'm really enjoying this paper. Could you please find the personal webpages of the first author and the last three authors, and add them to a browser bookmark folder named 'Liked Authors'?",
"source": "authors",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1IlJ8kU5MlR6OqJHchsSUJzLCmcrG-8N7&export=download&authuser=0&confirm=t&uuid=d2a1810f-edea-4bfd-9d79-e668b9f11876&at=APZUnTVv_eqtC86YzkEU8_jIhC9W:1709522229162",
"path": "/home/user/Desktop/2206.08853.pdf"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "/home/user/Desktop/2206.08853.pdf"
}
},
{
"type": "sleep",
"parameters": {
"seconds": 2
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('f11'); time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.scroll(-20)"
]
}
}
],
"trajectory": "trajectories/a82b78bb-7fde-4cb3-94a4-035baf10bcf0",
"related_apps": [
],
"evaluator": {
"postconfig": [],
"func": "",
"result": {
},
"expected": {
},
"options": {
}
}
}

View File

@@ -53,51 +53,75 @@
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "MUST_VISIT.xlsx - LibreOffice Calc",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"libreoffice",
"--convert-to",
"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1",
"--outdir",
"/home/user/Desktop",
"/home/user/Desktop/MUST_VISIT.xlsx"
"python",
"-c",
"import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 1.0
}
}
],
"func": "compare_table",
"result": {
"type": "vm_file",
"path": [
"/home/user/Desktop/MUST_VISIT.xlsx",
"/home/user/Desktop/MUST_VISIT-Sheet1.csv"
],
"dest": [
"MUST_VISIT.xlsx",
"MUST_VISIT-Sheet1.csv"
],
"multi": true
"path": "/home/user/Desktop/MUST_VISIT.xlsx",
"dest": "MUST_VISIT.xlsx"
},
"expected": {
"type": "cloud_file",
"path": [
"https://drive.google.com/uc?id=1MV6jBvRbbYwPqeFTd_nX40xzyltNhphl&export=download",
"https://drive.google.com/uc?id=1CGoRQDLw9-Ai7daq3qCz0o9kYSZB2WNn&export=download"
],
"dest": [
"MUST_VISIT-gt.xlsx",
"MUST_VISIT-gt-Sheet1.csv"
],
"multi": true
"path": "https://drive.google.com/uc?id=1MV6jBvRbbYwPqeFTd_nX40xzyltNhphl&export=download",
"dest": "MUST_VISIT-gt.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_print",
"type": "sheet_fuzzy",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1"
"sheet_idx1": "ENSheet1",
"rules": [
{
"range": ["A1:A6", "D1:D6"],
"type": "exact_match"
},
{
"range": ["B1:B6"],
"type": "fuzzy_match",
"threshold": 85,
"normalization": [
["Rd", "Road"],
["St", "Street"]
],
"ignore_case": true
},
{
"range": ["C1:C6"],
"type": "includes",
"trim_leadings": "+ ",
"ignore_chars": " ()-"
}
]
}
]
}