diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 740c9e2..f8dfefd 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -1 +1 @@ -from .table import compare_table +from .table import compare_table, compare_with_sparklines diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index bd49297..d984c0f 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -1,14 +1,74 @@ -def compare_table(expected, actual): - import pandas as pd +import pandas as pd +import zipfile +import lxml.etree +import lxml.cssselect +from lxml.etree import _Element +import xmltodict +#import pylightxl + +from typing import Dict, List +#from typing import Any + +def compare_table(actual, expected): df1 = pd.read_excel(expected) df2 = pd.read_excel(actual) # Compare the DataFrames return 1 if df1.equals(df2) else 0 +_xlsx_namespaces = [ ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main") + , ("xm", "http://schemas.microsoft.com/office/excel/2006/main") + ] +_xlsx_ns_mapping = dict(_xlsx_namespaces) +_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces)) +_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping) +#print(_sparklines_selector.css) +def _load_sparklines(xlsx_file: str) -> Dict[str, str]: + """ + Read the sparkline definitions from xl/worksheets/sheet1.xml of an xlsx file + + Args: + xlsx_file (str): path to xlsx + + Returns: + Dict[str, str]: sparkline definitions in form of + { + "F3": "Sheet1!C3:E3" + } + """ + + # read xlsx + with zipfile.ZipFile(xlsx_file, "r") as z_f: + with z_f.open("xl/worksheets/sheet1.xml") as f: + sheet1: _Element = lxml.etree.fromstring(f.read()) + sparklines: List[_Element] = _sparklines_selector(sheet1) + + sparklines_dict: Dict[str, str] = {} + for sp_l in sparklines: + sparkline_xml: str = 
lxml.etree.tostring(sp_l, encoding="unicode") + sparkline: Dict[str, Dict[str, str]] = xmltodict.parse( sparkline_xml + , process_namespaces=True + , namespaces=_xlsx_ns_imapping + ) + sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"] + return sparklines_dict + +def compare_with_sparklines(actual: str, expected: str) -> float: + df1 = pd.read_excel(actual) + df2 = pd.read_excel(expected) + normal_content_metric: bool = df1.equals(df2) + + sp1 = _load_sparklines(actual) + sp2 = _load_sparklines(expected) + sparkline_metric: bool = sp1 == sp2 + + return float(normal_content_metric and sparkline_metric) if __name__ == '__main__': - path1 = "" - path2 = "" - - print(compare_table(path1, path2)) + #path1 = "" + #path2 = "" + #print(compare_table(path1, path2)) + + path1 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart_gold.xlsx" + path2 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart.xlsx" + print(compare_with_sparklines(path1, path2)) diff --git a/evaluation_examples/examples/2bd59342-0664-4ccb-ba87-79379096cc08.json b/evaluation_examples/examples/2bd59342-0664-4ccb-ba87-79379096cc08.json index 98d773a..284f162 100644 --- a/evaluation_examples/examples/2bd59342-0664-4ccb-ba87-79379096cc08.json +++ b/evaluation_examples/examples/2bd59342-0664-4ccb-ba87-79379096cc08.json @@ -3,20 +3,39 @@ "snapshot": "libreoffice_calc", "instruction": "Make sparkline chart line by line", "source": "https://www.youtube.com/shorts/L3Z-F1QTQFY", - "config": { - "download": [ - [ - "", - "C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx" - ] - ], - "open": [ - "C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx" + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://101.43.24.67/s/wrEyMi8HsmFjQrZ/download/OrderId_Month_Chart.xlsx", + "path": "/home/david/OrderId_Month_Chart.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": 
"/home/david/OrderId_Month_Chart.xlsx" - ] - }, + } + } + ], "trajectory": "trajectories/2bd59342-0664-4ccb-ba87-79379096cc08", "related_apps": [ "libreoffice calc" ], - "evaluator": "evaluation_dir" + "evaluator": { + "expected": { + "type": "cloud_file", + "path": "https://101.43.24.67/s/t7pgJxNoAGFQWEM/download/OrderId_Month_Chart_gold.xlsx", + "dest": "OrderId_Month_Chart_gold.xlsx" + }, + "result": { + "type": "vm_file", + "path": "/home/david/OrderId_Month_Chart.xlsx", + "dest": "OrderId_Month_Chart.xlsx" + } + } } diff --git a/evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json b/evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json index e25e698..b34683a 100644 --- a/evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json +++ b/evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json @@ -3,32 +3,40 @@ "snapshot": "libreoffice_calc", "instruction": "Help me fill the columns of First Name, Last Name and Rank", "source": "https://www.youtube.com/shorts/uzPo_CPCHH8", - "config": { - "download": [ - [ - "https://drive.usercontent.google.com/download?id=1wDqap5cBfxnlqTNrZG61k_wDWTujl6AU&export=download&authuser=0&confirm=t&uuid=fd183b89-76b7-4dc5-880e-1045ed769562&at=APZUnTWp9RMafMg0xohhBWazN3YD:1701785710674", - "C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx" - ] - ], - "open": [ - "C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx" - ] - }, + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://101.43.24.67/s/FBip5fXoR4KEJaa", + "path": "/home/david/Employee_Roles_and_Ranks.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/david/Employee_Roles_and_Ranks.xlsx" + } + } + ], "trajectory": "trajectories/37608790-6147-45d0-9f20-1137bb35703d", "related_apps": [ "libreoffice calc" ], "evaluator": { - "func": "compare_table(expected, actual)", - "paths": { - "expected": { - "type": "cloud_file", - "path": 
"https://drive.usercontent.google.com/download?id=1dxpiUqP_CVvQp5tddxlwO3Cp1BqJ-ZDE&export=download&authuser=0&confirm=t&uuid=ccd204c7-07ce-4fdf-a5d4-a7e4f37b9ce6&at=APZUnTVBs7TgrVrDXpkiU8S7WbQo:1702360836747" - }, - "actual": { - "type": "vm_file", - "path": "C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx" - } + "func": "compare_table", + "expected": { + "type": "cloud_file", + "path": "https://101.43.24.67/s/wr7B4GeotNNoeHD", + "dest": "Employee_Roles_and_Ranks_gold.xlsx" + }, + "result": { + "type": "vm_file", + "path": "/home/david/Employee_Roles_and_Ranks.xlsx", + "dest": "Employee_Roles_and_Ranks.xlsx" } } } diff --git a/requirements.txt b/requirements.txt index f3907ea..28d6d88 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,6 @@ tqdm~=4.65.0 pandas~=2.0.3 flask~=3.0.0 requests-toolbelt~=1.0.0 +lxml +cssselect +xmltodict