From 6f225b2a020b1e35fd4b1772df2d729d24b2c6c1 Mon Sep 17 00:00:00 2001 From: David Chang Date: Fri, 29 Dec 2023 21:43:33 +0800 Subject: [PATCH] ver Dec29thv2 re-organized functions w.r.t. comparing xlsx with a golden one --- desktop_env/evaluators/metrics/__init__.py | 2 +- desktop_env/evaluators/metrics/table.py | 111 ++++++++---------- desktop_env/evaluators/metrics/utils.py | 8 +- .../21df9241-f8d7-4509-b7f1-37e501a823f7.json | 7 +- .../2bd59342-0664-4ccb-ba87-79379096cc08.json | 8 +- .../347ef137-7eeb-4c80-a3bb-0951f26a8aff.json | 5 +- 6 files changed, 72 insertions(+), 69 deletions(-) diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 73090bc..d4aa8df 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -1,2 +1,2 @@ -from .table import compare_table, compare_with_sparklines, compare_with_charts +from .table import compare_table from .table import check_sheet_list, check_xlsx_freeze diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index 878db8a..7daa2fb 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -9,71 +9,62 @@ from typing import Dict, List from typing import Any -def compare_table(actual, expected): - df1 = pd.read_excel(expected) - df2 = pd.read_excel(actual) - - # Compare the DataFrames - return 1 if df1.equals(df2) else 0 - - -def compare_with_sparklines(actual: str, expected: str) -> float: - df1 = pd.read_excel(actual) - df2 = pd.read_excel(expected) - normal_content_metric: bool = df1.equals(df2) - print("Normal Contents Metric: {:}".format(normal_content_metric)) - - sp1 = load_sparklines(actual) - sp2 = load_sparklines(expected) - sparkline_metric: bool = sp1 == sp2 - print("Sparkline Metric: {:}".format(sparkline_metric)) - - return float(normal_content_metric and sparkline_metric) - - -def compare_with_charts(actual: str, expected: str, **options) -> float: +def compare_table(actual: str, expected: str, **options) -> float: """ Args: actual (str): path to result xlsx expected (str): path to gold xlsx - options (Dict[str, List[str]]): dict like {"chart_props": list of str} - giving the concerned chart properties + options (Dict[str, List[str]]): dict like + { + "features": list of str for other features, supports: + * sparkline + * chart + * number_format + "chart_props": list of str, giving the converned chart properties + } + + Return: + float: the score """ - df1 = pd.read_excel(actual) - df2 = pd.read_excel(expected) - normal_content_metric: bool = df1.equals(df2) - print("Normal Contents Metric: {:}".format(normal_content_metric)) + df1 = pd.read_excel(expected) + df2 = pd.read_excel(actual) + metric: bool = df1.equals(df2) + print("Normal Contents Metric: {:}".format(metric)) - charts1 = load_charts(actual, **options) - charts2 = load_charts(expected, **options) - chart_metric: bool = charts1 == charts2 - print("Chart Metric: {:}".format(chart_metric)) + features: List[str] = options.get("features", []) + for ftr in features: + workbook1: Workbook = openpyxl.load_workbook(actual) + workbook2: Workbook = openpyxl.load_workbook(expected) - return float(normal_content_metric and chart_metric) + if ftr=="sparkline": + sp1 = load_sparklines(actual) + sp2 = load_sparklines(expected) + new_metric: bool = sp1 == sp2 + print("Sparkline Metric: {:}".format(new_metric)) + elif ftr=="chart": + charts1 = load_charts(workbook1, **options) + charts2 = load_charts(workbook2, **options) + new_metric: bool = charts1 == charts2 + print("Chart Metric: {:}".format(new_metric)) + elif ftr=="number_format": + number_formats1: List[str] = [ c.number_format.lower()\ + for col in workbook1.active.iter_cols()\ + for c in col\ + if c.data_type=="n" + ] + number_formats2: List[str] = [ c.number_format.lower()\ + for col in workbook2.active.iter_cols()\ + for c in col\ + if c.data_type=="n" + ] + new_metric: bool = number_formats1==number_formats2 + print("Number Format Metric: {:}".format(new_metric)) + else: + raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr)) + metric = metric and new_metric -def compare_with_formats(actual: str, expected: str) -> float: - df1 = pd.read_excel(actual) - df2 = pd.read_excel(expected) - normal_content_metric: bool = df1.equals(df2) - print("Normal Contents Metric: {:}".format(normal_content_metric)) - - workbook1: Workbook = openpyxl.load_workbook(actual) - number_formats1: List[str] = [ c.number_format.lower()\ - for col in workbook1.active.iter_cols()\ - for c in col\ - if c.data_type=="n" - ] - workbook2: Workbook = openpyxl.load_workbook(expected) - number_formats2: List[str] = [ c.number_format.lower()\ - for col in workbook2.active.iter_cols()\ - for c in col\ - if c.data_type=="n" - ] - number_format_metric: bool = number_formats1==number_formats2 - print("Number Format Metric: {:}".format(number_format_metric)) - - return float(normal_content_metric & number_format_metric) + return float(metric) def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float: # workbook: Workbook = openpyxl.load_workbook(filename=result) @@ -155,9 +146,9 @@ if __name__ == '__main__': # ] # print(check_sheet_list(path1, rule)) - #path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx" - #path2 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx" - #print(compare_with_charts(path1, path2, chart_props=["type", "direction"])) + path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx" + path2 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx" + print(compare_table(path1, path2, features=["chart"], chart_props=["type", "direction"])) path1 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold.xlsx" path2 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold3.xlsx" @@ -172,4 +163,4 @@ if __name__ == '__main__': #print(worksheet1[position]) #print(worksheet1[position].value) #print(worksheet1[position].number_format) - print(compare_with_formats(path1, path2)) + print(compare_table(path1, path2, features=["number_format"])) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 65db158..b826d87 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -56,10 +56,10 @@ def load_sparklines(xlsx_file: str) -> Dict[str, str]: # type: "scatterChart" | "lineChart" | "barChart" # direction: "bar" (hori) | "col" (vert) # xtitle, ytitle, ztitle: str -def load_charts(xlsx_file: str, **options) -> Dict[str, Any]: +def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]: """ Args: - xlsx_file (str): path to xlsx + xlsx_file (Workbook): concerned excel book options (Dict[str, List[str]]): dict like {"chart_props": list of str} giving the concerned chart properties @@ -67,8 +67,8 @@ def load_charts(xlsx_file: str, **options) -> Dict[str, Any]: Dict[str, Any]: information of charts """ - workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file) - worksheet: Worksheet = workbook.active + #workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file) + worksheet: Worksheet = xlsx_file.active charts: List[ChartBase] = worksheet._charts chart_set: Dict[str, Any] = {} diff --git a/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json b/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json index b0f9577..4347fd6 100644 --- a/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json +++ b/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json @@ -27,7 +27,7 @@ "libreoffice_calc" ], "evaluator": { - "func": "compare_with_formats", + "func": "compare_table", "result": { "type": "vm_file", "path": "/home/david/Represent_in_millions_billions.xlsx", @@ -37,6 +37,11 @@ "type": "cloud_file", "path": "", "dest": "Represent_in_millions_billions_gold.xlsx" + }, + "options": { + "features": [ + "number_format" + ] } } } diff --git a/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json b/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json index aa483ba..4974fcf 100644 --- a/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json +++ b/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json @@ -26,7 +26,7 @@ "libreoffice calc" ], "evaluator": { - "func": "compare_with_sparklines", + "func": "compare_table", "expected": { "type": "cloud_file", "path": "https://drive.usercontent.google.com/download?id=1KQJJLVPGtTL_7ArEWvwwbFbJSiA3cgSE&export=download&authuser=0&confirm=t&uuid=6b11c721-caad-439a-b369-4c13c7a485df&at=APZUnTV5-1isKrDKSHV9NeJ6TDeS:1703509054094", @@ -36,6 +36,10 @@ "type": "vm_file", "path": "/home/david/OrderId_Month_Chart.xlsx", "dest": "OrderId_Month_Chart.xlsx" - } + }, + "options": { + "features": [ + "sparkline" + ] } } diff --git a/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json b/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json index fd7a7e7..329c4f0 100644 --- a/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json +++ b/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json @@ -27,7 +27,7 @@ "libreoffice_calc" ], "evaluator": { - "func": "compare_with_charts", + "func": "compare_table", "result": { "type": "vm_file", "path": "/home/david/Create_column_charts_using_statistics.xlsx", @@ -39,6 +39,9 @@ "dest": "Create_column_charts_using_statistics_gold.xlsx" }, "options": { + "features": [ + "chart" + ], "chart_props": [ "type", "direction"