From 1bc08f3d3bde2393c8223405ab6a9d63af85a1d9 Mon Sep 17 00:00:00 2001 From: David Chang Date: Tue, 26 Dec 2023 21:07:42 +0800 Subject: [PATCH 1/4] ver Dec26thv3 forgot to export compare_with_charts in metrics/__init__.py --- desktop_env/evaluators/metrics/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 623f948..73090bc 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -1,2 +1,2 @@ -from .table import compare_table, compare_with_sparklines +from .table import compare_table, compare_with_sparklines, compare_with_charts from .table import check_sheet_list, check_xlsx_freeze From 54bf8ff9d14c1f3e3891b759227f9a53c581954f Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 27 Dec 2023 17:09:49 +0800 Subject: [PATCH 2/4] ver Dec27th improved load_charts --- desktop_env/evaluators/metrics/table.py | 119 +++----------------- desktop_env/evaluators/metrics/utils.py | 142 ++++++++++++++++++++++++ 2 files changed, 155 insertions(+), 106 deletions(-) create mode 100644 desktop_env/evaluators/metrics/utils.py diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index b81575a..bbb99de 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -1,14 +1,10 @@ import pandas as pd -import zipfile -import lxml.etree -import lxml.cssselect -from lxml.etree import _Element -import xmltodict #import pylightxl import openpyxl -from openpyxl import Workbook +#from openpyxl import Workbook from openpyxl.worksheet.worksheet import Worksheet -from openpyxl.chart._chart import ChartBase + +from utils import load_charts, load_sparklines from typing import Dict, List from typing import Any @@ -20,104 +16,35 @@ def compare_table(actual, expected): # Compare the DataFrames return 1 if df1.equals(df2) else 0 -_xlsx_namespaces = [ 
("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main") - , ("xm", "http://schemas.microsoft.com/office/excel/2006/main") - ] -_xlsx_ns_mapping = dict(_xlsx_namespaces) -_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces)) -_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping) -#print(_sparklines_selector.css) -def _load_sparklines(xlsx_file: str) -> Dict[str, str]: - """ - This function modifies data_frame in-place - - Args: - xlsx_file (str): path to xlsx - - Returns: - List[Dict[str, str]]: sparkline definitions in form of - { - "F3": "Sheet1!C3:E3" - } - """ - - # read xlsx - with zipfile.ZipFile(xlsx_file, "r") as z_f: - with z_f.open("xl/worksheets/sheet1.xml") as f: - sheet1: _Element = lxml.etree.fromstring(f.read()) - sparklines: List[_Element] = _sparklines_selector(sheet1) - - sparklines_dict: Dict[str, str] = {} - for sp_l in sparklines: - sparkline_xml: str = lxml.etree.tostring(sp_l, encoding="unicode") - sparkline: Dict[str, Dict[str, str]] = xmltodict.parse( sparkline_xml - , process_namespaces=True - , namespaces=_xlsx_ns_imapping - ) - sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"] - return sparklines_dict - def compare_with_sparklines(actual: str, expected: str) -> float: df1 = pd.read_excel(actual) df2 = pd.read_excel(expected) normal_content_metric: bool = df1.equals(df2) print("Normal Contents Metric: {:}".format(normal_content_metric)) - sp1 = _load_sparklines(actual) - sp2 = _load_sparklines(expected) + sp1 = load_sparklines(actual) + sp2 = load_sparklines(expected) sparkline_metric: bool = sp1 == sp2 print("Sparkline Metric: {:}".format(sparkline_metric)) return float(normal_content_metric and sparkline_metric) -def _load_charts(xlsx_file: str) -> Dict[str, Any]: +def compare_with_charts(actual: str, expected: str, **options) -> float: """ Args: - xlsx_file (str): path to xlsx - - Returns: - Dict[str, Any]: 
information of charts + actual (str): path to result xlsx + expected (str): path to gold xlsx + options (Dict[str, List[str]]): dict like {"chart_props": list of str} + giving the concerned chart properties """ - workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file) - worksheet: Worksheet = workbook.active - charts: List[ChartBase] = worksheet._charts - - chart_set: Dict[str, Any] = {} - for ch in charts: - series: List[str] = [] - for ser in ch.series: - value_num = ser.val.numRef.f\ - if hasattr(ser.val, "numRef") and hasattr(ser.val.numRef, "f")\ - else "" - value_str = ser.val.strRef.f\ - if hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f")\ - else "" - categ_num = ser.cat.numRef.f\ - if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f")\ - else "" - categ_str = ser.cat.strRef.f\ - if hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f")\ - else "" - series.append( "{:},{:},{:},{:}".format( value_num, value_str - , categ_num, categ_str - ) - ) - series: str = ";".join(series) - - # TODO: maybe more aspects, like chart type - info: Dict[str, Any] = {} - chart_set[series] = info - return chart_set - -def compare_with_charts(actual: str, expected: str) -> float: df1 = pd.read_excel(actual) df2 = pd.read_excel(expected) normal_content_metric: bool = df1.equals(df2) print("Normal Contents Metric: {:}".format(normal_content_metric)) - charts1 = _load_charts(actual) - charts2 = _load_charts(expected) + charts1 = load_charts(actual, **options) + charts2 = load_charts(expected, **options) chart_metric: bool = charts1==charts2 print("Chart Metric: {:}".format(chart_metric)) @@ -202,25 +129,5 @@ if __name__ == '__main__': #print(check_sheet_list(path1, rule)) path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx" - #workbook1: Workbook = openpyxl.load_workbook(filename=path1) - #worksheet1: Worksheet = workbook1.active - #charts: List[ChartBase] = worksheet1._charts - #print(len(charts)) - 
#print(type(charts[0])) -# - #print(len(charts[0].series)) - #print(type(charts[0].series[0])) - #print(type(charts[0].series[0].val)) - ##print(charts[0].series[0].val) - #print(charts[0].series[0].val.numRef.f) -# - #print(type(charts[0].series[0].cat)) - ##print(charts[0].series[0].cat) - #print(charts[0].series[0].cat.numRef) - #print(charts[0].series[0].cat.strRef) - #print(charts[0].series[0].cat.strRef.f) -# - #df1 = pd.read_excel(path1) - #print(df1) path2 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx" - print(compare_with_charts(path1, path2)) + print(compare_with_charts(path1, path2, chart_props=["title"])) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py new file mode 100644 index 0000000..ff4317e --- /dev/null +++ b/desktop_env/evaluators/metrics/utils.py @@ -0,0 +1,142 @@ +import zipfile +import lxml.etree +import lxml.cssselect +from lxml.etree import _Element +import xmltodict +import openpyxl +from openpyxl import Workbook +from openpyxl.worksheet.worksheet import Worksheet +from openpyxl.chart._chart import ChartBase + +from typing import Dict, List, Set +from typing import Any + +_xlsx_namespaces = [ ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main") + , ("xm", "http://schemas.microsoft.com/office/excel/2006/main") + ] +_xlsx_ns_mapping = dict(_xlsx_namespaces) +_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces)) +_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping) +#print(_sparklines_selector.css) +def load_sparklines(xlsx_file: str) -> Dict[str, str]: + """ + Read the sparkline definitions stored in xl/worksheets/sheet1.xml of the given xlsx file + + Args: + xlsx_file (str): path to xlsx + + Returns: + Dict[str, str]: sparkline definitions in the form of + { + "F3": "Sheet1!C3:E3" + } + """ + + # read xlsx + with zipfile.ZipFile(xlsx_file, "r") as z_f: + with z_f.open("xl/worksheets/sheet1.xml") as f: + sheet1:
_Element = lxml.etree.fromstring(f.read()) + sparklines: List[_Element] = _sparklines_selector(sheet1) + + sparklines_dict: Dict[str, str] = {} + for sp_l in sparklines: + sparkline_xml: str = lxml.etree.tostring(sp_l, encoding="unicode") + sparkline: Dict[str, Dict[str, str]] = xmltodict.parse( sparkline_xml + , process_namespaces=True + , namespaces=_xlsx_ns_imapping + ) + sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"] + return sparklines_dict + +def load_charts(xlsx_file: str, **options) -> Dict[str, Any]: + """ + Args: + xlsx_file (str): path to xlsx + options (Dict[str, List[str]]): dict like {"chart_props": list of str} + giving the concerned chart properties + + Returns: + Dict[str, Any]: information of charts + """ + + workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file) + worksheet: Worksheet = workbook.active + charts: List[ChartBase] = worksheet._charts + + chart_set: Dict[str, Any] = {} + chart_props: Set[str] = set(options["chart_props"]) if "chart_props" in options else set() + for ch in charts: + series: List[str] = [] + for ser in ch.series: + value_num = ser.val.numRef.f\ + if hasattr(ser.val, "numRef") and hasattr(ser.val.numRef, "f")\ + else "" + value_str = ser.val.strRef.f\ + if hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f")\ + else "" + categ_num = ser.cat.numRef.f\ + if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f")\ + else "" + categ_str = ser.cat.strRef.f\ + if hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f")\ + else "" + series.append( "{:},{:},{:},{:}".format( value_num, value_str + , categ_num, categ_str + ) + ) + series: str = ";".join(series) + + # TODO: maybe more aspects, like chart type + info: Dict[str, Any] = {} + + if "title" in chart_props: + info["title"] = ch.title.tx.rich.p[0].r[0].t + if "anchor" in chart_props: + info["anchor"] = [ ch.anchor.editAs + , ch.anchor._from.col, ch.anchor.to.row + , ch.anchor.to.col, ch.anchor.to.row + ] 
+ if "width" in chart_props: + info["width"] = ch.width + if "height" in chart_props: + info["height"] = ch.height + chart_set[series] = info + return chart_set + +if __name__ == "__main__": + path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx" + workbook1: Workbook = openpyxl.load_workbook(filename=path1) + worksheet1: Worksheet = workbook1.active + charts: List[ChartBase] = worksheet1._charts + #print(len(charts)) + #print(type(charts[0])) +# + #print(len(charts[0].series)) + #print(type(charts[0].series[0])) + #print(type(charts[0].series[0].val)) + ##print(charts[0].series[0].val) + #print(charts[0].series[0].val.numRef.f) +# + #print(type(charts[0].series[0].cat)) + ##print(charts[0].series[0].cat) + #print(charts[0].series[0].cat.numRef) + #print(charts[0].series[0].cat.strRef) + #print(charts[0].series[0].cat.strRef.f) + + #print(type(charts[0].title.tx.strRef)) + #print(type(charts[0].title.tx.rich)) + #print(type(charts[0].title.txPr)) + #print(len(charts[0].title.tx.rich.p)) + #print(len(charts[0].title.tx.rich.p[0].r)) + #print(type(charts[0].title.tx.rich.p[0].r[0])) + #print(type(charts[0].title.tx.rich.p[0].r[0].t)) + #print(charts[0].title.tx.rich.p[0].r[0].t) + + #print(type(charts[0].anchor)) + #print(charts[0].anchor.editAs) + #print(charts[0].anchor._from.col, charts[0].anchor.to.row) + #print(charts[0].anchor.to.col, charts[0].anchor.to.row) + + #df1 = pd.read_excel(path1) + #print(df1) + print(load_charts(path1, chart_props=["title", "width", "height"])) From 4e5920264a61f743ee3cfb6a6eb9b7097808a0f7 Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 27 Dec 2023 17:51:41 +0800 Subject: [PATCH 3/4] ver Dec27thv2 updated a task config updated documents fixed the options feature of evaluator updated with new properties of charts current load_charts should be ok, I think --- desktop_env/envs/desktop_env.py | 4 +++- desktop_env/evaluators/metrics/table.py | 2 +- desktop_env/evaluators/metrics/utils.py | 
18 +++++++++++++++++- .../347ef137-7eeb-4c80-a3bb-0951f26a8aff.json | 5 +++++ 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py index ec00181..7aaf84e 100644 --- a/desktop_env/envs/desktop_env.py +++ b/desktop_env/envs/desktop_env.py @@ -79,6 +79,7 @@ class DesktopEnv(gym.Env): self.metric: Metric = getattr(metrics, self.evaluator["func"]) self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"])) self.expected_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["expected"]["type"])) + self.metric_options: Dict[str, Any] = self.evaluator.get("options", {}) # Initialize emulator and controller print("Initializing...") @@ -165,6 +166,7 @@ class DesktopEnv(gym.Env): self.metric: Metric = getattr(metrics, self.evaluator["func"]) self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"])) self.expected_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["expected"]["type"])) + self.metric_options = self.evaluator.get("options", {}) self.setup_controller.reset_cache_dir(self.cache_dir) @@ -237,7 +239,7 @@ class DesktopEnv(gym.Env): result = self.result_getter(self, self.evaluator["result"]) expected = self.expected_getter(self, self.evaluator["expected"]) - metric: float = self.metric(result, expected) + metric: float = self.metric(result, expected, **self.metric_options) return metric diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index bbb99de..b07e6e6 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -130,4 +130,4 @@ if __name__ == '__main__': path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx" path2 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx" - print(compare_with_charts(path1, path2, 
chart_props=["title"])) + print(compare_with_charts(path1, path2, chart_props=["type"])) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index ff4317e..a466834 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -48,6 +48,13 @@ def load_sparklines(xlsx_file: str) -> Dict[str, str]: sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"] return sparklines_dict +# Available Chart Properties: +# title: str +# anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1] +# width: number +# height: number +# type: "scatterChart" | "lineChart" | "barChart" +# xtitle, ytitle, ztitle: str def load_charts(xlsx_file: str, **options) -> Dict[str, Any]: """ Args: @@ -100,6 +107,15 @@ def load_charts(xlsx_file: str, **options) -> Dict[str, Any]: info["width"] = ch.width if "height" in chart_props: info["height"] = ch.height + if "type" in chart_props: + info["type"] = ch.tagname + + if "xtitle" in chart_props: + info["xtitle"] = ch.x_axis.title.tx.rich.p[0].r[0].t + if "ytitle" in chart_props: + info["ytitle"] = ch.y_axis.title.tx.rich.p[0].r[0].t + if "ztitle" in chart_props: + info["ztitle"] = ch.z_axis.title.tx.rich.p[0].r[0].t chart_set[series] = info return chart_set @@ -139,4 +155,4 @@ if __name__ == "__main__": #df1 = pd.read_excel(path1) #print(df1) - print(load_charts(path1, chart_props=["title", "width", "height"])) + print(load_charts(path1, chart_props=["title", "xtitle", "ytitle", "type"])) diff --git a/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json b/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json index 20258e0..548cd29 100644 --- a/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json +++ b/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json @@ -37,6 +37,11 @@ "type": 
"cloud_file", "path": "https://101.43.24.67/s/SLL4CgyMiyre3Ss/download/Create_column_charts_using_statistics_gold.xlsx", "dest": "Create_column_charts_using_statistics_gold.xlsx" + }, + "options": { + "chart_props": [ + "type" + ] } } } From 7320f0aec47081bc15a57cc3bf1d0906bd9136e0 Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 27 Dec 2023 18:00:16 +0800 Subject: [PATCH 4/4] ver Dec27thv3 added chart property of bar direction --- desktop_env/evaluators/metrics/table.py | 2 +- desktop_env/evaluators/metrics/utils.py | 5 ++++- .../347ef137-7eeb-4c80-a3bb-0951f26a8aff.json | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index b07e6e6..2366eed 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -130,4 +130,4 @@ if __name__ == '__main__': path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx" path2 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx" - print(compare_with_charts(path1, path2, chart_props=["type"])) + print(compare_with_charts(path1, path2, chart_props=["type", "direction"])) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index a466834..65db158 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -54,6 +54,7 @@ def load_sparklines(xlsx_file: str) -> Dict[str, str]: # width: number # height: number # type: "scatterChart" | "lineChart" | "barChart" +# direction: "bar" (hori) | "col" (vert) # xtitle, ytitle, ztitle: str def load_charts(xlsx_file: str, **options) -> Dict[str, Any]: """ @@ -109,6 +110,8 @@ def load_charts(xlsx_file: str, **options) -> Dict[str, Any]: info["height"] = ch.height if "type" in chart_props: info["type"] = ch.tagname + if "direction" in chart_props: + info["direction"] = ch.barDir if "xtitle" in 
chart_props: info["xtitle"] = ch.x_axis.title.tx.rich.p[0].r[0].t @@ -120,7 +123,7 @@ def load_charts(xlsx_file: str, **options) -> Dict[str, Any]: return chart_set if __name__ == "__main__": - path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx" + path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold_line_scatter.xlsx" workbook1: Workbook = openpyxl.load_workbook(filename=path1) worksheet1: Worksheet = workbook1.active charts: List[ChartBase] = worksheet1._charts diff --git a/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json b/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json index 548cd29..f5e9552 100644 --- a/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json +++ b/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json @@ -40,7 +40,8 @@ }, "options": { "chart_props": [ - "type" + "type", + "direction" ] } }