ver Dec27th

merged zdy into main
2023-12-27 20:40:23 +08:00
parent 50b82167d0 7320f0aec4
commit 2a9e5cc373
4 changed files with 183 additions and 107 deletions
--- a/desktop_env/envs/desktop_env.py
+++ b/desktop_env/envs/desktop_env.py
@@ -79,6 +79,7 @@ class DesktopEnv(gym.Env):
        self.metric: Metric = getattr(metrics, self.evaluator["func"])
        self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
        self.expected_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["expected"]["type"]))
        self.metric_options: Dict[str, Any] = self.evaluator.get("options", {})
        # Initialize emulator and controller
        print("Initializing...")
@@ -165,6 +166,7 @@ class DesktopEnv(gym.Env):
            self.metric: Metric = getattr(metrics, self.evaluator["func"])
            self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
            self.expected_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["expected"]["type"]))
            self.metric_options = self.evaluator.get("options", {})
            self.setup_controller.reset_cache_dir(self.cache_dir)
@@ -237,7 +239,7 @@ class DesktopEnv(gym.Env):
        result = self.result_getter(self, self.evaluator["result"])
        expected = self.expected_getter(self, self.evaluator["expected"])
-        metric: float = self.metric(result, expected)
+        metric: float = self.metric(result, expected, **self.metric_options)
        return metric
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -1,14 +1,10 @@
 import pandas as pd
 import zipfile
 import lxml.etree
 import lxml.cssselect
 from lxml.etree import _Element
 import xmltodict
 #import pylightxl
 import openpyxl
-from openpyxl import Workbook
+#from openpyxl import Workbook
 from openpyxl.worksheet.worksheet import Worksheet
-from openpyxl.chart._chart import ChartBase
+
 from utils import load_charts, load_sparklines
 from typing import Dict, List
 from typing import Any
@@ -20,104 +16,35 @@ def compare_table(actual, expected):
    # Compare the DataFrames
    return 1 if df1.equals(df2) else 0
 # (prefix, URI) pairs for the XML namespaces used when selecting sparkline
 # elements from a worksheet's XML.
 _xlsx_namespaces = [ ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
                   , ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
                   ]
 # prefix -> URI, handed to the CSS selector below
 _xlsx_ns_mapping = dict(_xlsx_namespaces)
 # URI -> prefix, handed to xmltodict so that parsed keys look like "x14:sparkline"
 _xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
 # Pre-compiled selector matching every <x14:sparkline> element
 _sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
 #print(_sparklines_selector.css)
 def _load_sparklines(xlsx_file: str) -> Dict[str, str]:
    """
    Read the sparkline definitions stored in "xl/worksheets/sheet1.xml" of
    the given xlsx archive. Only that one worksheet part is inspected.

    Args:
        xlsx_file (str): path to xlsx

    Returns:
        Dict[str, str]: sparkline definitions, mapping the "xm:sqref" text of
          each sparkline to its "xm:f" text, in form of
          {
            "F3": "Sheet1!C3:E3"
          }
    """

    # read xlsx
    with zipfile.ZipFile(xlsx_file, "r") as z_f:
        with z_f.open("xl/worksheets/sheet1.xml") as f:
            sheet1: _Element = lxml.etree.fromstring(f.read())
            sparklines: List[_Element] = _sparklines_selector(sheet1)

    sparklines_dict: Dict[str, str] = {}
    for sp_l in sparklines:
        # Serialize each element and re-parse it with xmltodict so the
        # children can be read by their prefixed names.
        sparkline_xml: str = lxml.etree.tostring(sp_l, encoding="unicode")
        sparkline: Dict[str, Dict[str, str]] = xmltodict.parse( sparkline_xml
                                                              , process_namespaces=True
                                                              , namespaces=_xlsx_ns_imapping
                                                              )
        sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"]
    return sparklines_dict
 def compare_with_sparklines(actual: str, expected: str) -> float:
    df1 = pd.read_excel(actual)
    df2 = pd.read_excel(expected)
    normal_content_metric: bool = df1.equals(df2)
    print("Normal Contents Metric: {:}".format(normal_content_metric))
-    sp1 = _load_sparklines(actual)
+    sp1 = load_sparklines(actual)
-    sp2 = _load_sparklines(expected)
+    sp2 = load_sparklines(expected)
    sparkline_metric: bool = sp1 == sp2
    print("Sparkline Metric: {:}".format(sparkline_metric))
    return float(normal_content_metric and sparkline_metric)
-def _load_charts(xlsx_file: str) -> Dict[str, Any]:
+def compare_with_charts(actual: str, expected: str, **options) -> float:
    """
    Args:
-        xlsx_file (str): path to xlsx
+        actual (str): path to result xlsx
-
+        expected (str): path to gold xlsx
-    Returns:
+        options (Dict[str, List[str]]): dict like {"chart_props": list of str}
-        Dict[str, Any]: information of charts
+          giving the concerned chart properties
    """
    workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
    worksheet: Worksheet = workbook.active
    charts: List[ChartBase] = worksheet._charts
    chart_set: Dict[str, Any] = {}
    for ch in charts:
        series: List[str] = []
        for ser in ch.series:
            value_num = ser.val.numRef.f\
                     if hasattr(ser.val, "numRef") and hasattr(ser.val.numRef, "f")\
                   else ""
            value_str = ser.val.strRef.f\
                     if hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f")\
                   else ""
            categ_num = ser.cat.numRef.f\
                     if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f")\
                   else ""
            categ_str = ser.cat.strRef.f\
                     if hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f")\
                   else ""
            series.append( "{:},{:},{:},{:}".format( value_num, value_str
                                                   , categ_num, categ_str
                                                   )
                         )
        series: str = ";".join(series)
        # TODO: maybe more aspects, like chart type
        info: Dict[str, Any] = {}
        chart_set[series] = info
    return chart_set
 def compare_with_charts(actual: str, expected: str) -> float:
    df1 = pd.read_excel(actual)
    df2 = pd.read_excel(expected)
    normal_content_metric: bool = df1.equals(df2)
    print("Normal Contents Metric: {:}".format(normal_content_metric))
-    charts1 = _load_charts(actual)
+    charts1 = load_charts(actual, **options)
-    charts2 = _load_charts(expected)
+    charts2 = load_charts(expected, **options)
    chart_metric: bool = charts1==charts2
    print("Chart Metric: {:}".format(chart_metric))
@@ -202,25 +129,5 @@ if __name__ == '__main__':
    #print(check_sheet_list(path1, rule))
    path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
    #workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    #worksheet1: Worksheet = workbook1.active
    #charts: List[ChartBase] = worksheet1._charts
    #print(len(charts))
    #print(type(charts[0]))
 #
    #print(len(charts[0].series))
    #print(type(charts[0].series[0]))
    #print(type(charts[0].series[0].val))
    ##print(charts[0].series[0].val)
    #print(charts[0].series[0].val.numRef.f)
 #
    #print(type(charts[0].series[0].cat))
    ##print(charts[0].series[0].cat)
    #print(charts[0].series[0].cat.numRef)
    #print(charts[0].series[0].cat.strRef)
    #print(charts[0].series[0].cat.strRef.f)
 #
    #df1 = pd.read_excel(path1)
    #print(df1)
    path2 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
-    print(compare_with_charts(path1, path2))
+    print(compare_with_charts(path1, path2, chart_props=["type", "direction"]))
--- a/desktop_env/evaluators/metrics/utils.py
+++ b/desktop_env/evaluators/metrics/utils.py
@@ -0,0 +1,161 @@
 import zipfile
 import lxml.etree
 import lxml.cssselect
 from lxml.etree import _Element
 import xmltodict
 import openpyxl
 from openpyxl import Workbook
 from openpyxl.worksheet.worksheet import Worksheet
 from openpyxl.chart._chart import ChartBase
 from typing import Dict, List, Set
 from typing import Any
 # (prefix, URI) pairs for the XML namespaces needed to locate sparklines
 # inside a worksheet's XML.
 _xlsx_namespaces = [
    ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
    ("xm", "http://schemas.microsoft.com/office/excel/2006/main"),
 ]
 # prefix -> URI, consumed by the CSS selector
 _xlsx_ns_mapping = {prefix: uri for prefix, uri in _xlsx_namespaces}
 # URI -> prefix, consumed by xmltodict so parsed keys read "x14:sparkline"
 _xlsx_ns_imapping = {uri: prefix for prefix, uri in _xlsx_namespaces}
 # Pre-compiled selector matching every <x14:sparkline> element
 _sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
 def load_sparklines(xlsx_file: str) -> Dict[str, str]:
    """
    Extract the sparkline definitions from "xl/worksheets/sheet1.xml" of an
    xlsx archive. Only that one worksheet part is inspected.

    Args:
        xlsx_file (str): path to xlsx

    Returns:
        Dict[str, str]: mapping from the "xm:sqref" text of each sparkline
          to its "xm:f" text, e.g.
          {
            "F3": "Sheet1!C3:E3"
          }
    """

    # An xlsx file is a zip archive; pull the raw XML of the worksheet part.
    with zipfile.ZipFile(xlsx_file, "r") as archive:
        with archive.open("xl/worksheets/sheet1.xml") as sheet_stream:
            sheet_root: _Element = lxml.etree.fromstring(sheet_stream.read())
    sparkline_nodes: List[_Element] = _sparklines_selector(sheet_root)

    definitions: Dict[str, str] = {}
    for node in sparkline_nodes:
        # Round-trip through xmltodict so children are addressable by their
        # prefixed names ("xm:sqref", "xm:f").
        parsed: Dict[str, Dict[str, str]] = xmltodict.parse(
            lxml.etree.tostring(node, encoding="unicode"),
            process_namespaces=True,
            namespaces=_xlsx_ns_imapping,
        )
        entry: Dict[str, str] = parsed["x14:sparkline"]
        definitions[entry["xm:sqref"]] = entry["xm:f"]
    return definitions
 # Available Chart Properties:
 # title: str
 # anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1]
 # width: number
 # height: number
 # type: "scatterChart" | "lineChart" | "barChart"
 # direction: "bar" (horizontal) | "col" (vertical)
 # xtitle, ytitle, ztitle: str
 def _ref_formula(data_source: Any, ref_name: str) -> Any:
    """Return `data_source.<ref_name>.f`, or "" if any link of the chain is missing."""
    reference = getattr(data_source, ref_name, None)
    return getattr(reference, "f", "")


 def _title_text(title: Any) -> Any:
    """Return the text of the first run of the first paragraph of a title
    object, or None if the title (or any part of that chain) is absent."""
    try:
        return title.tx.rich.p[0].r[0].t
    except (AttributeError, IndexError, TypeError):
        # No title object, no rich text, or an empty paragraph/run list.
        return None


 def _axis_title_text(chart: Any, axis_name: str) -> Any:
    """Return the title text of `chart.<axis_name>`, or None if the chart has
    no such axis or the axis carries no title."""
    axis = getattr(chart, axis_name, None)
    return _title_text(getattr(axis, "title", None))


 def load_charts(xlsx_file: str, **options) -> Dict[str, Any]:
    """
    Collect the charts found on the active worksheet of an xlsx file.

    Every chart is keyed by a string built from its series: for each series
    the four references "value numRef,value strRef,category numRef,category
    strRef" are joined with ",", and the series are joined with ";". Charts
    producing the same key overwrite each other.

    Args:
        xlsx_file (str): path to xlsx
        options (Dict[str, List[str]]): dict like {"chart_props": list of str}
          giving the concerned chart properties. Recognized names: "title",
          "anchor", "width", "height", "type", "direction", "xtitle",
          "ytitle", "ztitle". A missing or empty list selects no property.

    Returns:
        Dict[str, Any]: information of charts, mapping the series key of each
          chart to a dict holding the requested properties. "title",
          "direction" and the axis titles are None when the chart does not
          define them.
    """

    workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
    worksheet: Worksheet = workbook.active
    charts: List[ChartBase] = worksheet._charts

    # `or ()` also tolerates an explicit chart_props=None
    chart_props: Set[str] = set(options.get("chart_props") or ())

    chart_set: Dict[str, Any] = {}
    for ch in charts:
        series: List[str] = []
        for ser in ch.series:
            series.append(
                "{:},{:},{:},{:}".format(
                    _ref_formula(ser.val, "numRef"),
                    _ref_formula(ser.val, "strRef"),
                    _ref_formula(ser.cat, "numRef"),
                    _ref_formula(ser.cat, "strRef"),
                )
            )
        series_key: str = ";".join(series)

        info: Dict[str, Any] = {}
        if "title" in chart_props:
            info["title"] = _title_text(ch.title)
        if "anchor" in chart_props:
            # [edit mode, col0, row0, col1, row1]: the start corner comes
            # from `_from`, the end corner from `to`.
            info["anchor"] = [
                ch.anchor.editAs,
                ch.anchor._from.col,
                ch.anchor._from.row,
                ch.anchor.to.col,
                ch.anchor.to.row,
            ]
        if "width" in chart_props:
            info["width"] = ch.width
        if "height" in chart_props:
            info["height"] = ch.height
        if "type" in chart_props:
            info["type"] = ch.tagname
        if "direction" in chart_props:
            # Charts without a bar direction report None instead of raising.
            info["direction"] = getattr(ch, "barDir", None)
        if "xtitle" in chart_props:
            info["xtitle"] = _axis_title_text(ch, "x_axis")
        if "ytitle" in chart_props:
            info["ytitle"] = _axis_title_text(ch, "y_axis")
        if "ztitle" in chart_props:
            info["ztitle"] = _axis_title_text(ch, "z_axis")
        chart_set[series_key] = info
    return chart_set
 if __name__ == "__main__":
    # Ad-hoc manual check: loads a local sample workbook and prints the chart
    # information extracted by load_charts. workbook1, worksheet1 and charts
    # are only consumed by the commented-out inspection prints below.
    path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold_line_scatter.xlsx"
    workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    worksheet1: Worksheet = workbook1.active
    charts: List[ChartBase] = worksheet1._charts
    #print(len(charts))
    #print(type(charts[0]))
 #
    #print(len(charts[0].series))
    #print(type(charts[0].series[0]))
    #print(type(charts[0].series[0].val))
    ##print(charts[0].series[0].val)
    #print(charts[0].series[0].val.numRef.f)
 #
    #print(type(charts[0].series[0].cat))
    ##print(charts[0].series[0].cat)
    #print(charts[0].series[0].cat.numRef)
    #print(charts[0].series[0].cat.strRef)
    #print(charts[0].series[0].cat.strRef.f)

    #print(type(charts[0].title.tx.strRef))
    #print(type(charts[0].title.tx.rich))
    #print(type(charts[0].title.txPr))
    #print(len(charts[0].title.tx.rich.p))
    #print(len(charts[0].title.tx.rich.p[0].r))
    #print(type(charts[0].title.tx.rich.p[0].r[0]))
    #print(type(charts[0].title.tx.rich.p[0].r[0].t))
    #print(charts[0].title.tx.rich.p[0].r[0].t)

    #print(type(charts[0].anchor))
    #print(charts[0].anchor.editAs)
    #print(charts[0].anchor._from.col, charts[0].anchor.to.row)
    #print(charts[0].anchor.to.col, charts[0].anchor.to.row)

    #df1 = pd.read_excel(path1)
    #print(df1)
    print(load_charts(path1, chart_props=["title", "xtitle", "ytitle", "type"]))
--- a/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json
+++ b/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json
@@ -37,6 +37,12 @@
 			"type": "cloud_file",
 			"path": "https://drive.usercontent.google.com/download?id=1yiTCGZvGccWET9u8K7looD3ybH7PO9gb&export=download&authuser=0&confirm=t&uuid=65f54a6f-bb2e-40c3-8a76-091d785a5aca&at=APZUnTVbeO6maMvzItLvSwdBEZoM:1703595892144",
 			"dest": "Create_column_charts_using_statistics_gold.xlsx"
 		},
 		"options": {
 			"chart_props": [
 				"type",
 				"direction"
 			]
 		}
 	}
 }