ver Dec27th
merged zdy into main
This commit is contained in:
@@ -79,6 +79,7 @@ class DesktopEnv(gym.Env):
|
|||||||
self.metric: Metric = getattr(metrics, self.evaluator["func"])
|
self.metric: Metric = getattr(metrics, self.evaluator["func"])
|
||||||
self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
|
self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
|
||||||
self.expected_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["expected"]["type"]))
|
self.expected_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["expected"]["type"]))
|
||||||
|
self.metric_options: Dict[str, Any] = self.evaluator.get("options", {})
|
||||||
|
|
||||||
# Initialize emulator and controller
|
# Initialize emulator and controller
|
||||||
print("Initializing...")
|
print("Initializing...")
|
||||||
@@ -165,6 +166,7 @@ class DesktopEnv(gym.Env):
|
|||||||
self.metric: Metric = getattr(metrics, self.evaluator["func"])
|
self.metric: Metric = getattr(metrics, self.evaluator["func"])
|
||||||
self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
|
self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
|
||||||
self.expected_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["expected"]["type"]))
|
self.expected_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["expected"]["type"]))
|
||||||
|
self.metric_options = self.evaluator.get("options", {})
|
||||||
|
|
||||||
self.setup_controller.reset_cache_dir(self.cache_dir)
|
self.setup_controller.reset_cache_dir(self.cache_dir)
|
||||||
|
|
||||||
@@ -237,7 +239,7 @@ class DesktopEnv(gym.Env):
|
|||||||
|
|
||||||
result = self.result_getter(self, self.evaluator["result"])
|
result = self.result_getter(self, self.evaluator["result"])
|
||||||
expected = self.expected_getter(self, self.evaluator["expected"])
|
expected = self.expected_getter(self, self.evaluator["expected"])
|
||||||
metric: float = self.metric(result, expected)
|
metric: float = self.metric(result, expected, **self.metric_options)
|
||||||
|
|
||||||
return metric
|
return metric
|
||||||
|
|
||||||
|
|||||||
@@ -1,14 +1,10 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import zipfile
|
|
||||||
import lxml.etree
|
|
||||||
import lxml.cssselect
|
|
||||||
from lxml.etree import _Element
|
|
||||||
import xmltodict
|
|
||||||
#import pylightxl
|
#import pylightxl
|
||||||
import openpyxl
|
import openpyxl
|
||||||
from openpyxl import Workbook
|
#from openpyxl import Workbook
|
||||||
from openpyxl.worksheet.worksheet import Worksheet
|
from openpyxl.worksheet.worksheet import Worksheet
|
||||||
from openpyxl.chart._chart import ChartBase
|
|
||||||
|
from utils import load_charts, load_sparklines
|
||||||
|
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@@ -20,104 +16,35 @@ def compare_table(actual, expected):
|
|||||||
# Compare the DataFrames
|
# Compare the DataFrames
|
||||||
return 1 if df1.equals(df2) else 0
|
return 1 if df1.equals(df2) else 0
|
||||||
|
|
||||||
_xlsx_namespaces = [ ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
|
|
||||||
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
|
|
||||||
]
|
|
||||||
_xlsx_ns_mapping = dict(_xlsx_namespaces)
|
|
||||||
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
|
|
||||||
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
|
|
||||||
#print(_sparklines_selector.css)
|
|
||||||
def _load_sparklines(xlsx_file: str) -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
This function modifies data_frame in-place
|
|
||||||
|
|
||||||
Args:
|
|
||||||
xlsx_file (str): path to xlsx
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List[Dict[str, str]]: sparkline definitions in form of
|
|
||||||
{
|
|
||||||
"F3": "Sheet1!C3:E3"
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
|
|
||||||
# read xlsx
|
|
||||||
with zipfile.ZipFile(xlsx_file, "r") as z_f:
|
|
||||||
with z_f.open("xl/worksheets/sheet1.xml") as f:
|
|
||||||
sheet1: _Element = lxml.etree.fromstring(f.read())
|
|
||||||
sparklines: List[_Element] = _sparklines_selector(sheet1)
|
|
||||||
|
|
||||||
sparklines_dict: Dict[str, str] = {}
|
|
||||||
for sp_l in sparklines:
|
|
||||||
sparkline_xml: str = lxml.etree.tostring(sp_l, encoding="unicode")
|
|
||||||
sparkline: Dict[str, Dict[str, str]] = xmltodict.parse( sparkline_xml
|
|
||||||
, process_namespaces=True
|
|
||||||
, namespaces=_xlsx_ns_imapping
|
|
||||||
)
|
|
||||||
sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"]
|
|
||||||
return sparklines_dict
|
|
||||||
|
|
||||||
def compare_with_sparklines(actual: str, expected: str) -> float:
|
def compare_with_sparklines(actual: str, expected: str) -> float:
|
||||||
df1 = pd.read_excel(actual)
|
df1 = pd.read_excel(actual)
|
||||||
df2 = pd.read_excel(expected)
|
df2 = pd.read_excel(expected)
|
||||||
normal_content_metric: bool = df1.equals(df2)
|
normal_content_metric: bool = df1.equals(df2)
|
||||||
print("Normal Contents Metric: {:}".format(normal_content_metric))
|
print("Normal Contents Metric: {:}".format(normal_content_metric))
|
||||||
|
|
||||||
sp1 = _load_sparklines(actual)
|
sp1 = load_sparklines(actual)
|
||||||
sp2 = _load_sparklines(expected)
|
sp2 = load_sparklines(expected)
|
||||||
sparkline_metric: bool = sp1 == sp2
|
sparkline_metric: bool = sp1 == sp2
|
||||||
print("Sparkline Metric: {:}".format(sparkline_metric))
|
print("Sparkline Metric: {:}".format(sparkline_metric))
|
||||||
|
|
||||||
return float(normal_content_metric and sparkline_metric)
|
return float(normal_content_metric and sparkline_metric)
|
||||||
|
|
||||||
def _load_charts(xlsx_file: str) -> Dict[str, Any]:
|
def compare_with_charts(actual: str, expected: str, **options) -> float:
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
xlsx_file (str): path to xlsx
|
actual (str): path to result xlsx
|
||||||
|
expected (str): path to gold xlsx
|
||||||
Returns:
|
options (Dict[str, List[str]]): dict like {"chart_props": list of str}
|
||||||
Dict[str, Any]: information of charts
|
giving the concerned chart properties
|
||||||
"""
|
"""
|
||||||
|
|
||||||
workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
|
|
||||||
worksheet: Worksheet = workbook.active
|
|
||||||
charts: List[ChartBase] = worksheet._charts
|
|
||||||
|
|
||||||
chart_set: Dict[str, Any] = {}
|
|
||||||
for ch in charts:
|
|
||||||
series: List[str] = []
|
|
||||||
for ser in ch.series:
|
|
||||||
value_num = ser.val.numRef.f\
|
|
||||||
if hasattr(ser.val, "numRef") and hasattr(ser.val.numRef, "f")\
|
|
||||||
else ""
|
|
||||||
value_str = ser.val.strRef.f\
|
|
||||||
if hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f")\
|
|
||||||
else ""
|
|
||||||
categ_num = ser.cat.numRef.f\
|
|
||||||
if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f")\
|
|
||||||
else ""
|
|
||||||
categ_str = ser.cat.strRef.f\
|
|
||||||
if hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f")\
|
|
||||||
else ""
|
|
||||||
series.append( "{:},{:},{:},{:}".format( value_num, value_str
|
|
||||||
, categ_num, categ_str
|
|
||||||
)
|
|
||||||
)
|
|
||||||
series: str = ";".join(series)
|
|
||||||
|
|
||||||
# TODO: maybe more aspects, like chart type
|
|
||||||
info: Dict[str, Any] = {}
|
|
||||||
chart_set[series] = info
|
|
||||||
return chart_set
|
|
||||||
|
|
||||||
def compare_with_charts(actual: str, expected: str) -> float:
|
|
||||||
df1 = pd.read_excel(actual)
|
df1 = pd.read_excel(actual)
|
||||||
df2 = pd.read_excel(expected)
|
df2 = pd.read_excel(expected)
|
||||||
normal_content_metric: bool = df1.equals(df2)
|
normal_content_metric: bool = df1.equals(df2)
|
||||||
print("Normal Contents Metric: {:}".format(normal_content_metric))
|
print("Normal Contents Metric: {:}".format(normal_content_metric))
|
||||||
|
|
||||||
charts1 = _load_charts(actual)
|
charts1 = load_charts(actual, **options)
|
||||||
charts2 = _load_charts(expected)
|
charts2 = load_charts(expected, **options)
|
||||||
chart_metric: bool = charts1==charts2
|
chart_metric: bool = charts1==charts2
|
||||||
print("Chart Metric: {:}".format(chart_metric))
|
print("Chart Metric: {:}".format(chart_metric))
|
||||||
|
|
||||||
@@ -202,25 +129,5 @@ if __name__ == '__main__':
|
|||||||
#print(check_sheet_list(path1, rule))
|
#print(check_sheet_list(path1, rule))
|
||||||
|
|
||||||
path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
|
path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
|
||||||
#workbook1: Workbook = openpyxl.load_workbook(filename=path1)
|
|
||||||
#worksheet1: Worksheet = workbook1.active
|
|
||||||
#charts: List[ChartBase] = worksheet1._charts
|
|
||||||
#print(len(charts))
|
|
||||||
#print(type(charts[0]))
|
|
||||||
#
|
|
||||||
#print(len(charts[0].series))
|
|
||||||
#print(type(charts[0].series[0]))
|
|
||||||
#print(type(charts[0].series[0].val))
|
|
||||||
##print(charts[0].series[0].val)
|
|
||||||
#print(charts[0].series[0].val.numRef.f)
|
|
||||||
#
|
|
||||||
#print(type(charts[0].series[0].cat))
|
|
||||||
##print(charts[0].series[0].cat)
|
|
||||||
#print(charts[0].series[0].cat.numRef)
|
|
||||||
#print(charts[0].series[0].cat.strRef)
|
|
||||||
#print(charts[0].series[0].cat.strRef.f)
|
|
||||||
#
|
|
||||||
#df1 = pd.read_excel(path1)
|
|
||||||
#print(df1)
|
|
||||||
path2 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
|
path2 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
|
||||||
print(compare_with_charts(path1, path2))
|
print(compare_with_charts(path1, path2, chart_props=["type", "direction"]))
|
||||||
|
|||||||
161
desktop_env/evaluators/metrics/utils.py
Normal file
161
desktop_env/evaluators/metrics/utils.py
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
import zipfile
|
||||||
|
import lxml.etree
|
||||||
|
import lxml.cssselect
|
||||||
|
from lxml.etree import _Element
|
||||||
|
import xmltodict
|
||||||
|
import openpyxl
|
||||||
|
from openpyxl import Workbook
|
||||||
|
from openpyxl.worksheet.worksheet import Worksheet
|
||||||
|
from openpyxl.chart._chart import ChartBase
|
||||||
|
|
||||||
|
from typing import Dict, List, Set
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
# (prefix, namespace URI) pairs of the spreadsheet extension namespaces used
# when looking up sparkline elements.
_xlsx_namespaces = [
    ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
    ("xm", "http://schemas.microsoft.com/office/excel/2006/main"),
]
# prefix -> URI
_xlsx_ns_mapping = dict(_xlsx_namespaces)
# URI -> prefix (inverse of the mapping above)
_xlsx_ns_imapping = {uri: prefix for prefix, uri in _xlsx_namespaces}
|
||||||
|
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
|
||||||
|
#print(_sparklines_selector.css)
|
||||||
|
def load_sparklines(xlsx_file: str) -> Dict[str, str]:
    """
    Collect the sparkline definitions found in the
    "xl/worksheets/sheet1.xml" member of an xlsx archive.

    The file is only read; nothing is modified.

    Args:
        xlsx_file (str): path to xlsx

    Returns:
        Dict[str, str]: sparkline definitions, mapping the "xm:sqref" text
          of each sparkline to its "xm:f" text, in form of
          {
            "F3": "Sheet1!C3:E3"
          }
    """

    # read xlsx
    with zipfile.ZipFile(xlsx_file, "r") as archive:
        with archive.open("xl/worksheets/sheet1.xml") as sheet_file:
            sheet_root: _Element = lxml.etree.fromstring(sheet_file.read())

    definitions: Dict[str, str] = {}
    for node in _sparklines_selector(sheet_root):
        # Serialise each matched element and let xmltodict turn it into a
        # dict whose keys carry the short "x14:" / "xm:" prefixes.
        node_xml: str = lxml.etree.tostring(node, encoding="unicode")
        parsed: Dict[str, Dict[str, str]] = xmltodict.parse(
            node_xml,
            process_namespaces=True,
            namespaces=_xlsx_ns_imapping,
        )
        entry = parsed["x14:sparkline"]
        definitions[entry["xm:sqref"]] = entry["xm:f"]
    return definitions
|
||||||
|
|
||||||
|
# Available Chart Properties:
|
||||||
|
# title: str
|
||||||
|
# anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1]
|
||||||
|
# width: number
|
||||||
|
# height: number
|
||||||
|
# type: "scatterChart" | "lineChart" | "barChart"
|
||||||
|
# direction: "bar" (horizontal) | "col" (vertical)
|
||||||
|
# xtitle, ytitle, ztitle: str
|
||||||
|
def load_charts(xlsx_file: str, **options) -> Dict[str, Any]:
    """
    Summarise the charts of the active worksheet of an xlsx workbook.

    Every chart is keyed by a string built from its series references:
    "<val numRef>,<val strRef>,<cat numRef>,<cat strRef>" per series, with
    the series joined by ";". Charts producing the same key overwrite each
    other. The value is a dict holding only the requested properties.

    Args:
        xlsx_file (str): path to xlsx
        options (Dict[str, List[str]]): dict like {"chart_props": list of str}
          giving the concerned chart properties. Recognised names are
          "title", "anchor", "width", "height", "type", "direction",
          "xtitle", "ytitle" and "ztitle". Without "chart_props" no
          property is collected.

    Returns:
        Dict[str, Any]: information of charts
    """

    workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
    worksheet: Worksheet = workbook.active
    charts: List[ChartBase] = worksheet._charts

    # A missing, empty or None "chart_props" option selects no property.
    chart_props: Set[str] = set(options.get("chart_props") or ())

    chart_set: Dict[str, Any] = {}
    for ch in charts:
        series_key: str = ";".join(
            "{:},{:},{:},{:}".format(
                _ref_formula(ser.val, "numRef"),
                _ref_formula(ser.val, "strRef"),
                _ref_formula(ser.cat, "numRef"),
                _ref_formula(ser.cat, "strRef"),
            )
            for ser in ch.series
        )
        chart_set[series_key] = _chart_info(ch, chart_props)
    return chart_set


def _ref_formula(data_source: Any, ref_name: str) -> Any:
    """Return ``data_source.<ref_name>.f``, or "" if either attribute is absent."""
    return getattr(getattr(data_source, ref_name, None), "f", "")


def _title_text(title: Any) -> Any:
    """Return the text of the first run of the first paragraph of a title."""
    return title.tx.rich.p[0].r[0].t


def _chart_info(ch: Any, chart_props: Set[str]) -> Dict[str, Any]:
    """Collect the properties named in chart_props from a single chart."""
    info: Dict[str, Any] = {}

    if "title" in chart_props:
        info["title"] = _title_text(ch.title)
    if "anchor" in chart_props:
        # Layout: [editAs, col0, row0, col1, row1]. row0 must come from the
        # "_from" marker; it was previously read from "to", so the starting
        # row was never captured.
        info["anchor"] = [
            ch.anchor.editAs,
            ch.anchor._from.col, ch.anchor._from.row,
            ch.anchor.to.col, ch.anchor.to.row,
        ]
    if "width" in chart_props:
        info["width"] = ch.width
    if "height" in chart_props:
        info["height"] = ch.height
    if "type" in chart_props:
        info["type"] = ch.tagname
    if "direction" in chart_props:
        # Charts exposing no barDir attribute (presumably the non-bar chart
        # types) report None instead of aborting the whole comparison.
        info["direction"] = getattr(ch, "barDir", None)

    if "xtitle" in chart_props:
        info["xtitle"] = _title_text(ch.x_axis.title)
    if "ytitle" in chart_props:
        info["ytitle"] = _title_text(ch.y_axis.title)
    if "ztitle" in chart_props:
        info["ztitle"] = _title_text(ch.z_axis.title)
    return info
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke check: print the chart summary of a local sample workbook.
    # load_charts opens the workbook itself, so it is not loaded separately
    # here.
    path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold_line_scatter.xlsx"
    print(load_charts(path1, chart_props=["title", "xtitle", "ytitle", "type"]))
|
||||||
@@ -37,6 +37,12 @@
|
|||||||
"type": "cloud_file",
|
"type": "cloud_file",
|
||||||
"path": "https://drive.usercontent.google.com/download?id=1yiTCGZvGccWET9u8K7looD3ybH7PO9gb&export=download&authuser=0&confirm=t&uuid=65f54a6f-bb2e-40c3-8a76-091d785a5aca&at=APZUnTVbeO6maMvzItLvSwdBEZoM:1703595892144",
|
"path": "https://drive.usercontent.google.com/download?id=1yiTCGZvGccWET9u8K7looD3ybH7PO9gb&export=download&authuser=0&confirm=t&uuid=65f54a6f-bb2e-40c3-8a76-091d785a5aca&at=APZUnTVbeO6maMvzItLvSwdBEZoM:1703595892144",
|
||||||
"dest": "Create_column_charts_using_statistics_gold.xlsx"
|
"dest": "Create_column_charts_using_statistics_gold.xlsx"
|
||||||
|
},
|
||||||
|
"options": {
|
||||||
|
"chart_props": [
|
||||||
|
"type",
|
||||||
|
"direction"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user