Files
sci-gui-agent-benchmark/desktop_env/evaluators/metrics/utils.py
David Chang 3c04872dcf ver Jan11thv2
a new Thunderbird example w.r.t. email filter
2024-01-11 22:43:38 +08:00

168 lines
6.4 KiB
Python

import zipfile
import lxml.etree
import lxml.cssselect
from lxml.etree import _Element
import xmltodict
import openpyxl
from openpyxl import Workbook
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.chart._chart import ChartBase
from typing import Dict, List, Set
from typing import Any
import logging
logger = logging.getLogger("desktopenv.metrics.utils")
_xlsx_namespaces = [ ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
]
_xlsx_ns_mapping = dict(_xlsx_namespaces)
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
#print(_sparklines_selector.css)
def load_sparklines(xlsx_file: str) -> Dict[str, str]:
"""
This function modifies data_frame in-place
Args:
xlsx_file (str): path to xlsx
Returns:
List[Dict[str, str]]: sparkline definitions in form of
{
"F3": "Sheet1!C3:E3"
}
"""
# read xlsx
with zipfile.ZipFile(xlsx_file, "r") as z_f:
with z_f.open("xl/worksheets/sheet1.xml") as f:
sheet1: _Element = lxml.etree.fromstring(f.read())
sparklines: List[_Element] = _sparklines_selector(sheet1)
sparklines_dict: Dict[str, str] = {}
for sp_l in sparklines:
sparkline_xml: str = lxml.etree.tostring(sp_l, encoding="unicode")
sparkline: Dict[str, Dict[str, str]] = xmltodict.parse( sparkline_xml
, process_namespaces=True
, namespaces=_xlsx_ns_imapping
)
sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"]
return sparklines_dict
# Available Chart Properties:
# title: str
# anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1]
# width: number
# height: number
# type: "scatterChart" | "lineChart" | "barChart"
# direction: "bar" (hori) | "col" (vert)
# xtitle, ytitle, ztitle: str
def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
"""
Args:
xlsx_file (Workbook): concerned excel book
options (Dict[str, List[str]]): dict like {"chart_props": list of str}
giving the concerned chart properties
Returns:
Dict[str, Any]: information of charts
"""
#workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
worksheet: Worksheet = xlsx_file.active
charts: List[ChartBase] = worksheet._charts
chart_set: Dict[str, Any] = {}
chart_props: Set[str] = set(options["chart_props"]) if "chart_props" in options else set()
for ch in charts:
series: List[str] = []
for ser in ch.series:
value_num = ser.val.numRef.f\
if hasattr(ser.val, "numRef") and hasattr(ser.val.numRef, "f")\
else ""
value_str = ser.val.strRef.f\
if hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f")\
else ""
categ_num = ser.cat.numRef.f\
if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f")\
else ""
categ_str = ser.cat.strRef.f\
if hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f")\
else ""
series.append( "{:},{:},{:},{:}".format( value_num, value_str
, categ_num, categ_str
)
)
series: str = ";".join(series)
# TODO: maybe more aspects, like chart type
info: Dict[str, Any] = {}
if "title" in chart_props:
info["title"] = ch.title.tx.rich.p[0].r[0].t
if "anchor" in chart_props:
info["anchor"] = [ ch.anchor.editAs
, ch.anchor._from.col, ch.anchor.to.row
, ch.anchor.to.col, ch.anchor.to.row
]
if "width" in chart_props:
info["width"] = ch.width
if "height" in chart_props:
info["height"] = ch.height
if "type" in chart_props:
info["type"] = ch.tagname
if "direction" in chart_props:
info["direction"] = ch.barDir
if "xtitle" in chart_props:
info["xtitle"] = ch.x_axis.title.tx.rich.p[0].r[0].t
if "ytitle" in chart_props:
info["ytitle"] = ch.y_axis.title.tx.rich.p[0].r[0].t
if "ztitle" in chart_props:
info["ztitle"] = ch.z_axis.title.tx.rich.p[0].r[0].t
chart_set[series] = info
return chart_set
def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool:
return all(k in item and item[k]==val for k, val in pattern.items())
if __name__ == "__main__":
path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold_line_scatter.xlsx"
workbook1: Workbook = openpyxl.load_workbook(filename=path1)
worksheet1: Worksheet = workbook1.active
charts: List[ChartBase] = worksheet1._charts
#print(len(charts))
#print(type(charts[0]))
#
#print(len(charts[0].series))
#print(type(charts[0].series[0]))
#print(type(charts[0].series[0].val))
##print(charts[0].series[0].val)
#print(charts[0].series[0].val.numRef.f)
#
#print(type(charts[0].series[0].cat))
##print(charts[0].series[0].cat)
#print(charts[0].series[0].cat.numRef)
#print(charts[0].series[0].cat.strRef)
#print(charts[0].series[0].cat.strRef.f)
#print(type(charts[0].title.tx.strRef))
#print(type(charts[0].title.tx.rich))
#print(type(charts[0].title.txPr))
#print(len(charts[0].title.tx.rich.p))
#print(len(charts[0].title.tx.rich.p[0].r))
#print(type(charts[0].title.tx.rich.p[0].r[0]))
#print(type(charts[0].title.tx.rich.p[0].r[0].t))
#print(charts[0].title.tx.rich.p[0].r[0].t)
#print(type(charts[0].anchor))
#print(charts[0].anchor.editAs)
#print(charts[0].anchor._from.col, charts[0].anchor.to.row)
#print(charts[0].anchor.to.col, charts[0].anchor.to.row)
#df1 = pd.read_excel(path1)
#print(df1)
print(load_charts(path1, chart_props=["title", "xtitle", "ytitle", "type"]))