Files
sci-gui-agent-benchmark/desktop_env/evaluators/metrics/table.py
David Chang ba77c276e6 ver Dec25thv2
implemented functions to load sparklines from xlsx
2023-12-25 20:14:03 +08:00

75 lines
2.6 KiB
Python

import pandas as pd
import zipfile
import lxml.etree
import lxml.cssselect
from lxml.etree import _Element
import xmltodict
#import pylightxl
from typing import Dict, List
#from typing import Any
def compare_table(actual, expected):
    """
    Check whether two Excel files load into identical DataFrames.

    Both files are read with `pd.read_excel` using its default arguments
    and compared with `DataFrame.equals`.

    Args:
        actual: the produced Excel file, as accepted by `pd.read_excel`
        expected: the reference Excel file, as accepted by `pd.read_excel`

    Returns:
        int: 1 if the two DataFrames are equal, 0 otherwise
    """
    expected_frame = pd.read_excel(expected)
    actual_frame = pd.read_excel(actual)

    if expected_frame.equals(actual_frame):
        return 1
    return 0
# (prefix, URI) pairs of the XML namespaces used when looking up sparklines.
_xlsx_namespaces = [
    ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
    ("xm", "http://schemas.microsoft.com/office/excel/2006/main"),
]
# prefix -> URI, handed to the CSS selector below.
_xlsx_ns_mapping = dict(_xlsx_namespaces)
# URI -> prefix, handed to xmltodict so parsed keys carry the short prefixes.
_xlsx_ns_imapping = {uri: prefix for prefix, uri in _xlsx_namespaces}
# Selects every <x14:sparkline> element below the node it is applied to.
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
def _load_sparklines(xlsx_file: str) -> Dict[str, str]:
    """
    Read the sparkline definitions from the "xl/worksheets/sheet1.xml"
    member of an xlsx archive.

    Args:
        xlsx_file (str): path to xlsx

    Returns:
        Dict[str, str]: one entry per `x14:sparkline` element, keyed by its
          `xm:sqref` value and holding its `xm:f` value, in form of
          {
            "F3": "Sheet1!C3:E3"
          }
    """
    # An xlsx file is opened as a zip archive; only sheet1.xml is inspected.
    with zipfile.ZipFile(xlsx_file, "r") as archive, \
            archive.open("xl/worksheets/sheet1.xml") as sheet_file:
        sheet_root: _Element = lxml.etree.fromstring(sheet_file.read())

    definitions: Dict[str, str] = {}
    for node in _sparklines_selector(sheet_root):
        # Serialise the element and re-parse it with xmltodict so that the
        # children can be addressed by their prefixed names.
        node_xml: str = lxml.etree.tostring(node, encoding="unicode")
        parsed = xmltodict.parse(
            node_xml,
            process_namespaces=True,
            namespaces=_xlsx_ns_imapping,
        )
        entry = parsed["x14:sparkline"]
        definitions[entry["xm:sqref"]] = entry["xm:f"]
    return definitions
def compare_with_sparklines(actual: str, expected: str) -> float:
    """
    Compare two xlsx files by both their cell content and their sparklines.

    The cell content is loaded with `pd.read_excel` and compared with
    `DataFrame.equals`; the sparkline definitions are loaded with
    `_load_sparklines` and compared as dictionaries.

    Args:
        actual (str): path to the produced xlsx
        expected (str): path to the reference xlsx

    Returns:
        float: 1.0 if both the content and the sparklines match, else 0.0
    """
    actual_frame = pd.read_excel(actual)
    expected_frame = pd.read_excel(expected)
    same_content: bool = actual_frame.equals(expected_frame)

    actual_sparklines = _load_sparklines(actual)
    expected_sparklines = _load_sparklines(expected)
    same_sparklines: bool = actual_sparklines == expected_sparklines

    if same_content and same_sparklines:
        return 1.0
    return 0.0
if __name__ == '__main__':
    # Manual smoke run: compare two local sample workbooks and print the score.
    gold_path = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart_gold.xlsx"
    other_path = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart.xlsx"
    score = compare_with_sparklines(gold_path, other_path)
    print(score)