"""Metrics that compare two ``.xlsx`` workbooks.

``compare_table`` compares only the cell contents that pandas reads, while
``compare_with_sparklines`` additionally compares the sparkline definitions
stored in the first worksheet's XML.
"""

import zipfile
from typing import Dict, List

import lxml.cssselect
import lxml.etree
import pandas as pd
import xmltodict
from lxml.etree import _Element


def compare_table(actual: str, expected: str) -> int:
    """Compare the cell contents of two workbooks.

    Args:
        actual (str): path to the workbook under evaluation
        expected (str): path to the reference workbook

    Returns:
        int: 1 if pandas loads both workbooks into equal DataFrames, else 0
    """

    expected_df = pd.read_excel(expected)
    actual_df = pd.read_excel(actual)

    return 1 if expected_df.equals(actual_df) else 0


# (prefix, namespace URI) pairs for the spreadsheet extensions that carry
# sparkline definitions.
_xlsx_namespaces = [
    ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
    ("xm", "http://schemas.microsoft.com/office/excel/2006/main"),
]
# prefix -> URI, as needed by the CSS selector.
_xlsx_ns_mapping = dict(_xlsx_namespaces)
# URI -> prefix, so that xmltodict emits keys such as "x14:sparkline" instead
# of keys containing the full namespace URI.
_xlsx_ns_imapping = {uri: prefix for prefix, uri in _xlsx_namespaces}
_sparklines_selector = lxml.cssselect.CSSSelector(
    "x14|sparkline", namespaces=_xlsx_ns_mapping
)


def _load_sparklines(
    xlsx_file: str, sheet_path: str = "xl/worksheets/sheet1.xml"
) -> Dict[str, str]:
    """Read the sparkline definitions of one worksheet of an xlsx file.

    Args:
        xlsx_file (str): path to xlsx
        sheet_path (str): archive member holding the worksheet XML; defaults
          to the first worksheet

    Returns:
        Dict[str, str]: sparkline definitions keyed by the sparkline's
          ``xm:sqref`` with its ``xm:f`` as value, in form of
          {
            "F3": "Sheet1!C3:E3"
          }

    Raises:
        KeyError: if `sheet_path` is not a member of the archive, or if a
          sparkline element lacks an ``xm:sqref`` or ``xm:f`` child
    """

    # An xlsx file is a zip archive; pull out the raw worksheet XML.
    with zipfile.ZipFile(xlsx_file, "r") as archive:
        sheet_xml: bytes = archive.read(sheet_path)

    sheet: _Element = lxml.etree.fromstring(sheet_xml)
    sparkline_elements: List[_Element] = _sparklines_selector(sheet)

    definitions: Dict[str, str] = {}
    for element in sparkline_elements:
        # Round-trip through a string so xmltodict can turn the element into
        # a plain dict keyed by prefixed tag names.
        element_xml: str = lxml.etree.tostring(element, encoding="unicode")
        parsed: Dict[str, Dict[str, str]] = xmltodict.parse(
            element_xml,
            process_namespaces=True,
            namespaces=_xlsx_ns_imapping,
        )
        sparkline = parsed["x14:sparkline"]
        definitions[sparkline["xm:sqref"]] = sparkline["xm:f"]
    return definitions


def compare_with_sparklines(actual: str, expected: str) -> float:
    """Compare both the cell contents and the sparklines of two workbooks.

    Args:
        actual (str): path to the workbook under evaluation
        expected (str): path to the reference workbook

    Returns:
        float: 1.0 if the DataFrames loaded by pandas are equal and the
          sparkline definitions of both workbooks are equal, else 0.0
    """

    actual_df = pd.read_excel(actual)
    expected_df = pd.read_excel(expected)
    normal_content_metric: bool = actual_df.equals(expected_df)

    actual_sparklines = _load_sparklines(actual)
    expected_sparklines = _load_sparklines(expected)
    sparkline_metric: bool = actual_sparklines == expected_sparklines

    return float(normal_content_metric and sparkline_metric)


if __name__ == '__main__':
    path1 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart_gold.xlsx"
    path2 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart.xlsx"
    print(compare_with_sparklines(path1, path2))