"""Metrics for comparing spreadsheet (xlsx) results against expectations."""

import zipfile
from typing import Any, Dict, List, Union

import lxml.cssselect
import lxml.etree
import openpyxl
import pandas as pd
import xmltodict
from lxml.etree import _Element
#import pylightxl
#from openpyxl import Workbook
from openpyxl.worksheet.worksheet import Worksheet


def compare_table(actual, expected):
    """Compare the first sheets of two workbooks as DataFrames.

    Args:
        actual: path (or anything accepted by ``pd.read_excel``) of the
            workbook under test
        expected: path of the reference workbook

    Returns:
        int: 1 if ``DataFrame.equals`` reports both frames equal, else 0
    """

    expected_frame = pd.read_excel(expected)
    actual_frame = pd.read_excel(actual)

    return 1 if expected_frame.equals(actual_frame) else 0


# (prefix, namespace URI) pairs used to address sparkline elements.
_xlsx_namespaces = [
    ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
    ("xm", "http://schemas.microsoft.com/office/excel/2006/main"),
]
# prefix -> URI, consumed by the CSS selector
_xlsx_ns_mapping = dict(_xlsx_namespaces)
# URI -> prefix, consumed by xmltodict so parsed keys read "x14:..." / "xm:..."
_xlsx_ns_imapping = {uri: prefix for prefix, uri in _xlsx_namespaces}
_sparklines_selector = lxml.cssselect.CSSSelector(
    "x14|sparkline", namespaces=_xlsx_ns_mapping
)
#print(_sparklines_selector.css)


def _load_sparklines(xlsx_file: str) -> Dict[str, str]:
    """Collect the sparkline definitions of ``xl/worksheets/sheet1.xml``.

    Args:
        xlsx_file (str): path to xlsx

    Returns:
        Dict[str, str]: sparkline definitions mapping the ``xm:sqref`` value
            to the ``xm:f`` value, in form of
            {
                "F3": "Sheet1!C3:E3"
            }

    Raises:
        KeyError: if the archive has no ``xl/worksheets/sheet1.xml`` member
    """

    # An xlsx file is a zip archive; only the first worksheet part is read.
    with zipfile.ZipFile(xlsx_file, "r") as z_f:
        with z_f.open("xl/worksheets/sheet1.xml") as f:
            sheet1: _Element = lxml.etree.fromstring(f.read())

    sparklines: List[_Element] = _sparklines_selector(sheet1)

    sparklines_dict: Dict[str, str] = {}
    for sp_l in sparklines:
        # Round-trip each element through xmltodict to get prefixed keys.
        sparkline_xml: str = lxml.etree.tostring(sp_l, encoding="unicode")
        sparkline: Dict[str, Dict[str, str]] = xmltodict.parse(
            sparkline_xml,
            process_namespaces=True,
            namespaces=_xlsx_ns_imapping,
        )
        definition = sparkline["x14:sparkline"]
        sparklines_dict[definition["xm:sqref"]] = definition["xm:f"]
    return sparklines_dict


def compare_with_sparklines(actual: str, expected: str) -> float:
    """Compare two workbooks by cell contents and sparkline definitions.

    Args:
        actual (str): path to the xlsx under test
        expected (str): path to the reference xlsx

    Returns:
        float: 1.0 if both the first-sheet DataFrames and the sparkline
            definitions of sheet1 are equal, else 0.0
    """

    df1 = pd.read_excel(actual)
    df2 = pd.read_excel(expected)
    normal_content_metric: bool = df1.equals(df2)
    print("Normal Contents Metric: {:}".format(normal_content_metric))

    sp1 = _load_sparklines(actual)
    sp2 = _load_sparklines(expected)
    sparkline_metric: bool = sp1 == sp2
    print("Sparkline Metric: {:}".format(sparkline_metric))

    return float(normal_content_metric and sparkline_metric)


def _read_rule_sheet(workbook: pd.ExcelFile, sheet_ref: Union[int, str]) -> pd.DataFrame:
    """Load the sheet a "sheet_data" rule refers to.

    Args:
        workbook (pd.ExcelFile): the already opened result workbook
        sheet_ref (Union[int, str]): either an int sheet index into
            `workbook`, or a string "<file path>@<sheet index>" naming a
            sheet of another file

    Returns:
        pd.DataFrame: contents of the referenced sheet
    """

    if isinstance(sheet_ref, int):
        return pd.read_excel(workbook, sheet_ref)

    # rsplit so that "@" characters inside the path itself are preserved.
    file_name, sheet_idx = sheet_ref.rsplit("@", maxsplit=1)
    return pd.read_excel(file_name, int(sheet_idx))


def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
    """Check a workbook against a list of sheet-level rules.

    Args:
        result (str): path to the xlsx under test
        rules (List[Dict[str, Any]]): each rule is one of
            {
                "type": "sheet_name",
                "sheet_idx": int,
                "sheet_name": str
            }
            asserting the name of the sheet at `sheet_idx`, or
            {
                "type": "sheet_data",
                "sheet_idx0": int or "<file path>@<sheet index>",
                "sheet_idx1": int or "<file path>@<sheet index>"
            }
            asserting that the two referenced sheets hold equal data; an
            int refers to a sheet of `result`

    Returns:
        float: 1.0 if every rule holds, else 0.0

    Raises:
        NotImplementedError: for a rule with an unknown "type"
        IndexError: if a "sheet_name" rule indexes past the sheet list
    """

    #workbook: Workbook = openpyxl.load_workbook(filename=result)
    # Context manager guarantees the file handle is released, even when a
    # rule raises.
    with pd.ExcelFile(result) as workbook:
        worksheet_names: List[str] = workbook.sheet_names

        passes = True
        for r in rules:
            if r["type"] == "sheet_name":
                actual_name: str = worksheet_names[r["sheet_idx"]]
                expected_name: str = r["sheet_name"]
                metric: bool = actual_name == expected_name
                print("Assertion: {:d}.{:} is {:} - {:}".format(
                    r["sheet_idx"], expected_name, actual_name, metric
                ))
            elif r["type"] == "sheet_data":
                df1: pd.DataFrame = _read_rule_sheet(workbook, r["sheet_idx0"])
                df2: pd.DataFrame = _read_rule_sheet(workbook, r["sheet_idx1"])
                metric = df1.equals(df2)
                print("Assertion: {:} == {:} - {:}".format(
                    r["sheet_idx0"], r["sheet_idx1"], metric
                ))
            else:
                raise NotImplementedError(
                    "Unimplemented sheet check: {:}".format(r["type"])
                )
            # Keep evaluating remaining rules so every assertion is printed.
            passes = passes and metric

    return float(passes)


def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
    """Check the freeze-pane position of the active sheet.

    Args:
        result (str): path to the xlsx under test
        rules (Dict[str, str]): dict like {"position": "C6"}

    Returns:
        float: 1.0 if the active worksheet's `freeze_panes` equals
            rules["position"], else 0.0
    """

    worksheet: Worksheet = openpyxl.load_workbook(filename=result).active
    return float(worksheet.freeze_panes == rules["position"])


if __name__ == '__main__':
    #path1 = ""
    #path2 = ""
    #print(compare_table(path1, path2))

    #path1 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart_gold.xlsx"
    #path2 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart.xlsx"
    #print(compare_with_sparklines(path1, path2))

    #path1 = "../../../../../任务数据/LibreOffice Calc/Freeze_row_column_gold.xlsx"
    #path2 = "../../../../../任务数据/LibreOffice Calc/Freeze_row_column.xlsx"
    #workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    #worksheet1: Worksheet = workbook1.active
    #print(worksheet1.freeze_panes)
    #workbook2: Workbook = openpyxl.load_workbook(filename=path2)
    #worksheet2: Worksheet = workbook2.active
    #print(worksheet2.freeze_panes)
    #rule = {"position": "C6"}
    #print(check_xlsx_freeze(path1, rule))

    path1 = "../../../../../任务数据/LibreOffice Calc/copy_sheet_insert_gold.xlsx"
    rule = [
        {
            "type": "sheet_name",
            "sheet_idx": 0,
            "sheet_name": "Sheet1",
        },
        {
            "type": "sheet_data",
            "sheet_idx0": "../../../../../任务数据/LibreOffice Calc/copy_sheet_insert.xlsx@0",
            "sheet_idx1": 1,
        },
        {
            "type": "sheet_name",
            "sheet_idx": 2,
            "sheet_name": "Sheet2",
        },
    ]
    print(check_sheet_list(path1, rule))