"""Metrics for comparing spreadsheet (xlsx) files.

Each public function takes paths to xlsx files (and, for the ``check_*``
functions, a rule description) and returns a numeric score where a truthy
value (1 / 1.0) means "matches" and a falsy value (0 / 0.0) means "differs".
"""

import zipfile
from typing import Any, Dict, List, Union

import lxml.cssselect
import lxml.etree
import openpyxl
import pandas as pd
import xmltodict
from lxml.etree import _Element
from openpyxl import Workbook
from openpyxl.chart._chart import ChartBase
from openpyxl.worksheet.worksheet import Worksheet
#import pylightxl


def _first_sheets_equal(left: str, right: str) -> bool:
    """Return whether the tables ``pd.read_excel`` loads from both files are equal.

    Args:
        left (str): path to the workbook whose DataFrame drives ``equals``
        right (str): path to the workbook it is compared against

    Returns:
        bool: result of ``DataFrame.equals`` on the two loaded tables
    """
    return pd.read_excel(left).equals(pd.read_excel(right))


def compare_table(actual, expected):
    """Compare the cell contents of two workbooks.

    Args:
        actual: path to the workbook under test
        expected: path to the reference workbook

    Returns:
        int: 1 if the tables loaded by ``pd.read_excel`` are equal, else 0
    """
    return 1 if _first_sheets_equal(expected, actual) else 0


# Prefix -> URI pairs for the spreadsheet extension namespaces used by
# sparkline definitions.
_xlsx_namespaces = [
    ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
    ("xm", "http://schemas.microsoft.com/office/excel/2006/main"),
]
# prefix -> URI, as expected by the CSS selector
_xlsx_ns_mapping = dict(_xlsx_namespaces)
# URI -> prefix, as expected by xmltodict so that parsed keys look like
# "x14:sparkline" instead of carrying the full namespace URI
_xlsx_ns_imapping = {uri: prefix for prefix, uri in _xlsx_namespaces}
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
#print(_sparklines_selector.css)


def _load_sparklines(xlsx_file: str) -> Dict[str, str]:
    """Extract the sparkline definitions stored in ``xl/worksheets/sheet1.xml``.

    Only that single worksheet part of the archive is inspected.

    Args:
        xlsx_file (str): path to xlsx

    Returns:
        Dict[str, str]: sparkline definitions mapping the ``xm:sqref`` value
          to the ``xm:f`` value of every ``x14:sparkline`` element, in form of
          {
            "F3": "Sheet1!C3:E3"
          }

    Raises:
        KeyError: if the archive has no ``xl/worksheets/sheet1.xml`` member,
          or a sparkline element lacks ``xm:sqref`` / ``xm:f``
    """
    with zipfile.ZipFile(xlsx_file, "r") as z_f, \
            z_f.open("xl/worksheets/sheet1.xml") as f:
        sheet1: _Element = lxml.etree.fromstring(f.read())

    sparklines: List[_Element] = _sparklines_selector(sheet1)

    sparklines_dict: Dict[str, str] = {}
    for sp_l in sparklines:
        # Round-trip through text so xmltodict can turn the element into
        # a plain dict keyed by prefixed tag names.
        sparkline_xml: str = lxml.etree.tostring(sp_l, encoding="unicode")
        sparkline: Dict[str, Dict[str, str]] = xmltodict.parse(
            sparkline_xml,
            process_namespaces=True,
            namespaces=_xlsx_ns_imapping,
        )
        definition = sparkline["x14:sparkline"]
        sparklines_dict[definition["xm:sqref"]] = definition["xm:f"]
    return sparklines_dict


def compare_with_sparklines(actual: str, expected: str) -> float:
    """Compare two workbooks by cell contents and by sparkline definitions.

    Both partial results are printed before the combined score is returned.

    Args:
        actual (str): path to the workbook under test
        expected (str): path to the reference workbook

    Returns:
        float: 1.0 if both the tables and the sparklines match, else 0.0
    """
    normal_content_metric: bool = _first_sheets_equal(actual, expected)
    print("Normal Contents Metric: {:}".format(normal_content_metric))

    sparkline_metric: bool = _load_sparklines(actual) == _load_sparklines(expected)
    print("Sparkline Metric: {:}".format(sparkline_metric))

    return float(normal_content_metric and sparkline_metric)


def _ref_formula(data_source: Any, ref_name: str) -> Any:
    """Return ``data_source.<ref_name>.f``, or "" when either level is absent.

    Args:
        data_source (Any): a series' ``val`` or ``cat`` object (may be None)
        ref_name (str): name of the reference attribute, e.g. "numRef"

    Returns:
        Any: the ``f`` attribute of the reference, or "" if ``data_source``
          has no such reference or the reference has no ``f`` attribute
    """
    reference = getattr(data_source, ref_name, None)
    return getattr(reference, "f", "")


def _load_charts(xlsx_file: str) -> Dict[str, Any]:
    """Collect the charts of the active worksheet, keyed by their series.

    Every chart is identified by a signature string: one
    "value_num,value_str,categ_num,categ_str" entry per series, joined with
    ";". Charts with identical signatures therefore share one entry.

    Args:
        xlsx_file (str): path to xlsx

    Returns:
        Dict[str, Any]: information of charts; currently every value is an
          empty dict
    """
    workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
    worksheet: Worksheet = workbook.active
    charts: List[ChartBase] = worksheet._charts

    chart_set: Dict[str, Any] = {}
    for ch in charts:
        series_refs: List[str] = [
            "{:},{:},{:},{:}".format(
                _ref_formula(ser.val, "numRef"),
                _ref_formula(ser.val, "strRef"),
                _ref_formula(ser.cat, "numRef"),
                _ref_formula(ser.cat, "strRef"),
            )
            for ser in ch.series
        ]
        signature: str = ";".join(series_refs)

        # TODO: maybe more aspects, like chart type
        info: Dict[str, Any] = {}
        chart_set[signature] = info
    return chart_set


def compare_with_charts(actual: str, expected: str) -> float:
    """Compare two workbooks by cell contents and by chart series.

    Both partial results are printed before the combined score is returned.

    Args:
        actual (str): path to the workbook under test
        expected (str): path to the reference workbook

    Returns:
        float: 1.0 if both the tables and the chart sets match, else 0.0
    """
    normal_content_metric: bool = _first_sheets_equal(actual, expected)
    print("Normal Contents Metric: {:}".format(normal_content_metric))

    chart_metric: bool = _load_charts(actual) == _load_charts(expected)
    print("Chart Metric: {:}".format(chart_metric))

    return float(normal_content_metric and chart_metric)


def _read_rule_sheet(workbook: pd.ExcelFile, sheet_ref: Union[int, str]) -> pd.DataFrame:
    """Load the sheet a "sheet_data" rule refers to.

    Args:
        workbook (pd.ExcelFile): the opened result workbook
        sheet_ref (Union[int, str]): either a sheet index into `workbook`, or
          a string "<file path>@<sheet index>" pointing into another file

    Returns:
        pd.DataFrame: contents of the referenced sheet
    """
    if isinstance(sheet_ref, int):
        return pd.read_excel(workbook, sheet_ref)
    # Split on the last "@" only, so the file path itself may contain "@".
    file_name, sheet_idx = sheet_ref.rsplit("@", maxsplit=1)
    return pd.read_excel(file_name, int(sheet_idx))


def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
    """Check a workbook's sheets against a list of rules.

    Every rule is evaluated and its outcome printed, even after a failure.

    Args:
        result (str): path to the workbook under test
        rules (List[Dict[str, Any]]): rules of one of the forms
          {
            "type": "sheet_name",
            "sheet_idx": 0,
            "sheet_name": "Sheet1"
          }
          which asserts the name of the sheet at the given index, or
          {
            "type": "sheet_data",
            "sheet_idx0": "path/to/other.xlsx@0",
            "sheet_idx1": 1
          }
          which asserts that two sheets hold equal data; each of
          "sheet_idx0" / "sheet_idx1" is either a sheet index into `result`
          or a "<file path>@<sheet index>" string

    Returns:
        float: 1.0 if every rule holds, else 0.0

    Raises:
        NotImplementedError: for a rule with an unknown "type"
        IndexError: if a "sheet_name" rule's index is out of range
    """
    # The context manager releases the file handle, also when a rule raises.
    with pd.ExcelFile(result) as workbook:
        worksheet_names: List[str] = workbook.sheet_names

        passes = True
        for r in rules:
            if r["type"] == "sheet_name":
                actual_name: str = worksheet_names[r["sheet_idx"]]
                expected_name: str = r["sheet_name"]
                metric: bool = actual_name == expected_name
                print("Assertion: {:d}.{:} is {:} - {:}".format(
                    r["sheet_idx"], expected_name, actual_name, metric))
            elif r["type"] == "sheet_data":
                df1: pd.DataFrame = _read_rule_sheet(workbook, r["sheet_idx0"])
                df2: pd.DataFrame = _read_rule_sheet(workbook, r["sheet_idx1"])
                metric: bool = df1.equals(df2)
                print("Assertion: {:} == {:} - {:}".format(
                    r["sheet_idx0"], r["sheet_idx1"], metric))
            else:
                raise NotImplementedError(
                    "Unimplemented sheet check: {:}".format(r["type"]))
            passes = passes and metric

    return float(passes)


def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
    """Check the freeze-pane position of a workbook's active worksheet.

    Args:
        result (str): path to the workbook under test
        rules (Dict[str, str]): rule in form of
          {
            "position": "C6"
          }

    Returns:
        float: 1.0 if the active worksheet's ``freeze_panes`` equals
          ``rules["position"]``, else 0.0
    """
    worksheet: Worksheet = openpyxl.load_workbook(filename=result).active
    return float(worksheet.freeze_panes == rules["position"])


if __name__ == '__main__':
    import sys

    # Manual smoke test for the chart metric. The two default workbooks can
    # be overridden by passing exactly two paths on the command line.
    default_path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
    default_path2 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"

    cli_paths = sys.argv[1:3]
    if len(cli_paths) == 2:
        path1, path2 = cli_paths
    else:
        path1, path2 = default_path1, default_path2
    print(compare_with_charts(path1, path2))