import itertools
import logging
import operator
import os.path
from numbers import Number
from typing import Any, Union
from typing import Dict, List

import openpyxl
import pandas as pd
from openpyxl import Workbook
from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl.worksheet.worksheet import Worksheet

from .utils import load_charts, load_sparklines, _match_value_to_rule

logger = logging.getLogger("desktopenv.metric.table")


def _read_shown_lines(path: str, ignore_case: bool) -> List[str]:
    """Read a CSV dump as stripped lines with trailing blank lines removed.

    The returned list is in reverse file order (last non-blank line first).
    Both sides of the comparison go through this helper, so the reversal
    does not affect the equality test.
    """
    with open(path) as f:
        stripped = (line.strip() for line in reversed(f.read().splitlines()))
        # Iterating in reverse lets dropwhile discard the blank tail only;
        # blank lines inside the content are kept.
        lines: List[str] = list(
            itertools.dropwhile(lambda line: len(line) == 0, stripped)
        )
    if ignore_case:
        lines = [line.lower() for line in lines]
    return lines


def _numeric_number_formats(workbook: Workbook) -> List[str]:
    """Collect the lower-cased number formats of all numeric cells
    (``data_type == "n"``) of the active sheet, column by column."""
    return [
        c.number_format.lower()
        for col in workbook.active.iter_cols()
        for c in col
        if c.data_type == "n"
    ]


def compare_table(actual: str, expected: str, **options) -> float:
    """
    Compare a result spreadsheet against a gold spreadsheet.

    Args:
        actual (str): path to result xlsx
        expected (str): path to gold xlsx
        options: keyword options like
          {
            "features": list of str for other features, supports:
                * sparkline
                * chart
                * number_format
            "chart_props": list of str, giving the concerned chart
              properties (forwarded to load_charts with all other options)
            "as_shown": bool, if true, the comparison is made on the lines
              of the sibling ".csv" files (same path with the extension
              replaced) instead of on the parsed xlsx data
            "ignore_case": bool, only used together with "as_shown";
              lower-cases the csv lines before comparing
          }

    Return:
        float: the score, 1. if the content and all the requested features
          match, 0. otherwise (also 0. if `actual` is None)

    Raises:
        NotImplementedError: if an unsupported feature is requested
    """

    if actual is None:
        return 0.

    if options.get("as_shown", False):
        ignore_case: bool = options.get("ignore_case", False)
        expected_csv: str = os.path.splitext(expected)[0] + ".csv"
        actual_csv: str = os.path.splitext(actual)[0] + ".csv"

        expected_lines: List[str] = _read_shown_lines(expected_csv, ignore_case)
        actual_lines: List[str] = _read_shown_lines(actual_csv, ignore_case)
        metric: bool = expected_lines == actual_lines
        logger.debug("Content Metric just as shown: %s", metric)
    else:
        df1 = pd.read_excel(expected)
        df2 = pd.read_excel(actual)
        metric: bool = df1.equals(df2)
        logger.debug("Normal Content Metric: {:}".format(metric))

    features: List[str] = options.get("features", [])
    if features:
        # Loading is loop-invariant: parse each workbook once instead of
        # once per requested feature.
        workbook1: Workbook = openpyxl.load_workbook(actual)
        workbook2: Workbook = openpyxl.load_workbook(expected)

    for ftr in features:
        if ftr == "sparkline":
            sp1 = load_sparklines(actual)
            sp2 = load_sparklines(expected)
            new_metric: bool = sp1 == sp2
            logger.debug("Sparkline Metric: {:}".format(new_metric))
        elif ftr == "chart":
            charts1 = load_charts(workbook1, **options)
            charts2 = load_charts(workbook2, **options)
            new_metric: bool = charts1 == charts2
            logger.debug("Chart Metric: {:}".format(new_metric))
        elif ftr == "number_format":
            number_formats1: List[str] = _numeric_number_formats(workbook1)
            number_formats2: List[str] = _numeric_number_formats(workbook2)
            new_metric: bool = number_formats1 == number_formats2
            logger.debug("Number Format Metric: {:}".format(new_metric))
        else:
            raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr))
        metric = metric and new_metric

    return float(metric)


def _load_sheet_frame(workbook: pd.ExcelFile, sheet_ref: Union[int, str]) -> pd.DataFrame:
    """Load the sheet designated by `sheet_ref` as a DataFrame.

    `sheet_ref` is either an int, the sheet index in `workbook`, or a str
    like "<path to another xlsx>@<sheet index>".
    """
    if isinstance(sheet_ref, int):
        return pd.read_excel(workbook, sheet_ref)
    # Split on the last "@" so that the path part may itself contain "@".
    file_name, sheet_idx = sheet_ref.rsplit("@", maxsplit=1)
    return pd.read_excel(file_name, int(sheet_idx))


def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
    """
    Check the sheets of a workbook against a list of assertions.

    Args:
        result (str): path to the concerned xlsx file
        rules (List[Dict[str, Any]]): list of dict like
          {
            "type": "sheet_name"
            "sheet_idx": int, index of the sheet in the result file
            "sheet_name": str, the name that sheet should have
          }
          or
          {
            "type": "sheet_data"
            "sheet_idx0": int or str
            "sheet_idx1": int or str
          }
          where an int is a sheet index in the result file and a str is
          like "<path to another xlsx>@<sheet index>"; the two designated
          sheets should hold equal data

    Returns:
        float: 1. if all the rules are satisfied, 0. otherwise (also 0. if
          `result` is None)

    Raises:
        NotImplementedError: if a rule has an unknown "type"
    """

    if result is None:
        return 0.

    # The context manager guarantees that the file handle held by
    # pd.ExcelFile is released, also when a rule raises.
    with pd.ExcelFile(result) as workbook:
        worksheet_names: List[str] = workbook.sheet_names

        passes = True
        for r in rules:
            if r["type"] == "sheet_name":
                required_name: str = r["sheet_name"]
                try:
                    found_name: str = worksheet_names[r["sheet_idx"]]
                except IndexError:
                    # The result file has no sheet at this index: this is a
                    # failed assertion, not an evaluator crash.
                    logger.debug( "Assertion: {:d}.{:} - sheet index out of range"
                                    .format(r["sheet_idx"], required_name)
                                )
                    passes = False
                    continue
                metric: bool = found_name == required_name
                logger.debug("Assertion: {:d}.{:} is {:} - {:}".format(r["sheet_idx"], required_name, found_name, metric))
                passes = passes and metric
            elif r["type"] == "sheet_data":
                df1: pd.DataFrame = _load_sheet_frame(workbook, r["sheet_idx0"])
                df2: pd.DataFrame = _load_sheet_frame(workbook, r["sheet_idx1"])
                metric: bool = df1.equals(df2)
                logger.debug("Assertion: {:} == {:} - {:}".format(r["sheet_idx0"], r["sheet_idx1"], metric))
                passes = passes and metric
            else:
                raise NotImplementedError("Unimplemented sheet check: {:}".format(r["type"]))

    return float(passes)


def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
    """
    Check the freeze panes position of the active sheet.

    Args:
        result (str): path to the concerned xlsx file
        rules (Dict[str, str]): dict like
          {
            "position": str, the expected `freeze_panes` value, e.g., "C6"
          }

    Returns:
        float: 1. if the position matches, 0. otherwise (also 0. if
          `result` is None)
    """

    if result is None:
        return 0.

    worksheet: Worksheet = openpyxl.load_workbook(filename=result).active
    return float(worksheet.freeze_panes == rules["position"])


def check_xlsx_zoom(result: str, rules: Dict[str, Union[str, Number]]) -> float:
    """
    Check the zoom scale of the active sheet against a reference value.

    Args:
        result (str): path to the concerned xlsx file
        rules (Dict[str, Union[str, Number]]): dict like
          {
            "relation": str, name of a function in the `operator` module,
              e.g., "lt"; it is called as relation(zoom_scale, ref_value)
            "ref_value": Number, the reference value
          }

    Returns:
        float: 1. if the relation holds, 0. otherwise (also 0. if `result`
          is None)
    """

    if result is None:
        return 0.

    worksheet = openpyxl.load_workbook(filename=result).active
    # A missing (or zero) zoomScale falls back to 100.
    zoom_scale: Number = worksheet.sheet_view.zoomScale or 100.
    relation = getattr(operator, rules["relation"])
    return float(relation(zoom_scale, rules["ref_value"]))


def check_data_validations(result: str, rules: List[Dict[str, Dict[str, Any]]]) -> float:
    """
    Check that every data validation of the active sheet satisfies at
    least one of the given rules.

    Args:
        result (str): path to the concerned xlsx file
        rules (List[Dict[str, Dict[str, Any]]]): list of dict like
          {
            <str as attribute>: {
                "method": str
                "ref": something
            }
          }
          Each key is the name of an attribute of the data validation
          object; its value is handed to `_match_value_to_rule` together
          with the attribute value. A rule is satisfied if all of its
          attributes match. Available attributes:
            * ranges
            * type
            * formula1
            * formula2
            * operator
            * allowBlank
            * showDropDown
            * showInputMessage
            * showErrorMessage
            * error
            * errorTitle
            * errorStyle
            * prompt
            * promptTitle
            * imeMode

    Returns:
        float: 1. if each data validation satisfies some rule, 0. otherwise
          (also 0. if `result` is None). NOTE: a sheet without any data
          validation yields 1., as there is nothing to violate the rules.
    """

    # Same guard as the other metrics in this module.
    if result is None:
        return 0.

    workbook: Workbook = openpyxl.load_workbook(result)
    worksheet: Worksheet = workbook.active
    data_validators: List[DataValidation] = worksheet.data_validations.dataValidation

    # all/any short-circuit exactly as explicit flag-and-break loops would.
    total_metric: bool = all(
        any(
            all(
                _match_value_to_rule(getattr(dat_vldt, attrbt), mr)
                for attrbt, mr in r.items()
            )
            for r in rules
        )
        for dat_vldt in data_validators
    )
    return float(total_metric)


if __name__ == '__main__':
    # Manual smoke test of check_data_validations on a local sample file.
    path = "../../任务数据/LibreOffice Calc/Order_Id_Mark_Pass_Fail_gold.xlsx"
    print( check_data_validations( path, [ { "ranges": { "method": "spreadsheet_range"
                                                       , "ref": ["D2:D29", "D2:D1048576"]
                                                       }
                                           , "type": { "method": "eq"
                                                     , "ref": "list"
                                                     }
                                           , "formula1": { "method": "str_set_eq"
                                                         , "ref": ["Pass", "Fail", "Held"]
                                                         }
                                           }
                                         ]
                                 )
         )