completed all the incomplete tasks stored under libreoffice_calc before added metric check_data_validations
331 lines
14 KiB
Python
331 lines
14 KiB
Python
import logging
|
|
import operator
|
|
from numbers import Number
|
|
from typing import Any, Union
|
|
from typing import Dict, List
|
|
import os.path
|
|
import itertools
|
|
|
|
import openpyxl
|
|
import pandas as pd
|
|
from openpyxl import Workbook
|
|
from openpyxl.worksheet.worksheet import Worksheet
|
|
#from openpyxl.worksheet.cell_range import MultiCellRange
|
|
from openpyxl.worksheet.datavalidation import DataValidation
|
|
|
|
from .utils import load_charts, load_sparklines, _match_value_to_rule
|
|
|
|
logger = logging.getLogger("desktopenv.metric.table")
|
|
|
|
|
|
def _read_shown_lines(csv_path: str, ignore_case: bool = False) -> List[str]:
    """Read a CSV dump as a list of stripped lines without trailing blank lines."""
    with open(csv_path) as f:
        lines: List[str] = [line.strip() for line in f.read().splitlines()]
    # Blank lines at the end of the dump carry no content and must not make
    # two otherwise identical tables compare unequal.
    while lines and not lines[-1]:
        lines.pop()
    if ignore_case:
        lines = [line.lower() for line in lines]
    return lines


def _numeric_cell_formats(workbook: "Workbook") -> List[str]:
    """Collect the lower-cased number formats of all numeric cells of the active sheet."""
    return [
        cell.number_format.lower()
        for column in workbook.active.iter_cols()
        for cell in column
        if cell.data_type == "n"
    ]


def compare_table(actual: str, expected: str, **options) -> float:
    """
    Compare a result workbook with a gold workbook.

    Args:
        actual (str): path to result xlsx
        expected (str): path to gold xlsx
        options (Dict[str, Any]): dict like
          {
            "features": list of str for other features, supports:
                * sparkline
                * chart
                * number_format
            "chart_props": list of str, giving the concerned chart properties
                (forwarded to load_charts together with the other options)
            "as_shown": bool, if true the cell contents are compared through
                the ".csv" files that sit next to the two workbooks (same
                path with the extension replaced); lines are stripped and
                trailing blank lines are ignored. Otherwise the workbooks
                are read with pandas and compared with DataFrame.equals.
            "ignore_case": bool, only used together with "as_shown";
                compares the csv lines case-insensitively
          }

    Return:
        float: 1. if the contents and all requested features match, else 0.
            (0. as well if `actual` is None)

    Raises:
        NotImplementedError: if "features" contains an unsupported name
    """

    if actual is None:
        return 0.

    if options.get("as_shown", False):
        ignore_case: bool = options.get("ignore_case", False)
        expected_lines: List[str] = _read_shown_lines(
            os.path.splitext(expected)[0] + ".csv", ignore_case
        )
        actual_lines: List[str] = _read_shown_lines(
            os.path.splitext(actual)[0] + ".csv", ignore_case
        )
        metric: bool = expected_lines == actual_lines
        logger.debug("Content Metric just as shown: %s", metric)
    else:
        df1 = pd.read_excel(expected)
        df2 = pd.read_excel(actual)
        metric: bool = df1.equals(df2)
        logger.debug("Normal Content Metric: %s", metric)

    features: List[str] = options.get("features", [])
    if features:
        # Parse each workbook once instead of once per requested feature.
        workbook1: Workbook = openpyxl.load_workbook(actual)
        workbook2: Workbook = openpyxl.load_workbook(expected)

    for ftr in features:
        if ftr == "sparkline":
            new_metric: bool = load_sparklines(actual) == load_sparklines(expected)
            logger.debug("Sparkline Metric: %s", new_metric)
        elif ftr == "chart":
            charts1 = load_charts(workbook1, **options)
            charts2 = load_charts(workbook2, **options)
            new_metric: bool = charts1 == charts2
            logger.debug("Chart Metric: %s", new_metric)
        elif ftr == "number_format":
            new_metric: bool = (
                _numeric_cell_formats(workbook1) == _numeric_cell_formats(workbook2)
            )
            logger.debug("Number Format Metric: %s", new_metric)
        else:
            raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr))
        metric = metric and new_metric

    return float(metric)
|
|
|
|
|
|
def _load_sheet(workbook: pd.ExcelFile, sheet_ref: Union[int, str]) -> pd.DataFrame:
    """Load a sheet given either an index into `workbook` or a "<file>@<index>" reference."""
    if isinstance(sheet_ref, int):
        return pd.read_excel(workbook, sheet_ref)
    # Split on the LAST "@" so that file names containing "@" still work.
    file_name, sheet_idx = sheet_ref.rsplit("@", maxsplit=1)
    return pd.read_excel(file_name, int(sheet_idx))


def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
    """
    Check the sheets of a workbook against a list of rules.

    Args:
        result (str): path to the concerned xlsx file
        rules (List[Dict[str, Any]]): list of dict, each being one of
          {
            "type": "sheet_name",
            "sheet_idx": int, index of the sheet in the result workbook
            "sheet_name": str, the name that sheet is expected to have
          }
          {
            "type": "sheet_data",
            "sheet_idx0": int (sheet index in the result workbook) or
                str like "<path to another xlsx>@<sheet index>"
            "sheet_idx1": same format as "sheet_idx0"
          }
          For "sheet_data" the two referenced sheets are compared with
          DataFrame.equals.

    Return:
        float: 1. if all rules hold, else 0. (0. as well if `result` is None)

    Raises:
        NotImplementedError: if a rule has an unknown "type"
    """

    if result is None:
        return 0.

    # The context manager closes the file handle held by ExcelFile.
    with pd.ExcelFile(result) as workbook:
        worksheet_names: List[str] = workbook.sheet_names

        passes = True
        for r in rules:
            if r["type"] == "sheet_name":
                actual_name: str = worksheet_names[r["sheet_idx"]]
                expected_name: str = r["sheet_name"]
                metric: bool = actual_name == expected_name
                logger.debug(
                    "Assertion: %d.%s is %s - %s",
                    r["sheet_idx"], actual_name, expected_name, metric,
                )
            elif r["type"] == "sheet_data":
                df1: pd.DataFrame = _load_sheet(workbook, r["sheet_idx0"])
                df2: pd.DataFrame = _load_sheet(workbook, r["sheet_idx1"])
                metric: bool = df1.equals(df2)
                logger.debug(
                    "Assertion: %s == %s - %s",
                    r["sheet_idx0"], r["sheet_idx1"], metric,
                )
            else:
                raise NotImplementedError(
                    "Unimplemented sheet check: {:}".format(r["type"])
                )
            passes = passes and metric

    return float(passes)
|
|
|
|
|
|
def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
    """
    Check the freeze-pane position of the active sheet.

    Args:
        result (str): path to the concerned xlsx file
        rules (Dict[str, str]): dict like
          {
            "position": str, the expected value of the sheet's
                freeze_panes attribute, e.g. "C6"
          }

    Return:
        float: 1. if the positions are equal, else 0.
            (0. as well if `result` is None)
    """

    if result is None:
        return 0.

    workbook = openpyxl.load_workbook(filename=result)
    active_sheet: Worksheet = workbook.active
    position_matches = active_sheet.freeze_panes == rules["position"]
    return float(position_matches)
|
|
|
|
|
|
def check_xlsx_zoom(result: str, rules: Dict[str, Union[str, Number]]) -> float:
    """
    Compare the zoom scale of the active sheet with a reference value.

    Args:
        result (str): path to the concerned xlsx file
        rules (Dict[str, Union[str, Number]]): dict like
          {
            "relation": str, name of a function of the `operator` module
                (e.g. "lt") applied as relation(zoom_scale, ref_value)
            "ref_value": Number, the value to compare against
          }

    Return:
        float: 1. if the relation holds, else 0.
            (0. as well if `result` is None)
    """

    if result is None:
        return 0.

    active_sheet = openpyxl.load_workbook(filename=result).active
    # A missing (falsy) zoomScale is treated as the default 100 %.
    zoom_scale: Number = active_sheet.sheet_view.zoomScale or 100.
    relation = getattr(operator, rules["relation"])
    return float(relation(zoom_scale, rules["ref_value"]))
|
|
|
|
def check_data_validations(result: str, rules: List[Dict[str, Dict[str, Any]]]) -> float:
    """
    Check that every data validation of the active sheet matches one of the
    given rules.

    Args:
        result (str): path to the concerned xlsx file
        rules (List[Dict[str, Dict[str, Any]]]): list of dict like
          {
            <str as attribute>: {
                "method": str
                "ref": something
            }
          }
          Each attribute value of a data validation is handed to
          _match_value_to_rule together with the corresponding
          {"method", "ref"} dict; a validation matches a rule if all
          attributes named by the rule match.
          Available attributes:
            * ranges
            * type
            * formula1
            * formula2
            * operator
            * allowBlank
            * showDropDown
            * showInputMessage
            * showErrorMessage
            * error
            * errorTitle
            * errorStyle
            * prompt
            * promptTitle
            * imeMode

    Returns:
        float: 1. if the sheet holds at least as many data validations as
            there are rules and each validation matches at least one rule,
            else 0. (0. as well if `result` is None)
    """

    # Same convention as the other metrics: no result file means no score.
    if result is None:
        return 0.

    workbook: Workbook = openpyxl.load_workbook(result)
    worksheet: Worksheet = workbook.active
    data_validators: List[DataValidation] = worksheet.data_validations.dataValidation

    # Without this check a sheet with no validations at all would pass
    # vacuously, whatever the rules ask for.
    if len(data_validators) < len(rules):
        return 0.

    for dat_vldt in data_validators:
        matches_some_rule = any(
            all(
                _match_value_to_rule(getattr(dat_vldt, attrbt), mr)
                for attrbt, mr in r.items()
            )
            for r in rules
        )
        if not matches_some_rule:
            return 0.

    return 1.
|
|
|
|
if __name__ == '__main__':
    # Manual scratch area for trying the metrics against local sample
    # workbooks; nothing here is used by the functions above.
    #
    # Invocations that were tried here earlier (kept for reference only):
    #   compare_table(path1, path2)
    #   compare_table(path1, path2, features=["chart"], chart_props=["type", "direction"])
    #   compare_table(path1, path2, features=["number_format"])
    #   compare_table(path1, path2, as_shown=True)
    #   check_xlsx_freeze(path1, {"position": "C6"})
    #   check_sheet_list(path1, [ {"type": "sheet_name", "sheet_idx": 0, "sheet_name": "Sheet1"}
    #                           , {"type": "sheet_data", "sheet_idx0": "<other xlsx>@0", "sheet_idx1": 1}
    #                           , {"type": "sheet_name", "sheet_idx": 2, "sheet_name": "Sheet2"}
    #                           ])
    #   check_zoom(path1, {"relation": "lt", "ref_value": 100})
    #     NOTE(review): no check_zoom is visible in this file; presumably this
    #     is an older name of check_xlsx_zoom - confirm before reusing.
    #
    # Other experiments printed freeze_panes, sheet_view.zoomScale, cell
    # values / number formats and data_validations.dataValidation[*].sqref
    # of the sample workbooks through openpyxl.

    path = "../../任务数据/LibreOffice Calc/Order_Id_Mark_Pass_Fail_gold.xlsx"
    validation_rules = [
        {
            "ranges": {
                "method": "spreadsheet_range",
                "ref": ["D2:D29", "D2:D1048576"],
            },
            "type": {
                "method": "eq",
                "ref": "list",
            },
            "formula1": {
                "method": "str_set_eq",
                "ref": ["Pass", "Fail", "Held"],
            },
        },
    ]
    print(check_data_validations(path, validation_rules))
|