ver Jan21st

Reconstructed the calc metrics.
The configs have not been updated yet.
This commit is contained in:
David Chang
2024-01-21 22:55:52 +08:00
parent 21314346c5
commit 4514c32269
2 changed files with 235 additions and 194 deletions

View File

@@ -1,10 +1,11 @@
import logging
import operator
from numbers import Number
from typing import Any, Union
from typing import Dict, List
from typing import Any, Union, cast, Callable
from typing import Dict, List, Tuple
import os.path
import itertools
import functools
import openpyxl
import pandas as pd
@@ -17,206 +18,241 @@ from .utils import load_charts, load_sparklines, _match_value_to_rule
logger = logging.getLogger("desktopenv.metric.table")
BOOK = Union[pd.ExcelFile, Workbook, str]
def _parse_sheet_idx( sheet_idx: Union[int, str]
                    , result: BOOK, expected: BOOK
                    , result_sheet_names: List[str]
                    , expected_sheet_names: List[str]
                    ) -> Tuple[BOOK, str]:
    # function _parse_sheet_idx {{{ #
    """
    Resolves a sheet reference into the book it points at and the sheet name.

    Args:
        sheet_idx (Union[int, str]): sheet reference, one of
          * int n: n-th sheet of the result book
          * "RI<n>": n-th sheet of the result book
          * "RN<name>": sheet named <name> of the result book
          * "EI<n>": n-th sheet of the expected book
          * "EN<name>": sheet named <name> of the expected book
        result (BOOK): result book, handed back as is for result references
        expected (BOOK): expected book, handed back as is for expected
          references
        result_sheet_names (List[str]): sheet names of the result book, used
          to translate positional references
        expected_sheet_names (List[str]): sheet names of the expected book

    Returns:
        Tuple[BOOK, str]: the selected book and the sheet name

    Raises:
        ValueError: if the reference is of an unsupported type or prefix, or
          if the positional part of "RI"/"EI" is not an integer
        IndexError: if a positional reference is out of range
    """

    if isinstance(sheet_idx, int):
        return result, result_sheet_names[sheet_idx]

    # Only strings carry a prefix; anything else (None, float, ...) must end
    # in the ValueError below instead of an AttributeError on .startswith.
    if isinstance(sheet_idx, str):
        prefix: str = sheet_idx[:2]
        reference: str = sheet_idx[2:]
        if prefix == "RI":
            return result, result_sheet_names[int(reference)]
        if prefix == "RN":
            return result, reference
        if prefix == "EI":
            return expected, expected_sheet_names[int(reference)]
        if prefix == "EN":
            return expected, reference

    logger.error("Unrecognized sheet index")
    raise ValueError("Unrecognized sheet index")
    # }}} function _parse_sheet_idx #
def compare_table(actual: str, expected: str, **options) -> float:
SHEET = Union[pd.DataFrame, Worksheet, List[str]]


def _load_sheet(book: BOOK, index: str) -> SHEET:
    # function _load_sheet {{{ #
    """
    Fetches one sheet from a book, in the representation matching the book.

    Args:
        book (BOOK): either
          * pd.ExcelFile: the sheet is read through pd.read_excel
          * Workbook: the sheet is looked up by subscription
          * str: a path; the sheet is read from the text file
            "<path without extension>-<index>.csv"
        index (str): sheet name

    Returns:
        SHEET: a DataFrame, a Worksheet, or, for a path, the stripped lines
          of the csv file in reverse order with the blank lines at the end of
          the file removed

    Raises:
        NotImplementedError: if the book is of none of the types above
    """

    if isinstance(book, pd.ExcelFile):
        return pd.read_excel(book, index)
    if isinstance(book, Workbook):
        return book[index]
    if not isinstance(book, str):
        logger.error("Not supported workbook format")
        raise NotImplementedError("Not supported workbook format")

    stem: str = os.path.splitext(book)[0]
    csv_name: str = "{:}-{:}.csv".format(stem, index)
    with open(csv_name) as f:
        csv_lines: List[str] = [ln.strip() for ln in f.read().splitlines()]

    # Trim the blank tail of the file, then flip: the lines are returned
    # last-to-first.
    while csv_lines and len(csv_lines[-1]) == 0:
        csv_lines.pop()
    csv_lines.reverse()
    return csv_lines
    # }}} function _load_sheet #
def compare_table(result: str, expected: str, **options) -> float:
    # function compare_table {{{ #
    """
    Checks the result workbook against the golden workbook rule by rule.

    Args:
        result (str): path to result xlsx
        expected (str): path to golden xlsx
        rules (List[Dict[str, Any]]): given through `options`; list of dict
          like
          {
            "type": str,
            <str as parameters>: anything
          }
          as sequential rules. Supported types: sheet_name, sheet_data,
          sheet_print, sparkline, chart, number_format, freeze, zoom,
          data_validation. Sheet references ("sheet_idx", "sheet_idx0",
          "sheet_idx1") follow the forms accepted by _parse_sheet_idx:
          0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"

    Returns:
        float: 1. if all the rules hold; 0. if any fails or `result` is None

    Raises:
        NotImplementedError: if a rule has an unknown "type"
        KeyError: if `rules` or a parameter required by a rule is missing
    """

    # Without a result file no rule can be satisfied.
    if result is None:
        return 0.

    # Every book is parsed once by openpyxl and the parsed book is shared
    # with pandas.
    xlworkbookr: Workbook = openpyxl.load_workbook(filename=result)
    pdworkbookr = pd.ExcelFile(xlworkbookr, engine="openpyxl")
    worksheetr_names: List[str] = pdworkbookr.sheet_names

    xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected)
    pdworkbooke = pd.ExcelFile(xlworkbooke, engine="openpyxl")
    worksheete_names: List[str] = pdworkbooke.sheet_names

    parse_idx: Callable[[Union[str, int], BOOK, BOOK], Tuple[BOOK, str]] = \
        functools.partial( _parse_sheet_idx
                         , result_sheet_names=worksheetr_names
                         , expected_sheet_names=worksheete_names
                         )

    passes = True
    for r in options["rules"]:
        if r["type"] == "sheet_name":
            # Compare Sheet Names {{{ #
            # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # sheet_idx1: as sheet_idx0
            metric = worksheetr_names == worksheete_names
            logger.debug("Assertion: %s.sheet_names == %s.sheet_names - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
            # }}} Compare Sheet Names #

        elif r["type"] == "sheet_data":
            # Compare Sheet Data by Internal Value {{{ #
            # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # sheet_idx1: as sheet_idx0
            sheet1 = _load_sheet(*parse_idx(r["sheet_idx0"], pdworkbookr, pdworkbooke))
            sheet2 = _load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke))
            metric = sheet1.equals(sheet2)
            logger.debug("Assertion: %s =v= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
            # }}} Compare Sheet Data by Internal Value #

        elif r["type"] == "sheet_print":
            # Compare Sheet Data by Printed Value {{{ #
            # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # sheet_idx1: as sheet_idx0
            # ignore_case: optional, defaults to False
            # The paths (not the parsed books) are passed, so that
            # _load_sheet reads the csv dump of the sheet.
            sheet1 = _load_sheet(*parse_idx(r["sheet_idx0"], result, expected))
            sheet2 = _load_sheet(*parse_idx(r["sheet_idx1"], result, expected))
            if r.get("ignore_case", False):
                sheet1 = [l.lower() for l in sheet1]
                sheet2 = [l.lower() for l in sheet2]
            metric = sheet1 == sheet2
            logger.debug("Assertion: %s =p= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
            # }}} Compare Sheet Data by Printed Value #

        elif r["type"] == "sparkline":
            # Compare Sparklines {{{ #
            # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # sheet_idx1: as sheet_idx0
            sparkline1: Dict[str, str] = load_sparklines(*parse_idx(r["sheet_idx0"], result, expected))
            sparkline2: Dict[str, str] = load_sparklines(*parse_idx(r["sheet_idx1"], result, expected))
            metric = sparkline1 == sparkline2
            # "%s.sp" on both sides: "%.sp" would print a bare "p" instead
            # of the second sheet reference.
            logger.debug("Assertion: %s.sp == %s.sp - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
            # }}} Compare Sparklines #

        elif r["type"] == "chart":
            # Compare Charts {{{ #
            # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # sheet_idx1: as sheet_idx0
            # chart_props: list of str, see utils.load_charts
            charts1: Dict[str, Any] = load_charts(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
            charts2: Dict[str, Any] = load_charts(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
            metric = charts1 == charts2
            logger.debug("Assertion: %s[chart] == %s[chart] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
            # }}} Compare Charts #

        elif r["type"] == "number_format":
            # Compare Number Formats {{{ #
            # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # sheet_idx1: as sheet_idx0
            sheet1 = _load_sheet(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke))
            sheet2 = _load_sheet(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke))
            number_formats1: List[str] = [c.number_format.lower() for col in sheet1.iter_cols() for c in col if c.data_type == "n"]
            number_formats2: List[str] = [c.number_format.lower() for col in sheet2.iter_cols() for c in col if c.data_type == "n"]
            metric = number_formats1 == number_formats2
            logger.debug("Assertion: %s.nf == %s.nf - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
            # }}} Compare Number Formats #

        elif r["type"] == "freeze":
            # Compare Freezing {{{ #
            # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # sheet_idx1: as sheet_idx0
            sheet1 = _load_sheet(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke))
            sheet2 = _load_sheet(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke))
            metric = sheet1.freeze_panes == sheet2.freeze_panes
            logger.debug( "Assertion: %s.freeze(%s) == %s.freeze(%s) - %s"
                        , r["sheet_idx0"], sheet1.freeze_panes
                        , r["sheet_idx1"], sheet2.freeze_panes
                        , metric
                        )
            # }}} Compare Freezing #

        elif r["type"] == "zoom":
            # Check Zooming {{{ #
            # sheet_idx: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # method: str
            # ref: value
            sheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
            # An unset zoomScale is treated as 100.
            zoom_scale: Number = sheet.sheet_view.zoomScale or 100.
            metric = _match_value_to_rule(zoom_scale, r)
            logger.debug("Assertion: %s.zoom(%.1f) %s %.1f - %s", r["sheet_idx"], zoom_scale, r["method"], r["ref"], metric)
            # }}} Check Zooming #

        elif r["type"] == "data_validation":
            # Check Data Validation {{{ #
            # sheet_idx: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # dv_props: list of dict like {attribute: {"method": str, "ref": anything}}
            #   available attributes:
            #     * ranges
            #     * type
            #     * formula1
            #     * formula2
            #     * operator
            #     * allowBlank
            #     * showDropDown
            #     * showInputMessage
            #     * showErrorMessage
            #     * error
            #     * errorTitle
            #     * errorStyle
            #     * prompt
            #     * promptTitle
            #     * imeMode
            sheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
            data_validators: List[DataValidation] = sheet.data_validations.dataValidation
            # The matching lives in a helper with its own loop names, so the
            # current rule `r` is still intact for the log line below.
            metric = _match_data_validations(data_validators, r["dv_props"])
            logger.debug("Assertion: %s.data_validation - %s", r["sheet_idx"], metric)
            # }}} Check Data Validation #

        else:
            raise NotImplementedError("Unimplemented sheet check: {:}".format(r["type"]))

        passes = passes and metric

    return float(passes)


def _match_data_validations(validators: List[Any], dv_props: List[Dict[str, Any]]) -> bool:
    """
    Tells whether every data validator is described by at least one property
    set.

    Args:
        validators (List[Any]): data validation objects of a sheet
        dv_props (List[Dict[str, Any]]): list of dict like
          {<str as attribute>: <rule handed to _match_value_to_rule>}; a
          dict matches a validator if all of its attributes match

    Returns:
        bool: True if each validator matches some dict of `dv_props`; True
          as well if there are no validators at all
    """

    return all( any( all( _match_value_to_rule(getattr(dat_vldt, attrbt), mr)
                          for attrbt, mr in prop.items()
                        )
                     for prop in dv_props
                   )
                for dat_vldt in validators
              )


def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
    """
    Checks sheet names and sheet contents of the result workbook.

    Args:
        result (str): path to result xlsx
        rules (List[Dict[str, Any]]): list of dict like
          {"type": "sheet_name", "sheet_idx": int, "sheet_name": str}
          or
          {"type": "sheet_data", "sheet_idx0": ..., "sheet_idx1": ...}
          where a sheet index is either an int (sheet of `result`) or a str
          "<path to xlsx>@<int>" (sheet of another file). Rules of other
          types are ignored.

    Returns:
        float: 1. if all the rules hold; 0. if any fails or `result` is None
    """

    if result is None:
        return 0.

    # The context manager releases the file handle held by pandas.
    with pd.ExcelFile(result) as workbook:
        worksheet_names: List[str] = workbook.sheet_names

        def read_sheet(sheet_ref: Union[int, str]) -> pd.DataFrame:
            # int -> sheet of the result book; "file@idx" -> sheet of `file`
            if isinstance(sheet_ref, int):
                return pd.read_excel(workbook, sheet_ref)
            file_name, sheet_no = sheet_ref.rsplit("@", maxsplit=1)
            return pd.read_excel(file_name, int(sheet_no))

        passes = True
        for r in rules:
            if r["type"] == "sheet_name":
                expected_name: str = worksheet_names[r["sheet_idx"]]
                actual_name: str = r["sheet_name"]
                metric = expected_name == actual_name
                logger.debug("Assertion: {:d}.{:} is {:} - {:}".format(r["sheet_idx"], actual_name, expected_name, metric))
                passes = passes and metric
            elif r["type"] == "sheet_data":
                df1: pd.DataFrame = read_sheet(r["sheet_idx0"])
                df2: pd.DataFrame = read_sheet(r["sheet_idx1"])
                metric = df1.equals(df2)
                logger.debug("Assertion: {:} == {:} - {:}".format(r["sheet_idx0"], r["sheet_idx1"], metric))
                passes = passes and metric

    return float(passes)
def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
    """
    Tests the freeze position of the active sheet.

    Args:
        result (str): path to the concerned xlsx file
        rules (Dict[str, str]): dict like {"position": str}, the value is
          compared to the `freeze_panes` of the active sheet

    Returns:
        float: 1. if the positions are equal; 0. if not or `result` is None
    """

    if result is not None:
        active_sheet: Worksheet = openpyxl.load_workbook(filename=result).active
        frozen_at = active_sheet.freeze_panes
        return float(frozen_at == rules["position"])
    return 0.
def check_xlsx_zoom(result: str, rules: Dict[str, Union[str, Number]]) -> float:
    """
    Tests the zoom scale of the active sheet against a reference value.

    Args:
        result (str): path to the concerned xlsx file
        rules (Dict[str, Union[str, Number]]): dict like
          {
            "relation": str, name of a function of module `operator`
            "ref_value": the value the zoom scale is compared to
          }

    Returns:
        float: the float value of `relation(zoom scale, ref_value)`; 0. if
          `result` is None
    """

    if result is None:
        return 0.

    active_sheet = openpyxl.load_workbook(filename=result).active
    # An unset zoomScale is treated as 100.
    zoom_scale: Number = active_sheet.sheet_view.zoomScale or 100.
    relation = getattr(operator, rules["relation"])
    return float(relation(zoom_scale, rules["ref_value"]))
def check_data_validations(result: str, rules: List[Dict[str, Dict[str, Any]]]) -> float:
    """
    Checks that every data validation of the active sheet is described by at
    least one of the rules.

    Args:
        result (str): path to the concerned xlsx file
        rules (List[Dict[str, Dict[str, Any]]]): list of dict like
          {
            <str as attribute>: {
                "method": str
                "ref": something
            }
          }
          A dict matches a data validation if all of its attributes match
          according to _match_value_to_rule.
          Available attributes:
            * ranges
            * type
            * formula1
            * formula2
            * operator
            * allowBlank
            * showDropDown
            * showInputMessage
            * showErrorMessage
            * error
            * errorTitle
            * errorStyle
            * prompt
            * promptTitle
            * imeMode

    Returns:
        float: 1. if each data validation matches some rule (also if the
          sheet has no data validations); 0. otherwise or if `result` is
          None
    """

    # Same convention as check_xlsx_freeze and check_xlsx_zoom: a missing
    # result scores 0. instead of failing inside openpyxl.
    if result is None:
        return 0.

    workbook: Workbook = openpyxl.load_workbook(result)
    worksheet: Worksheet = workbook.active
    data_validators: List[DataValidation] = worksheet.data_validations.dataValidation

    # all over the validators / any over the rules / all over the attributes;
    # the generators stop at the first decisive element, as the explicit
    # flags and breaks used to.
    total_metric: bool = all( any( all( _match_value_to_rule(getattr(dat_vldt, attrbt), mr)
                                        for attrbt, mr in r.items()
                                      )
                                   for r in rules
                                 )
                              for dat_vldt in data_validators
                            )
    return float(total_metric)
# }}} function compare_table #
if __name__ == '__main__':
# path1 = ""

View File

@@ -22,21 +22,21 @@ V = TypeVar("Value")
logger = logging.getLogger("desktopenv.metrics.utils")
_xlsx_namespaces = [("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
]
# (prefix, namespace URI) pairs of the XML namespaces referred to by the
# selectors below
_xlsx_namespaces = [ ("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")
, ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
]
# prefix -> namespace URI
_xlsx_ns_mapping = dict(_xlsx_namespaces)
# namespace URI -> prefix
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
# selects the `sheet` elements that are direct children of a `sheets` element,
# both in the "oo" namespace
_sheet_name_selector = lxml.cssselect.CSSSelector("oo|sheets>oo|sheet", namespaces=_xlsx_ns_mapping)
# selects the `sparkline` elements of the "x14" namespace
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
# print(_sparklines_selector.css)
def load_sparklines(xlsx_file: str) -> Dict[str, str]:
def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
"""
This function modifies data_frame in-place
Args:
xlsx_file (str): path to xlsx
sheet_name (str): sheet name
Returns:
List[Dict[str, str]]: sparkline definitions in form of
@@ -47,9 +47,13 @@ def load_sparklines(xlsx_file: str) -> Dict[str, str]:
# read xlsx
with zipfile.ZipFile(xlsx_file, "r") as z_f:
with z_f.open("xl/worksheets/sheet1.xml") as f:
sheet1: _Element = lxml.etree.fromstring(f.read())
sparklines: List[_Element] = _sparklines_selector(sheet1)
with z_f.open("xl/workbook.xml") as f:
workbook_database: _Element = lxml.etree.fromstring(f.read())
sheets: List[_Element] = _sheet_name_selector(workbook_database)
sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}
with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
sheet: _Element = lxml.etree.fromstring(f.read())
sparklines: List[_Element] = _sparklines_selector(sheet)
sparklines_dict: Dict[str, str] = {}
for sp_l in sparklines:
@@ -70,10 +74,11 @@ def load_sparklines(xlsx_file: str) -> Dict[str, str]:
# type: "scatterChart" | "lineChart" | "barChart"
# direction: "bar" (hori) | "col" (vert)
# xtitle, ytitle, ztitle: str
def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
"""
Args:
xlsx_file (Workbook): concerned excel book
sheet_name (str): sheet name
options (Dict[str, List[str]]): dict like {"chart_props": list of str}
giving the concerned chart properties
@@ -82,7 +87,7 @@ def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
"""
# workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
worksheet: Worksheet = xlsx_file.active
worksheet: Worksheet = xlsx_file[sheet_name]
charts: List[ChartBase] = worksheet._charts
chart_set: Dict[str, Any] = {}