ver Mar4th
updated range-wise fuzzy match mode for compare_table
This commit is contained in:
@@ -13,7 +13,7 @@ from odf.text import P
|
||||
from odf.text import Span
|
||||
from skimage.color import deltaE_ciede2000
|
||||
from skimage.color import rgb2lab
|
||||
from fuzzywuzzy import fuzz
|
||||
from rapidfuzz import fuzz
|
||||
|
||||
logger = logging.getLogger("desktopenv.metric.docs")
|
||||
|
||||
|
||||
@@ -5,19 +5,20 @@ import os.path
|
||||
# import operator
|
||||
from numbers import Number
|
||||
from typing import Any, Union, cast, Callable, Iterable
|
||||
from typing import Dict, List, Tuple
|
||||
from typing import Dict, List, Tuple, Set
|
||||
|
||||
import openpyxl
|
||||
import pandas as pd
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.cell.cell import Cell
|
||||
# from openpyxl.worksheet.cell_range import MultiCellRange
|
||||
from openpyxl.worksheet.cell_range import MultiCellRange
|
||||
from openpyxl.worksheet.datavalidation import DataValidation
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
|
||||
from .utils import _match_value_to_rule, _read_cell_style, read_cell_value
|
||||
from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles\
|
||||
, load_filters, load_pivot_tables
|
||||
from rapidfuzz import fuzz
|
||||
|
||||
# from openpyxl.utils import coordinate_to_tuple
|
||||
|
||||
@@ -157,8 +158,8 @@ def compare_table(result: str, expected: str = None, **options) -> float:
|
||||
return 0.
|
||||
sheet2: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke))
|
||||
|
||||
sheet1 = sheet1.round()
|
||||
sheet2 = sheet2.round()
|
||||
sheet1 = sheet1.round(error_limit)
|
||||
sheet2 = sheet2.round(error_limit)
|
||||
metric: bool = sheet1.equals(sheet2)
|
||||
logger.debug("Sheet1: \n%s", str(sheet1))
|
||||
logger.debug("Sheet2: \n%s", str(sheet2))
|
||||
@@ -186,6 +187,61 @@ def compare_table(result: str, expected: str = None, **options) -> float:
|
||||
logger.debug("Assertion: %s =p= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
|
||||
# }}} Compare Sheet Data by Printed Value #
|
||||
|
||||
elif r["type"] == "sheet_fuzzy":
|
||||
# Fuzzy Match for Ranges {{{ #
|
||||
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
|
||||
# sheet_idx1: as sheet_idx0
|
||||
# rules: list of dict, each dict is like
|
||||
# { "range": ["A1:B6", "C2:E5"],
|
||||
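# "type": "includes" | "includes_by" | "fuzzy_match" | "exact_match", # documented intent: "includes" = the sheet_idx0 value contains the sheet_idx1 value, "includes_by" = the sheet_idx0 value is contained in the sheet_idx1 value. NOTE(review): the checks below test the opposite direction ("includes" is `value1 in value2`, with value1 read from sheet_idx0) - confirm which direction is intended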
|
||||
# "threshold": 85, # only used by "fuzzy_match": minimum fuzz.ratio score to pass (defaults to 85 when omitted)
|
||||
# "ignore_case": true | false,
|
||||
# "ignore_chars": " ()", # filtered out
|
||||
# "trim_leadings": "+ ", # filtered by lstrip
|
||||
# "trim_trailings": "", # filtered by rstrip
|
||||
# "normalization": [["Rd", "Road"]], # filtered by replace
|
||||
# }
|
||||
|
||||
sheet1: Tuple[BOOK, str] = parse_idx(r["sheet_idx0"], result, expected)
|
||||
sheet2: Tuple[BOOK, str] = parse_idx(r["sheet_idx1"], result, expected)
|
||||
total_metric = True
|
||||
for rl in r["rules"]:
|
||||
for rng in MultiCellRange(rl["range"]):
|
||||
for cdn in rng.cells:
|
||||
value1: str = str(read_cell_value(*sheet1, cdn))
|
||||
value2: str = str(read_cell_value(*sheet2, cdn))
|
||||
|
||||
for rplc in rl.get("normalization", []):
|
||||
value1 = value1.replace(rplc[0], rplc[1])
|
||||
value2 = value2.replace(rplc[0], rplc[1])
|
||||
if "trim_leadings" in rl:
|
||||
value1 = value1.lstrip(rl["trim_leadings"])
|
||||
value2 = value2.lstrip(rl["trim_leadings"])
|
||||
if "trim_trailings" in rl:
|
||||
value1 = value1.rstrip(rl["trim_trailings"])
|
||||
value2 = value2.rstrip(rl["trim_trailings"])
|
||||
if "ignore_chars" in rl:
|
||||
ignore_chars: Set[str] = set(rl["ignore_chars"])
|
||||
value1 = "".join(filter(lambda ch: ch not in ignore_chars, value1))
|
||||
value2 = "".join(filter(lambda ch: ch not in ignore_chars, value2))
|
||||
if rl.get("ignore_case", False):
|
||||
value1 = value1.lower()
|
||||
value2 = value2.lower()
|
||||
|
||||
if rl["type"]=="includes":
|
||||
metric: bool = value1 in value2
|
||||
if rl["type"]=="includes_by":
|
||||
metric: bool = value2 in value1
|
||||
if rl["type"]=="fuzzy_match":
|
||||
metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.)
|
||||
if rl["type"]=="exact_match":
|
||||
metric: bool = value1==value2
|
||||
total_metric = total_metric and metric
|
||||
|
||||
metric: bool = total_metric
|
||||
logger.debug("Assertion: %s =~= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
|
||||
# }}} Fuzzy Match for Ranges #
|
||||
|
||||
elif r["type"] == "sparkline":
|
||||
# Compare Sparklines {{{ #
|
||||
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
|
||||
|
||||
Reference in New Issue
Block a user