merged from zdy
This commit is contained in:
@@ -13,7 +13,7 @@ from odf.text import P
|
||||
from odf.text import Span
|
||||
from skimage.color import deltaE_ciede2000
|
||||
from skimage.color import rgb2lab
|
||||
from fuzzywuzzy import fuzz
|
||||
from rapidfuzz import fuzz
|
||||
|
||||
logger = logging.getLogger("desktopenv.metric.docs")
|
||||
|
||||
|
||||
@@ -5,19 +5,20 @@ import os.path
|
||||
# import operator
|
||||
from numbers import Number
|
||||
from typing import Any, Union, cast, Callable, Iterable
|
||||
from typing import Dict, List, Tuple
|
||||
from typing import Dict, List, Tuple, Set
|
||||
|
||||
import openpyxl
|
||||
import pandas as pd
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.cell.cell import Cell
|
||||
# from openpyxl.worksheet.cell_range import MultiCellRange
|
||||
from openpyxl.worksheet.cell_range import MultiCellRange
|
||||
from openpyxl.worksheet.datavalidation import DataValidation
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
|
||||
from .utils import _match_value_to_rule, _read_cell_style, read_cell_value
|
||||
from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles\
|
||||
, load_filters, load_pivot_tables
|
||||
from rapidfuzz import fuzz
|
||||
|
||||
# from openpyxl.utils import coordinate_to_tuple
|
||||
|
||||
@@ -157,8 +158,8 @@ def compare_table(result: str, expected: str = None, **options) -> float:
|
||||
return 0.
|
||||
sheet2: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke))
|
||||
|
||||
sheet1 = sheet1.round()
|
||||
sheet2 = sheet2.round()
|
||||
sheet1 = sheet1.round(error_limit)
|
||||
sheet2 = sheet2.round(error_limit)
|
||||
metric: bool = sheet1.equals(sheet2)
|
||||
logger.debug("Sheet1: \n%s", str(sheet1))
|
||||
logger.debug("Sheet2: \n%s", str(sheet2))
|
||||
@@ -186,6 +187,61 @@ def compare_table(result: str, expected: str = None, **options) -> float:
|
||||
logger.debug("Assertion: %s =p= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
|
||||
# }}} Compare Sheet Data by Printed Value #
|
||||
|
||||
elif r["type"] == "sheet_fuzzy":
|
||||
# Fuzzy Match for Ranges {{{ #
|
||||
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
|
||||
# sheet_idx1: as sheet_idx0
|
||||
# rules: list of dict, each dict is like
|
||||
# { "range": ["A1:B6", "C2:E5"],
|
||||
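# "type": "includes" | "includes_by" | "fuzzy_match" | "exact_match", # intended: 0 includes 1, 0 includes_by 1 -- NOTE(review): the check below evaluates "includes" as `value1 in value2` (sheet_idx0's text contained in sheet_idx1's), which is the reverse; confirm which direction is meant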
|
||||
# "threshold": 85, # used by "fuzzy_match" only; defaults to 85 when omitted
|
||||
# "ignore_case": true | false,
|
||||
# "ignore_chars": " ()", # filtered out
|
||||
# "trim_leadings": "+ ", # filtered by lstrip
|
||||
# "trim_trailings": "", # filtered by rstrip
|
||||
# "normalization": [["Rd", "Road"]], # filtered by replace
|
||||
# }
|
||||
|
||||
sheet1: Tuple[BOOK, str] = parse_idx(r["sheet_idx0"], result, expected)
|
||||
sheet2: Tuple[BOOK, str] = parse_idx(r["sheet_idx1"], result, expected)
|
||||
total_metric = True
|
||||
for rl in r["rules"]:
|
||||
for rng in MultiCellRange(rl["range"]):
|
||||
for cdn in rng.cells:
|
||||
value1: str = str(read_cell_value(*sheet1, cdn))
|
||||
value2: str = str(read_cell_value(*sheet2, cdn))
|
||||
|
||||
for rplc in rl.get("normalization", []):
|
||||
value1 = value1.replace(rplc[0], rplc[1])
|
||||
value2 = value2.replace(rplc[0], rplc[1])
|
||||
if "trim_leadings" in rl:
|
||||
value1 = value1.lstrip(rl["trim_leadings"])
|
||||
value2 = value2.lstrip(rl["trim_leadings"])
|
||||
if "trim_trailings" in rl:
|
||||
value1 = value1.rstrip(rl["trim_trailings"])
|
||||
value2 = value2.rstrip(rl["trim_trailings"])
|
||||
if "ignore_chars" in rl:
|
||||
ignore_chars: Set[str] = set(rl["ignore_chars"])
|
||||
value1 = "".join(filter(lambda ch: ch not in ignore_chars, value1))
|
||||
value2 = "".join(filter(lambda ch: ch not in ignore_chars, value2))
|
||||
if rl.get("ignore_case", False):
|
||||
value1 = value1.lower()
|
||||
value2 = value2.lower()
|
||||
|
||||
if rl["type"]=="includes":
|
||||
metric: bool = value1 in value2
|
||||
if rl["type"]=="includes_by":
|
||||
metric: bool = value2 in value1
|
||||
if rl["type"]=="fuzzy_match":
|
||||
metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.)
|
||||
if rl["type"]=="exact_match":
|
||||
metric: bool = value1==value2
|
||||
total_metric = total_metric and metric
|
||||
|
||||
metric: bool = total_metric
|
||||
logger.debug("Assertion: %s =~= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
|
||||
# }}} Fuzzy Match for Ranges #
|
||||
|
||||
elif r["type"] == "sparkline":
|
||||
# Compare Sparklines {{{ #
|
||||
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
|
||||
|
||||
@@ -53,51 +53,75 @@
|
||||
],
|
||||
"evaluator": {
|
||||
"postconfig": [
|
||||
{
|
||||
"type": "activate_window",
|
||||
"parameters": {
|
||||
"window_name": "MUST_VISIT.xlsx - LibreOffice Calc",
|
||||
"strict": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "sleep",
|
||||
"parameters": {
|
||||
"seconds": 0.5
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "execute",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"libreoffice",
|
||||
"--convert-to",
|
||||
"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1",
|
||||
"--outdir",
|
||||
"/home/user/Desktop",
|
||||
"/home/user/Desktop/MUST_VISIT.xlsx"
|
||||
"python",
|
||||
"-c",
|
||||
"import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "sleep",
|
||||
"parameters": {
|
||||
"seconds": 1.0
|
||||
}
|
||||
}
|
||||
],
|
||||
"func": "compare_table",
|
||||
"result": {
|
||||
"type": "vm_file",
|
||||
"path": [
|
||||
"/home/user/Desktop/MUST_VISIT.xlsx",
|
||||
"/home/user/Desktop/MUST_VISIT-Sheet1.csv"
|
||||
],
|
||||
"dest": [
|
||||
"MUST_VISIT.xlsx",
|
||||
"MUST_VISIT-Sheet1.csv"
|
||||
],
|
||||
"multi": true
|
||||
"path": "/home/user/Desktop/MUST_VISIT.xlsx",
|
||||
"dest": "MUST_VISIT.xlsx"
|
||||
},
|
||||
"expected": {
|
||||
"type": "cloud_file",
|
||||
"path": [
|
||||
"https://drive.google.com/uc?id=1MV6jBvRbbYwPqeFTd_nX40xzyltNhphl&export=download",
|
||||
"https://drive.google.com/uc?id=1CGoRQDLw9-Ai7daq3qCz0o9kYSZB2WNn&export=download"
|
||||
],
|
||||
"dest": [
|
||||
"MUST_VISIT-gt.xlsx",
|
||||
"MUST_VISIT-gt-Sheet1.csv"
|
||||
],
|
||||
"multi": true
|
||||
"path": "https://drive.google.com/uc?id=1MV6jBvRbbYwPqeFTd_nX40xzyltNhphl&export=download",
|
||||
"dest": "MUST_VISIT-gt.xlsx"
|
||||
},
|
||||
"options": {
|
||||
"rules": [
|
||||
{
|
||||
"type": "sheet_print",
|
||||
"type": "sheet_fuzzy",
|
||||
"sheet_idx0": "RNSheet1",
|
||||
"sheet_idx1": "ENSheet1"
|
||||
"sheet_idx1": "ENSheet1",
|
||||
"rules": [
|
||||
{
|
||||
"range": ["A1:A6", "D1:D6"],
|
||||
"type": "exact_match"
|
||||
},
|
||||
{
|
||||
"range": ["B1:B6"],
|
||||
"type": "fuzzy_match",
|
||||
"threshold": 85,
|
||||
"normalization": [
|
||||
["Rd", "Road"],
|
||||
["St", "Street"]
|
||||
],
|
||||
"ignore_case": true
|
||||
},
|
||||
{
|
||||
"range": ["C1:C6"],
|
||||
"type": "includes",
|
||||
"trim_leadings": "+ ",
|
||||
"ignore_chars": " ()-"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user