ver Mar8th
fixed a task and a metric
This commit is contained in:
@@ -194,7 +194,7 @@ def compare_table(result: str, expected: str = None, **options) -> float:
|
|||||||
# sheet_idx1: as sheet_idx0
|
# sheet_idx1: as sheet_idx0
|
||||||
# rules: list of dict, each dict is like
|
# rules: list of dict, each dict is like
|
||||||
# { "range": ["A1:B6", "C2:E5"],
|
# { "range": ["A1:B6", "C2:E5"],
|
||||||
# "type": "includes" | "includes_by" | "fuzzy_match" | "exact_match", # 0 includes 1, 0 includes_by 1
|
# "type": "includes" | "included_by" | "fuzzy_match" | "exact_match", # 0 includes 1, 0 included_by 1
|
||||||
# "threshold": 85, // for fuzzy match
|
# "threshold": 85, // for fuzzy match
|
||||||
# "ignore_case": true | false,
|
# "ignore_case": true | false,
|
||||||
# "ignore_chars": " ()", # filtered out
|
# "ignore_chars": " ()", # filtered out
|
||||||
@@ -232,9 +232,9 @@ def compare_table(result: str, expected: str = None, **options) -> float:
|
|||||||
value2 = value2.lower()
|
value2 = value2.lower()
|
||||||
|
|
||||||
if rl["type"]=="includes":
|
if rl["type"]=="includes":
|
||||||
metric: bool = value1 in value2
|
|
||||||
elif rl["type"]=="includes_by":
|
|
||||||
metric: bool = value2 in value1
|
metric: bool = value2 in value1
|
||||||
|
elif rl["type"]=="included_by":
|
||||||
|
metric: bool = value1 in value2
|
||||||
elif rl["type"]=="fuzzy_match":
|
elif rl["type"]=="fuzzy_match":
|
||||||
metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.)
|
metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.)
|
||||||
elif rl["type"]=="exact_match":
|
elif rl["type"]=="exact_match":
|
||||||
|
|||||||
@@ -274,7 +274,8 @@ def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[s
|
|||||||
# }}} function load_pivot_tables #
|
# }}} function load_pivot_tables #
|
||||||
|
|
||||||
|
|
||||||
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping)
|
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si", namespaces=_xlsx_ns_mapping)
|
||||||
|
_shared_str_value_selector = lxml.cssselect.CSSSelector("oo|t", namespaces=_xlsx_ns_mapping)
|
||||||
|
|
||||||
|
|
||||||
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
||||||
@@ -285,7 +286,9 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
|||||||
with z_f.open("xl/sharedStrings.xml") as f:
|
with z_f.open("xl/sharedStrings.xml") as f:
|
||||||
shared_str_xml: _Element = lxml.etree.fromstring(f.read())
|
shared_str_xml: _Element = lxml.etree.fromstring(f.read())
|
||||||
str_elements: List[_Element] = _shared_str_selector(shared_str_xml)
|
str_elements: List[_Element] = _shared_str_selector(shared_str_xml)
|
||||||
shared_strs: List[str] = [elm.text for elm in str_elements]
|
shared_strs: List[str] = [ "".join(t.text for t in _shared_str_value_selector(elm))\
|
||||||
|
for elm in str_elements
|
||||||
|
]
|
||||||
except:
|
except:
|
||||||
logger.debug("Read shared strings error: %s", xlsx_file)
|
logger.debug("Read shared strings error: %s", xlsx_file)
|
||||||
|
|
||||||
|
|||||||
@@ -64,35 +64,40 @@
|
|||||||
"func": "compare_table",
|
"func": "compare_table",
|
||||||
"result": {
|
"result": {
|
||||||
"type": "vm_file",
|
"type": "vm_file",
|
||||||
"path": [
|
"path": "/home/user/authors.xlsx",
|
||||||
"/home/user/authors.xlsx",
|
"dest": "authors.xlsx"
|
||||||
"/home/user/authors-Sheet1.csv"
|
|
||||||
],
|
|
||||||
"dest": [
|
|
||||||
"authors.xlsx",
|
|
||||||
"authors-Sheet1.csv"
|
|
||||||
],
|
|
||||||
"multi": true
|
|
||||||
},
|
},
|
||||||
"expected": {
|
"expected": {
|
||||||
"type": "cloud_file",
|
"type": "cloud_file",
|
||||||
"path": [
|
"path": "https://drive.google.com/uc?id=1fttbvfHuoQfsQUk3fVXkJsCu231jhnQj&export=download",
|
||||||
"https://drive.google.com/uc?id=1fttbvfHuoQfsQUk3fVXkJsCu231jhnQj&export=download",
|
"dest": "authors-gt.xlsx"
|
||||||
"https://drive.google.com/uc?id=1fq4hbk1g9R_SjknzwFAqvyF1ICyNYfok&export=download"
|
|
||||||
],
|
|
||||||
"dest": [
|
|
||||||
"authors-gt.xlsx",
|
|
||||||
"authors-gt-Sheet1.csv"
|
|
||||||
],
|
|
||||||
"multi": true
|
|
||||||
},
|
},
|
||||||
"options": {
|
"options": {
|
||||||
"rules": [
|
"rules": [
|
||||||
{
|
{
|
||||||
"type": "sheet_print",
|
"type": "sheet_fuzzy",
|
||||||
"sheet_idx0": "RNSheet1",
|
"sheet_idx0": "RNSheet1",
|
||||||
"sheet_idx1": "ENSheet1",
|
"sheet_idx1": "ENSheet1",
|
||||||
"ignore_case": true
|
"rules": [
|
||||||
|
{
|
||||||
|
"range": ["A1:C1"],
|
||||||
|
"type": "includes",
|
||||||
|
"ignore_case": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"range": ["A2:B5"],
|
||||||
|
"type": "exact_match",
|
||||||
|
"trim_leadings": " ",
|
||||||
|
"trim_trailings": " "
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"range": ["C2:C5"],
|
||||||
|
"type": "exact_match",
|
||||||
|
"trim_leadings": " ",
|
||||||
|
"trim_trailings": " ",
|
||||||
|
"ignore_case": true
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user