Merge branch 'zdy'

This commit is contained in:
David Chang
2024-03-08 13:30:25 +08:00
3 changed files with 33 additions and 25 deletions

View File

@@ -194,7 +194,7 @@ def compare_table(result: str, expected: str = None, **options) -> float:
# sheet_idx1: as sheet_idx0
# rules: list of dict, each dict is like
# { "range": ["A1:B6", "C2:E5"],
# "type": "includes" | "includes_by" | "fuzzy_match" | "exact_match", # 0 includes 1, 0 includes_by 1
# "type": "includes" | "included_by" | "fuzzy_match" | "exact_match", # 0 includes 1, 0 included_by 1
# "threshold": 85, // for fuzzy match
# "ignore_case": true | false,
# "ignore_chars": " ()", # filtered out
@@ -232,9 +232,9 @@ def compare_table(result: str, expected: str = None, **options) -> float:
value2 = value2.lower()
if rl["type"]=="includes":
metric: bool = value1 in value2
elif rl["type"]=="includes_by":
metric: bool = value2 in value1
elif rl["type"]=="included_by":
metric: bool = value1 in value2
elif rl["type"]=="fuzzy_match":
metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.)
elif rl["type"]=="exact_match":

View File

@@ -274,7 +274,8 @@ def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[s
# }}} function load_pivot_tables #
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping)
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si", namespaces=_xlsx_ns_mapping)
_shared_str_value_selector = lxml.cssselect.CSSSelector("oo|t", namespaces=_xlsx_ns_mapping)
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
@@ -285,7 +286,9 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
with z_f.open("xl/sharedStrings.xml") as f:
shared_str_xml: _Element = lxml.etree.fromstring(f.read())
str_elements: List[_Element] = _shared_str_selector(shared_str_xml)
shared_strs: List[str] = [elm.text for elm in str_elements]
shared_strs: List[str] = [ "".join(t.text for t in _shared_str_value_selector(elm))\
for elm in str_elements
]
except:
logger.debug("Read shared strings error: %s", xlsx_file)

View File

@@ -64,35 +64,40 @@
"func": "compare_table",
"result": {
"type": "vm_file",
"path": [
"/home/user/authors.xlsx",
"/home/user/authors-Sheet1.csv"
],
"dest": [
"authors.xlsx",
"authors-Sheet1.csv"
],
"multi": true
"path": "/home/user/authors.xlsx",
"dest": "authors.xlsx"
},
"expected": {
"type": "cloud_file",
"path": [
"https://drive.google.com/uc?id=1fttbvfHuoQfsQUk3fVXkJsCu231jhnQj&export=download",
"https://drive.google.com/uc?id=1fq4hbk1g9R_SjknzwFAqvyF1ICyNYfok&export=download"
],
"dest": [
"authors-gt.xlsx",
"authors-gt-Sheet1.csv"
],
"multi": true
"path": "https://drive.google.com/uc?id=1fttbvfHuoQfsQUk3fVXkJsCu231jhnQj&export=download",
"dest": "authors-gt.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_print",
"type": "sheet_fuzzy",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1",
"ignore_case": true
"rules": [
{
"range": ["A1:C1"],
"type": "includes",
"ignore_case": true
},
{
"range": ["A2:B5"],
"type": "exact_match",
"trim_leadings": " ",
"trim_trailings": " "
},
{
"range": ["C2:C5"],
"type": "exact_match",
"trim_leadings": " ",
"trim_trailings": " ",
"ignore_case": true
}
]
}
]
}