Merge conflicts

This commit is contained in:
BlankCheng
2024-03-06 21:34:58 +08:00
41 changed files with 2250 additions and 89 deletions

View File

@@ -18,7 +18,8 @@ from .chrome import (
is_expected_search_query,
is_expected_active_tab,
is_expected_url_pattern_match,
is_added_to_steam_cart
is_added_to_steam_cart,
compare_pdf_images
)
from .docs import (
compare_font_names,
@@ -45,7 +46,8 @@ from .docs import (
is_first_line_centered,
check_file_exists,
check_tabstops,
compare_contains_image
compare_contains_image,
compare_docx_images
)
from .general import (
check_csv,
@@ -58,7 +60,8 @@ from .general import (
fuzzy_match,
check_include_exclude,
check_direct_json_object,
diff_text_file
diff_text_file,
literal_match
)
from .gimp import (
check_brightness_decrease_and_structure_sim,
@@ -129,7 +132,8 @@ from .vscode import (
check_json_keybindings,
check_python_file_by_test_suite,
check_python_file_by_gold_file,
check_html_background_image
check_html_background_image,
compare_zip_files
)

View File

@@ -129,6 +129,39 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
return score / len(pdf2_path)
import fitz
from PIL import Image
from io import BytesIO
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
    """
    Compare the images embedded in two PDF files.

    Args:
        pdf1_path (str): path to the first PDF file
        pdf2_path (str): path to the second PDF file
        **kwargs: accepted for call compatibility; not used

    Returns:
        float: 1. if both files embed the same number of images and every
          pair of images (taken in page order) decodes to identical raw
          pixel bytes, 0. otherwise
    """

    def extract_images_from_pdf(pdf_path):
        # Collect the raw bytes of every image referenced by every page,
        # in page order, as in-memory streams that PIL can open.
        pdf_document = fitz.open(pdf_path)
        try:
            images = []
            for page_number in range(pdf_document.page_count):
                page = pdf_document[page_number]
                for img_info in page.get_images(full=True):
                    # The first item of each entry is the image's xref,
                    # which is what extract_image() expects; the position
                    # of the entry in the list is NOT a valid xref.
                    xref = img_info[0]
                    base_image = pdf_document.extract_image(xref)
                    images.append(BytesIO(base_image["image"]))
            return images
        finally:
            # Release the file handle even if extraction fails.
            pdf_document.close()

    images1 = extract_images_from_pdf(pdf1_path)
    images2 = extract_images_from_pdf(pdf2_path)

    if len(images1) != len(images2):
        return 0.

    for img1, img2 in zip(images1, images2):
        # Compare decoded pixel data rather than the encoded streams.
        if Image.open(img1).tobytes() != Image.open(img2).tobytes():
            return 0.

    return 1.
def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
"""

View File

@@ -58,6 +58,8 @@ def contains_page_break(docx_file):
def compare_docx_files(file1, file2, **options):
ignore_blanks = options.get('ignore_blanks', True)
ignore_case = options.get('ignore_case', False)
ignore_order = options.get('ignore_order', False)
content_only = options.get('content_only', False)
def get_paragraph_texts_odt(document):
@@ -82,11 +84,17 @@ def compare_docx_files(file1, file2, **options):
doc2 = Document(file2)
doc1_paragraphs = [p.text for p in doc1.paragraphs]
doc2_paragraphs = [p.text for p in doc2.paragraphs]
if ignore_order:
doc1_paragraphs = sorted(doc1_paragraphs)
doc2_paragraphs = sorted(doc2_paragraphs)
elif file1.endswith('.odt') and file2.endswith('.odt'):
doc1 = load(file1)
doc2 = load(file2)
doc1_paragraphs = get_paragraph_texts_odt(doc1)
doc2_paragraphs = get_paragraph_texts_odt(doc2)
if ignore_order:
doc1_paragraphs = sorted(doc1_paragraphs)
doc2_paragraphs = sorted(doc2_paragraphs)
else:
# Unsupported file types or mismatch
print("Unsupported file types or mismatch between file types.")
@@ -96,6 +104,8 @@ def compare_docx_files(file1, file2, **options):
# Compare the content of the documents
text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
if ignore_case:
text1, text2 = text1.lower(), text2.lower()
similarity = fuzz.ratio(text1, text2) / 100.0
return similarity
@@ -103,6 +113,8 @@ def compare_docx_files(file1, file2, **options):
if ignore_blanks:
text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
if ignore_case:
text1, text2 = text1.lower(), text2.lower()
if text1 != text2:
return 0
else:
@@ -111,6 +123,8 @@ def compare_docx_files(file1, file2, **options):
# Compare each paragraph
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
if ignore_case:
p1, p2 = p1.lower(), p2.lower()
if p1 != p2:
return 0
@@ -159,6 +173,29 @@ def compare_docx_tables(docx_file1, docx_file2):
return 1
from io import BytesIO
from PIL import Image
def compare_docx_images(docx_file1, docx_file2):
    """
    Compare the images referenced by two docx documents.

    Args:
        docx_file1: first document, passed to `Document`
        docx_file2: second document, passed to `Document`

    Returns:
        int: 1 if both documents reference the same number of images and
          each pair of images (in relationship order) decodes to identical
          raw pixel bytes, 0 otherwise
    """
    first_doc = Document(docx_file1)
    second_doc = Document(docx_file2)

    def _image_streams(document):
        # Every relationship of the main document part whose type mentions
        # "image" contributes its binary payload as an in-memory stream.
        return [
            BytesIO(relation.target_part.blob)
            for relation in document.part.rels.values()
            if "image" in relation.reltype
        ]

    first_images = _image_streams(first_doc)
    second_images = _image_streams(second_doc)

    same_count = len(first_images) == len(second_images)
    # all() short-circuits on the first differing pair.
    if same_count and all(
        Image.open(left).tobytes() == Image.open(right).tobytes()
        for left, right in zip(first_images, second_images)
    ):
        return 1
    return 0
def compare_line_spacing(docx_file1, docx_file2):
if not compare_docx_files(docx_file1, docx_file2):

View File

@@ -1,6 +1,7 @@
import csv
import functools
import json
import yaml
import operator
import re
import sqlite3
@@ -39,6 +40,24 @@ def exact_match(result, rules) -> float:
else:
return 0.
def literal_match(result: Any, expected: Any, **options) -> float:
    """
    Compare `result` with `expected` by their string representations.

    Args:
        result (Any): the value to check
        expected (Any): the reference value
        **options: supported keys are
          "type" (str): "str" (default) compares str(result) with
            str(expected); "list" compares two lists/tuples element by
            element after converting each element with str()
          "ignore_case" (bool): lower-case both sides before comparing,
            defaults to False

    Returns:
        float: 1. on a match, 0. otherwise. In "list" mode, 0. is also
          returned if either argument is not a list/tuple or the lengths
          differ.

    Raises:
        NotImplementedError: if "type" is neither "str" nor "list"
    """
    literal_type = options.get('type', 'str')
    ignore_case = options.get('ignore_case', False)

    def normalize(value: Any) -> str:
        text = str(value)
        return text.lower() if ignore_case else text

    if literal_type == 'str':
        return float(normalize(result) == normalize(expected))

    if literal_type == 'list':
        if not isinstance(result, (list, tuple)) \
                or not isinstance(expected, (list, tuple)) \
                or len(result) != len(expected):
            return 0.
        # Both sides become plain lists, so a tuple can match a list.
        return float([normalize(item) for item in result]
                     == [normalize(item) for item in expected])

    # Report the requested literal type, not the `type` builtin.
    raise NotImplementedError(f"Type {literal_type} not supported")
def is_in_list(result, rules) -> float:
expect = rules["expected"]
if expect in result:
@@ -132,11 +151,11 @@ _accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org"
}
def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float:
def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float:
"""
Args:
result (str): XML of GNOME Accessibility Tree
rules (Dict[str, Any]): dict like
rules (List[Dict[str, Any]]): list of dict like
{
"selectors": list of str as CSS selectors, will be connected by ", "
to form a composite selector. Only one from `selectors` and
@@ -154,30 +173,33 @@ def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float:
"""
at: _Element = lxml.etree.fromstring(result)
if "xpath" in rules:
elements: List[_Element] = at.xpath(rules["xpath"], namespaces=_accessibility_ns_map)
elif "selectors" in rules:
selector = CSSSelector(", ".join(rules["selectors"]), namespaces=_accessibility_ns_map)
elements: List[_Element] = selector(at)
else:
raise ValueError("At least one of xpath and selectors is required")
total_match_score = 1.
for r in rules:
if "xpath" in r:
elements: List[_Element] = at.xpath(r["xpath"], namespaces=_accessibility_ns_map)
elif "selectors" in r:
selector = CSSSelector(", ".join(r["selectors"]), namespaces=_accessibility_ns_map)
elements: List[_Element] = selector(at)
else:
raise ValueError("At least one of xpath and selectors is required")
if len(elements) == 0:
print("no elements")
return 0.
if len(elements) == 0:
print("no elements")
return 0.
if "text" in rules:
match_func: Callable[[str], Number] = functools.partial(operator.eq if rules["exact"] \
else (lambda a, b: fuzz.ratio(a, b) / 100.)
, rules["text"]
)
match_score: Number = 0
for elm in elements:
match_score = max(match_score, match_func(elm.text or None))
else:
match_score = 1.
if "text" in r:
match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \
else (lambda a, b: fuzz.ratio(a, b) / 100.)
, r["text"]
)
match_score: Number = 0
for elm in elements:
match_score = max(match_score, match_func(elm.text or None))
else:
match_score = 1.
total_match_score *= match_score
return float(match_score)
return float(total_match_score)
# def check_existence(result: str, *args) -> float:
@@ -189,7 +211,7 @@ def run_sqlite3(result: str, rules: Dict[str, Any]) -> float:
return float(cursor.fetchone()[0] or 0)
def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]]) -> float:
def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]], is_yaml: bool = False) -> float:
"""
Args:
result (str): path to json file
@@ -204,6 +226,7 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
],
"unexpect": <the same as `expect`
}
is_yaml (bool): yaml rather than json
Returns:
float
@@ -212,7 +235,10 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
if result is None:
return 0.
with open(result) as f:
result: Dict[str, Any] = json.load(f)
if is_yaml:
result: Dict[str, Any] = yaml.load(f, Loader=yaml.Loader)
else:
result: Dict[str, Any] = json.load(f)
expect_rules = rules.get("expect", {})
unexpect_rules = rules.get("unexpect", {})

View File

@@ -12,6 +12,7 @@ import pandas as pd
from openpyxl import Workbook
from openpyxl.cell.cell import Cell
from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl.worksheet.worksheet import Worksheet
@@ -208,8 +209,10 @@ def compare_table(result: str, expected: str = None, **options) -> float:
for rl in r["rules"]:
for rng in MultiCellRange(rl["range"]):
for cdn in rng.cells:
value1: str = str(read_cell_value(*sheet1, cdn))
value2: str = str(read_cell_value(*sheet2, cdn))
coordinate: str = "{:}{:d}".format(get_column_letter(cdn[1]), cdn[0])
value1: str = str(read_cell_value(*sheet1, coordinate))
value2: str = str(read_cell_value(*sheet2, coordinate))
logger.debug("%s: %s vs %s", cdn, value1, value2)
for rplc in rl.get("normalization", []):
value1 = value1.replace(rplc[0], rplc[1])
@@ -230,11 +233,11 @@ def compare_table(result: str, expected: str = None, **options) -> float:
if rl["type"]=="includes":
metric: bool = value1 in value2
if rl["type"]=="includes_by":
elif rl["type"]=="includes_by":
metric: bool = value2 in value1
if rl["type"]=="fuzzy_match":
elif rl["type"]=="fuzzy_match":
metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.)
if rl["type"]=="exact_match":
elif rl["type"]=="exact_match":
metric: bool = value1==value2
total_metric = total_metric and metric

View File

@@ -311,14 +311,15 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
, namespaces=_xlsx_ns_imapping
)
logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell))
if "@t" not in cell["c"]:
try:
if "@t" not in cell["c"] or cell["c"]["@t"] == "n":
return float(cell["c"]["v"])
if cell["c"]["@t"] == "s":
return shared_strs[int(cell["c"]["v"])]
if cell["c"]["@t"] == "str":
return cell["c"]["v"]
except (KeyError, ValueError):
return None
if cell["c"]["@t"] == "s":
return shared_strs[int(cell["c"]["v"])]
if cell["c"]["@t"] == "n":
return float(cell["c"]["v"])
if cell["c"]["@t"] == "str":
return cell["c"]["v"]
# }}} read_cell_value #

View File

@@ -90,6 +90,35 @@ def compare_text_file(actual: str, expected: str, **options) -> float:
return 1.0
return 0.0
import zipfile
def compare_zip_files(actual: str, expected: str, **options) -> float:
    """
    Check that two zip archives hold the same members with the same bytes.

    Args:
        actual (str): path to result zip file; a falsy value scores 0.
        expected (str): path to gold zip file
        **options: accepted for call compatibility; not used

    Return:
        float: 1.0 if both archives list the same set of member names and
          every member has identical content, 0.0 otherwise
    """
    if not actual:
        return 0.

    with zipfile.ZipFile(actual, 'r') as result_zip, \
            zipfile.ZipFile(expected, 'r') as gold_zip:
        result_names = set(result_zip.namelist())
        gold_names = set(gold_zip.namelist())
        if result_names != gold_names:
            return 0.0

        # all() stops reading at the first member whose bytes differ.
        identical = all(
            result_zip.read(member) == gold_zip.read(member)
            for member in result_names
        )

    return 1.0 if identical else 0.0
def compare_config(actual: str, rules: Dict, **options) -> float:
if not actual: