Merge branch 'main' into zdy

This commit is contained in:
David Chang
2024-03-06 15:12:41 +08:00
23 changed files with 1284 additions and 3 deletions

View File

@@ -18,7 +18,8 @@ from .chrome import (
is_expected_search_query,
is_expected_active_tab,
is_expected_url_pattern_match,
is_added_to_steam_cart
is_added_to_steam_cart,
compare_pdf_images
)
from .docs import (
compare_font_names,
@@ -45,7 +46,8 @@ from .docs import (
is_first_line_centered,
check_file_exists,
check_tabstops,
compare_contains_image
compare_contains_image,
compare_docx_images
)
from .general import (
check_csv,
@@ -125,7 +127,8 @@ from .vscode import (
check_json_settings,
check_json_keybindings,
check_python_file_by_test_suite,
check_python_file_by_gold_file
check_python_file_by_gold_file,
compare_zip_files
)

View File

@@ -129,6 +129,39 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
return score / len(pdf2_path)
import fitz
from PIL import Image
from io import BytesIO
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
    """
    Compare the images embedded in two PDF files.

    Args:
        pdf1_path (str): path to the first PDF file
        pdf2_path (str): path to the second PDF file
        **kwargs: accepted but not used

    Return:
        float: 1. when both PDFs yield the same number of embedded images and
            every pair, taken in extraction order, decodes to identical pixel
            bytes; 0. otherwise (including when either path is empty)
    """
    # A missing file can never match, so score it instead of trying to open it.
    if not pdf1_path or not pdf2_path:
        return 0.

    def extract_images_from_pdf(pdf_path):
        """Return the raw bytes of every image referenced by each page, wrapped in BytesIO."""
        pdf_document = fitz.open(pdf_path)
        try:
            images = []
            for page_number in range(pdf_document.page_count):
                page = pdf_document[page_number]
                for img_info in page.get_images(full=True):
                    # extract_image() expects the image's xref, which is the
                    # first field of each get_images() entry -- not the
                    # position of the entry within the page's image list.
                    xref = img_info[0]
                    base_image = pdf_document.extract_image(xref)
                    images.append(BytesIO(base_image["image"]))
            return images
        finally:
            # Release the file handle even if extraction fails part-way.
            pdf_document.close()

    images1 = extract_images_from_pdf(pdf1_path)
    images2 = extract_images_from_pdf(pdf2_path)

    if len(images1) != len(images2):
        return 0.

    for img1, img2 in zip(images1, images2):
        # Compare decoded pixel data rather than the encoded streams.
        if Image.open(img1).tobytes() != Image.open(img2).tobytes():
            return 0.

    return 1.
def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
"""

View File

@@ -159,6 +159,29 @@ def compare_docx_tables(docx_file1, docx_file2):
return 1
from io import BytesIO
from PIL import Image
def compare_docx_images(docx_file1, docx_file2):
    """
    Compare the images referenced by two docx files.

    Args:
        docx_file1: first document, passed straight to Document()
        docx_file2: second document, passed straight to Document()

    Return:
        int: 1 when both documents expose the same number of image
            relationships and each pair, in relationship order, decodes to
            identical pixel bytes; 0 otherwise
    """
    first_doc = Document(docx_file1)
    second_doc = Document(docx_file2)

    def image_streams(document):
        """Collect the blob of every relationship whose type mentions "image"."""
        return [
            BytesIO(relation.target_part.blob)
            for relation in document.part.rels.values()
            if "image" in relation.reltype
        ]

    first_streams = image_streams(first_doc)
    second_streams = image_streams(second_doc)

    if len(first_streams) != len(second_streams):
        return 0

    # Compare decoded pixel data pairwise; stop at the first mismatch.
    pixels_match = all(
        Image.open(left).tobytes() == Image.open(right).tobytes()
        for left, right in zip(first_streams, second_streams)
    )
    return 1 if pixels_match else 0
def compare_line_spacing(docx_file1, docx_file2):
if not compare_docx_files(docx_file1, docx_file2):

View File

@@ -90,6 +90,35 @@ def compare_text_file(actual: str, expected: str, **options) -> float:
return 1.0
return 0.0
import zipfile
def compare_zip_files(actual: str, expected: str, **options) -> float:
    """
    Score a result zip archive against a gold zip archive.

    Args:
        actual (str): path to result zip file
        expected (str): path to gold zip file
        **options: accepted but not used

    Return:
        float: 1.0 when both archives hold the same set of member names and
            every member has byte-identical content; 0.0 otherwise (including
            when `actual` is empty)
    """
    if not actual:
        return 0.

    with zipfile.ZipFile(actual, 'r') as result_zip, zipfile.ZipFile(expected, 'r') as gold_zip:
        result_names = set(result_zip.namelist())
        gold_names = set(gold_zip.namelist())

        # Member order inside the archive is irrelevant; only the name set matters.
        if result_names != gold_names:
            return 0.0

        contents_match = all(
            result_zip.read(member) == gold_zip.read(member)
            for member in result_names
        )

    return 1.0 if contents_match else 0.0
def compare_config(actual: str, rules: Dict, **options) -> float:
if not actual: