update multi apps

This commit is contained in:
tsuky_chen
2024-03-07 17:20:51 +08:00
parent e295430bcf
commit 807a95a230
20 changed files with 104 additions and 63 deletions

View File

@@ -143,6 +143,11 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st
import fitz
from PIL import Image
from io import BytesIO
from borb.pdf import Document
from borb.pdf import PDF
from pathlib import Path
import typing
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
def extract_images_from_pdf(pdf_path):
@@ -151,24 +156,32 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
for page_number in range(pdf_document.page_count):
page = pdf_document[page_number]
image_list = page.get_images(full=True)
pixmap = page.get_pixmap()
for img_index, img_info in enumerate(image_list):
base_image = pdf_document.extract_image(img_index)
image_bytes = base_image["image"]
img = Image.frombytes("RGB", [pixmap.width, pixmap.height], pixmap.samples)
images.append(BytesIO(image_bytes))
images.append(img)
return images
def fix_pdf(in_path: Path, out_path: Path) -> None:
doc: typing.Optional[Document] = None
with open(in_path, "rb") as fh:
doc = PDF.loads(fh)
with open(out_path, "wb") as fh:
PDF.dumps(fh, doc)
fix_pdf(Path(pdf1_path), Path(pdf1_path))
fix_pdf(Path(pdf2_path), Path(pdf2_path))
images1 = extract_images_from_pdf(pdf1_path)
images2 = extract_images_from_pdf(pdf2_path)
if len(images1) != len(images2):
return 0.
for i, (img1, img2) in enumerate(zip(images1, images2), 1):
if Image.open(img1).tobytes() != Image.open(img2).tobytes():
for img1, img2 in zip(images1, images2):
if img1.tobytes() != img2.tobytes():
return 0.
return 1.