Fix a bug in multiple apps example

This commit is contained in:
Timothyxxx
2024-03-08 20:39:05 +08:00
parent 365c7798f1
commit 6f0fe4f482
2 changed files with 30 additions and 11 deletions

View File

@@ -2,9 +2,9 @@ import logging
import os
import re
import shutil
from itertools import product
from typing import Any, Dict, List, Union
import fitz # PyMuPDF
import rapidfuzz.fuzz as fuzz
from bs4 import BeautifulSoup, Tag
@@ -94,12 +94,24 @@ def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float:
elif rule['type'] == "liked_authors_websites_urls":
# Check if "liked authors" folder exists
liked_authors_folder = next((bookmark for bookmark in bookmarks['bookmark_bar']['children'] if
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
if liked_authors_folder:
# Check if it contains the specified URLs
liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if
bookmark['type'] == 'url']
return 1. if set(liked_authors_urls) == set(rule['urls']) else 0.
urls = rule['urls']
for idx, url in enumerate(urls):
if isinstance(url, str):
urls[idx] = [url]
combinations = product(*urls)
for combination in combinations:
if set(combination) == set(liked_authors_urls):
return 1.
return 0.
else:
return 0.
else:
@@ -140,15 +152,16 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
return score / len(pdf2_path)
import fitz
from PIL import Image
from io import BytesIO
from borb.pdf import Document
from borb.pdf import PDF
from pathlib import Path
import typing
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
def extract_images_from_pdf(pdf_path):
pdf_document = fitz.open(pdf_path)
@@ -163,14 +176,14 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
images.append(img)
return images
def fix_pdf(in_path: Path, out_path: Path) -> None:
    """Re-serialise a PDF by loading it with ``PDF.loads`` and writing it with ``PDF.dumps``.

    The input file is read and closed before the output file is opened,
    so the same path may be passed for both arguments to rewrite a file
    in place.

    Args:
        in_path: Location of the PDF to read (opened in binary mode).
        out_path: Location the re-serialised PDF is written to
            (opened in binary write mode, replacing any existing file).
    """
    with open(in_path, "rb") as source:
        parsed: typing.Optional[Document] = PDF.loads(source)

    # The reader is closed by now, which makes in_path == out_path safe.
    with open(out_path, "wb") as sink:
        PDF.dumps(sink, parsed)
fix_pdf(Path(pdf1_path), Path(pdf1_path))
fix_pdf(Path(pdf2_path), Path(pdf2_path))
@@ -183,7 +196,7 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
for img1, img2 in zip(images1, images2):
if img1.tobytes() != img2.tobytes():
return 0.
return 1.

View File

@@ -23,6 +23,12 @@
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 2
}
},
{
"type": "download",
"parameters": {
@@ -75,10 +81,10 @@
"Liked Authors"
],
"urls": [
"https://jimfan.me/",
"https://ai.stanford.edu/~dahuang/",
"https://yukezhu.me/",
"https://www.eas.caltech.edu/people/anima"
["https://jimfan.me/", "https://research.nvidia.com/person/linxi-jim-fan"],
["https://research.nvidia.com/person/de-an-huang", "https://ai.stanford.edu/~dahuang/"],
["https://yukezhu.me/", "https://www.cs.utexas.edu/people/faculty-researchers/yuke-zhu", "https://experts.utexas.edu/yuke_zhu", "https://research.nvidia.com/person/yuke-zhu"],
["http://tensorlab.cms.caltech.edu/users/anima/", "https://www.eas.caltech.edu/people/anima"]
]
}
}