Fix a bug in multiple apps example

This commit is contained in:
Timothyxxx
2024-03-08 20:39:05 +08:00
parent 365c7798f1
commit 6f0fe4f482
2 changed files with 30 additions and 11 deletions

View File

@@ -2,9 +2,9 @@ import logging
import os import os
import re import re
import shutil import shutil
from itertools import product
from typing import Any, Dict, List, Union from typing import Any, Dict, List, Union
import fitz # PyMuPDF
import rapidfuzz.fuzz as fuzz import rapidfuzz.fuzz as fuzz
from bs4 import BeautifulSoup, Tag from bs4 import BeautifulSoup, Tag
@@ -94,12 +94,24 @@ def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float:
elif rule['type'] == "liked_authors_websites_urls": elif rule['type'] == "liked_authors_websites_urls":
# Check if "liked authors" folder exists # Check if "liked authors" folder exists
liked_authors_folder = next((bookmark for bookmark in bookmarks['bookmark_bar']['children'] if liked_authors_folder = next((bookmark for bookmark in bookmarks['bookmark_bar']['children'] if
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None) bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
if liked_authors_folder: if liked_authors_folder:
# Check if it contains the specified URLs # Check if it contains the specified URLs
liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if
bookmark['type'] == 'url'] bookmark['type'] == 'url']
return 1. if set(liked_authors_urls) == set(rule['urls']) else 0.
urls = rule['urls']
for idx, url in enumerate(urls):
if isinstance(url, str):
urls[idx] = [url]
combinations = product(*urls)
for combination in combinations:
if set(combination) == set(liked_authors_urls):
return 1.
return 0.
else: else:
return 0. return 0.
else: else:
@@ -140,15 +152,16 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}") logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
return score / len(pdf2_path) return score / len(pdf2_path)
import fitz import fitz
from PIL import Image from PIL import Image
from io import BytesIO
from borb.pdf import Document from borb.pdf import Document
from borb.pdf import PDF from borb.pdf import PDF
from pathlib import Path from pathlib import Path
import typing import typing
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float: def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
def extract_images_from_pdf(pdf_path): def extract_images_from_pdf(pdf_path):
pdf_document = fitz.open(pdf_path) pdf_document = fitz.open(pdf_path)
@@ -163,14 +176,14 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
images.append(img) images.append(img)
return images return images
def fix_pdf(in_path: Path, out_path: Path) -> None: def fix_pdf(in_path: Path, out_path: Path) -> None:
doc: typing.Optional[Document] = None doc: typing.Optional[Document] = None
with open(in_path, "rb") as fh: with open(in_path, "rb") as fh:
doc = PDF.loads(fh) doc = PDF.loads(fh)
with open(out_path, "wb") as fh: with open(out_path, "wb") as fh:
PDF.dumps(fh, doc) PDF.dumps(fh, doc)
fix_pdf(Path(pdf1_path), Path(pdf1_path)) fix_pdf(Path(pdf1_path), Path(pdf1_path))
fix_pdf(Path(pdf2_path), Path(pdf2_path)) fix_pdf(Path(pdf2_path), Path(pdf2_path))
@@ -183,7 +196,7 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
for img1, img2 in zip(images1, images2): for img1, img2 in zip(images1, images2):
if img1.tobytes() != img2.tobytes(): if img1.tobytes() != img2.tobytes():
return 0. return 0.
return 1. return 1.

View File

@@ -23,6 +23,12 @@
] ]
} }
}, },
{
"type": "sleep",
"parameters": {
"seconds": 2
}
},
{ {
"type": "download", "type": "download",
"parameters": { "parameters": {
@@ -75,10 +81,10 @@
"Liked Authors" "Liked Authors"
], ],
"urls": [ "urls": [
"https://jimfan.me/", ["https://jimfan.me/", "https://research.nvidia.com/person/linxi-jim-fan"],
"https://ai.stanford.edu/~dahuang/", ["https://research.nvidia.com/person/de-an-huang", "https://ai.stanford.edu/~dahuang/"],
"https://yukezhu.me/", ["https://yukezhu.me/", "https://www.cs.utexas.edu/people/faculty-researchers/yuke-zhu", "https://experts.utexas.edu/yuke_zhu", "https://research.nvidia.com/person/yuke-zhu"],
"https://www.eas.caltech.edu/people/anima" ["http://tensorlab.cms.caltech.edu/users/anima/", "https://www.eas.caltech.edu/people/anima"]
] ]
} }
} }