Fix a bug in multiple apps example
This commit is contained in:
@@ -2,9 +2,9 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
from itertools import product
|
||||||
from typing import Any, Dict, List, Union
|
from typing import Any, Dict, List, Union
|
||||||
|
|
||||||
import fitz # PyMuPDF
|
|
||||||
import rapidfuzz.fuzz as fuzz
|
import rapidfuzz.fuzz as fuzz
|
||||||
from bs4 import BeautifulSoup, Tag
|
from bs4 import BeautifulSoup, Tag
|
||||||
|
|
||||||
@@ -94,12 +94,24 @@ def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float:
|
|||||||
elif rule['type'] == "liked_authors_websites_urls":
|
elif rule['type'] == "liked_authors_websites_urls":
|
||||||
# Check if "liked authors" folder exists
|
# Check if "liked authors" folder exists
|
||||||
liked_authors_folder = next((bookmark for bookmark in bookmarks['bookmark_bar']['children'] if
|
liked_authors_folder = next((bookmark for bookmark in bookmarks['bookmark_bar']['children'] if
|
||||||
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
|
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
|
||||||
if liked_authors_folder:
|
if liked_authors_folder:
|
||||||
# Check if it contains the specified URLs
|
# Check if it contains the specified URLs
|
||||||
liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if
|
liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if
|
||||||
bookmark['type'] == 'url']
|
bookmark['type'] == 'url']
|
||||||
return 1. if set(liked_authors_urls) == set(rule['urls']) else 0.
|
|
||||||
|
urls = rule['urls']
|
||||||
|
|
||||||
|
for idx, url in enumerate(urls):
|
||||||
|
if isinstance(url, str):
|
||||||
|
urls[idx] = [url]
|
||||||
|
|
||||||
|
combinations = product(*urls)
|
||||||
|
|
||||||
|
for combination in combinations:
|
||||||
|
if set(combination) == set(liked_authors_urls):
|
||||||
|
return 1.
|
||||||
|
return 0.
|
||||||
else:
|
else:
|
||||||
return 0.
|
return 0.
|
||||||
else:
|
else:
|
||||||
@@ -140,15 +152,16 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st
|
|||||||
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
|
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
|
||||||
return score / len(pdf2_path)
|
return score / len(pdf2_path)
|
||||||
|
|
||||||
|
|
||||||
import fitz
|
import fitz
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from io import BytesIO
|
|
||||||
from borb.pdf import Document
|
from borb.pdf import Document
|
||||||
from borb.pdf import PDF
|
from borb.pdf import PDF
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
|
|
||||||
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
|
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
|
||||||
def extract_images_from_pdf(pdf_path):
|
def extract_images_from_pdf(pdf_path):
|
||||||
pdf_document = fitz.open(pdf_path)
|
pdf_document = fitz.open(pdf_path)
|
||||||
@@ -163,14 +176,14 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
|
|||||||
images.append(img)
|
images.append(img)
|
||||||
|
|
||||||
return images
|
return images
|
||||||
|
|
||||||
def fix_pdf(in_path: Path, out_path: Path) -> None:
|
def fix_pdf(in_path: Path, out_path: Path) -> None:
|
||||||
doc: typing.Optional[Document] = None
|
doc: typing.Optional[Document] = None
|
||||||
with open(in_path, "rb") as fh:
|
with open(in_path, "rb") as fh:
|
||||||
doc = PDF.loads(fh)
|
doc = PDF.loads(fh)
|
||||||
with open(out_path, "wb") as fh:
|
with open(out_path, "wb") as fh:
|
||||||
PDF.dumps(fh, doc)
|
PDF.dumps(fh, doc)
|
||||||
|
|
||||||
fix_pdf(Path(pdf1_path), Path(pdf1_path))
|
fix_pdf(Path(pdf1_path), Path(pdf1_path))
|
||||||
fix_pdf(Path(pdf2_path), Path(pdf2_path))
|
fix_pdf(Path(pdf2_path), Path(pdf2_path))
|
||||||
|
|
||||||
@@ -183,7 +196,7 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
|
|||||||
for img1, img2 in zip(images1, images2):
|
for img1, img2 in zip(images1, images2):
|
||||||
if img1.tobytes() != img2.tobytes():
|
if img1.tobytes() != img2.tobytes():
|
||||||
return 0.
|
return 0.
|
||||||
|
|
||||||
return 1.
|
return 1.
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -23,6 +23,12 @@
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "sleep",
|
||||||
|
"parameters": {
|
||||||
|
"seconds": 2
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"type": "download",
|
"type": "download",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
@@ -75,10 +81,10 @@
|
|||||||
"Liked Authors"
|
"Liked Authors"
|
||||||
],
|
],
|
||||||
"urls": [
|
"urls": [
|
||||||
"https://jimfan.me/",
|
["https://jimfan.me/", "https://research.nvidia.com/person/linxi-jim-fan"],
|
||||||
"https://ai.stanford.edu/~dahuang/",
|
["https://research.nvidia.com/person/de-an-huang", "https://ai.stanford.edu/~dahuang/"],
|
||||||
"https://yukezhu.me/",
|
["https://yukezhu.me/", "https://www.cs.utexas.edu/people/faculty-researchers/yuke-zhu", "https://experts.utexas.edu/yuke_zhu", "https://research.nvidia.com/person/yuke-zhu"],
|
||||||
"https://www.eas.caltech.edu/people/anima"
|
["http://tensorlab.cms.caltech.edu/users/anima/", "https://www.eas.caltech.edu/people/anima"]
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user