Fix a bug in the multiple-apps example

This commit is contained in:
Timothyxxx
2024-03-08 20:39:05 +08:00
parent 365c7798f1
commit 6f0fe4f482
2 changed files with 30 additions and 11 deletions

View File

@@ -2,9 +2,9 @@ import logging
import os import os
import re import re
import shutil import shutil
from itertools import product
from typing import Any, Dict, List, Union from typing import Any, Dict, List, Union
import fitz # PyMuPDF
import rapidfuzz.fuzz as fuzz import rapidfuzz.fuzz as fuzz
from bs4 import BeautifulSoup, Tag from bs4 import BeautifulSoup, Tag
@@ -99,7 +99,19 @@ def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float:
# Check if it contains the specified URLs # Check if it contains the specified URLs
liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if
bookmark['type'] == 'url'] bookmark['type'] == 'url']
return 1. if set(liked_authors_urls) == set(rule['urls']) else 0.
urls = rule['urls']
for idx, url in enumerate(urls):
if isinstance(url, str):
urls[idx] = [url]
combinations = product(*urls)
for combination in combinations:
if set(combination) == set(liked_authors_urls):
return 1.
return 0.
else: else:
return 0. return 0.
else: else:
@@ -140,15 +152,16 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}") logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
return score / len(pdf2_path) return score / len(pdf2_path)
import fitz import fitz
from PIL import Image from PIL import Image
from io import BytesIO
from borb.pdf import Document from borb.pdf import Document
from borb.pdf import PDF from borb.pdf import PDF
from pathlib import Path from pathlib import Path
import typing import typing
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float: def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
def extract_images_from_pdf(pdf_path): def extract_images_from_pdf(pdf_path):
pdf_document = fitz.open(pdf_path) pdf_document = fitz.open(pdf_path)

View File

@@ -23,6 +23,12 @@
] ]
} }
}, },
{
"type": "sleep",
"parameters": {
"seconds": 2
}
},
{ {
"type": "download", "type": "download",
"parameters": { "parameters": {
@@ -75,10 +81,10 @@
"Liked Authors" "Liked Authors"
], ],
"urls": [ "urls": [
"https://jimfan.me/", ["https://jimfan.me/", "https://research.nvidia.com/person/linxi-jim-fan"],
"https://ai.stanford.edu/~dahuang/", ["https://research.nvidia.com/person/de-an-huang", "https://ai.stanford.edu/~dahuang/"],
"https://yukezhu.me/", ["https://yukezhu.me/", "https://www.cs.utexas.edu/people/faculty-researchers/yuke-zhu", "https://experts.utexas.edu/yuke_zhu", "https://research.nvidia.com/person/yuke-zhu"],
"https://www.eas.caltech.edu/people/anima" ["http://tensorlab.cms.caltech.edu/users/anima/", "https://www.eas.caltech.edu/people/anima"]
] ]
} }
} }