Merge branch 'main' into zdy

2024-03-06 15:12:41 +08:00
parent 5817403e2e ef7e8942af
commit 881728e11c
23 changed files with 1284 additions and 3 deletions
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -18,7 +18,8 @@ from .chrome import (
    is_expected_search_query,
    is_expected_active_tab,
    is_expected_url_pattern_match,
-    is_added_to_steam_cart
+    is_added_to_steam_cart,
+    compare_pdf_images
 )
 from .docs import (
    compare_font_names,
@@ -45,7 +46,8 @@ from .docs import (
    is_first_line_centered,
    check_file_exists,
    check_tabstops,
-    compare_contains_image
+    compare_contains_image,
+    compare_docx_images
 )
 from .general import (
    check_csv,
@@ -125,7 +127,8 @@ from .vscode import (
    check_json_settings,
    check_json_keybindings,
    check_python_file_by_test_suite,
-    check_python_file_by_gold_file
+    check_python_file_by_gold_file,
+    compare_zip_files
 )


--- a/desktop_env/evaluators/metrics/chrome.py
+++ b/desktop_env/evaluators/metrics/chrome.py
@@ -129,6 +129,39 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st
            logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
    return score / len(pdf2_path)

+import fitz
+from PIL import Image
+from io import BytesIO
+
+def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
+    """
+    Compare the images embedded in two PDF files.
+
+    Args:
+        pdf1_path (str): path to the first PDF file
+        pdf2_path (str): path to the second PDF file
+
+    Return:
+        float: 1. if both files embed the same number of images and every
+            pair of images (paired in extraction order) yields identical
+            bytes from PIL's `tobytes()`, otherwise 0.
+    """
+    def extract_images_from_pdf(pdf_path):
+        images = []
+        # Close the document once every image has been copied out of it.
+        with fitz.open(pdf_path) as pdf_document:
+            for page in pdf_document:
+                for img_info in page.get_images(full=True):
+                    # extract_image() expects the image's xref (the first
+                    # item of each get_images() entry), not the entry's
+                    # position within the page's image list.
+                    xref = img_info[0]
+                    base_image = pdf_document.extract_image(xref)
+                    images.append(BytesIO(base_image["image"]))
+        return images
+
+    images1 = extract_images_from_pdf(pdf1_path)
+    images2 = extract_images_from_pdf(pdf2_path)
+
+    if len(images1) != len(images2):
+        return 0.
+
+    for img1, img2 in zip(images1, images2):
+        if Image.open(img1).tobytes() != Image.open(img2).tobytes():
+            return 0.
+
+    return 1.
+

 def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
    """
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -159,6 +159,29 @@ def compare_docx_tables(docx_file1, docx_file2):

    return 1

+from io import BytesIO
+from PIL import Image
+
+def compare_docx_images(docx_file1, docx_file2):
+    """
+    Check whether two docx files carry the same images.
+
+    Every relationship of the main document part whose type mentions
+    "image" is collected; the two collections must have the same length and
+    each pair (in relationship order) must give identical bytes from PIL's
+    `tobytes()`.
+
+    Args:
+        docx_file1: first docx file, passed to `Document`
+        docx_file2: second docx file, passed to `Document`
+
+    Return:
+        int: 1 if the images match, otherwise 0
+    """
+    def image_streams(document):
+        # Wrap each image blob in a BytesIO so PIL can open it like a file.
+        return [
+            BytesIO(relation.target_part.blob)
+            for relation in document.part.rels.values()
+            if "image" in relation.reltype
+        ]
+
+    first_doc = Document(docx_file1)
+    second_doc = Document(docx_file2)
+
+    first_images = image_streams(first_doc)
+    second_images = image_streams(second_doc)
+    if len(first_images) != len(second_images):
+        return 0
+
+    images_match = all(
+        Image.open(left).tobytes() == Image.open(right).tobytes()
+        for left, right in zip(first_images, second_images)
+    )
+    return 1 if images_match else 0

 def compare_line_spacing(docx_file1, docx_file2):
    if not compare_docx_files(docx_file1, docx_file2):
--- a/desktop_env/evaluators/metrics/vscode.py
+++ b/desktop_env/evaluators/metrics/vscode.py
@@ -90,6 +90,35 @@ def compare_text_file(actual: str, expected: str, **options) -> float:
        return 1.0
    return 0.0

+import zipfile
+
+def compare_zip_files(actual: str, expected: str, **options) -> float:
+    """
+    Score whether two zip archives hold the same entries with equal contents.
+
+    Args:
+        actual (str): path to result zip file
+        expected (str): path to gold zip file
+
+    Return:
+        float: 1.0 when both archives list the same set of entry names and
+            every entry's bytes are equal; 0.0 otherwise, including when
+            `actual` is empty or None
+    """
+    if not actual:
+        return 0.
+
+    with zipfile.ZipFile(actual, 'r') as result_zip, zipfile.ZipFile(expected, 'r') as gold_zip:
+        result_names = set(result_zip.namelist())
+        # Entry order inside the archives is irrelevant; only the name sets count.
+        if result_names != set(gold_zip.namelist()):
+            return 0.0
+
+        contents_match = all(
+            result_zip.read(entry) == gold_zip.read(entry)
+            for entry in result_names
+        )
+    return 1.0 if contents_match else 0.0
+

 def compare_config(actual: str, rules: Dict, **options) -> float:
    if not actual: