From bba367b8bcc5d4e73f1e5bcfeb502f4b2185dcc7 Mon Sep 17 00:00:00 2001 From: Tianbao Xie <47296835+Timothyxxx@users.noreply.github.com> Date: Thu, 3 Jul 2025 16:58:43 +0800 Subject: [PATCH] fix: fix multiapps tasks (#231) * Update JSON example for multi_apps: change snapshot name and specify presenter in instructions for clarity. * Enhance PDF image comparison in chrome.py by adding existence checks for input files and improving image extraction logic. Introduce image hashing for similarity scoring with a configurable threshold. Update docs.py to support fuzzy matching in DOCX file comparisons, allowing for similarity scoring based on text content. Modify example JSON to enable fuzzy matching option. --------- Co-authored-by: yuanmengqi --- desktop_env/evaluators/metrics/chrome.py | 69 ++++++++++++++----- desktop_env/evaluators/metrics/docs.py | 52 +++++++++----- .../82e3c869-49f6-4305-a7ce-f3e64a0618e7.json | 4 +- .../aad10cd7-9337-4b62-b704-a857848cedf2.json | 3 + 4 files changed, 91 insertions(+), 37 deletions(-) diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py index cc12e29..494e8cf 100644 --- a/desktop_env/evaluators/metrics/chrome.py +++ b/desktop_env/evaluators/metrics/chrome.py @@ -2,6 +2,7 @@ import logging import os import re import shutil +import io from itertools import product from typing import Any, Dict, List, Union @@ -172,6 +173,7 @@ import fitz from PIL import Image from borb.pdf import Document from borb.pdf import PDF +import imagehash from pathlib import Path import typing @@ -180,6 +182,9 @@ import typing def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float: if not pdf1_path or not pdf2_path: return 0. + if not all(map(os.path.exists, [pdf1_path, pdf2_path])): + logger.warning(f"PDF file does not exist: {pdf1_path} or {pdf2_path}") + return 0. def extract_images_from_pdf(pdf_path): pdf_document = fitz.open(pdf_path) @@ -187,35 +192,61 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float: for page_number in range(pdf_document.page_count): page = pdf_document[page_number] - pixmap = page.get_pixmap() - - img = Image.frombytes("RGB", [pixmap.width, pixmap.height], pixmap.samples) - - images.append(img) + for img_index, img in enumerate(page.get_images(full=True)): + xref = img[0] + base_image = pdf_document.extract_image(xref) + image_bytes = base_image["image"] + + # convert to PIL Image + try: + pil_image = Image.open(io.BytesIO(image_bytes)) + images.append(pil_image) + except Exception as e: + logger.error(f"Failed to process image in {pdf_path} on page {page_number}: {e}") return images + + temp_dir = Path(pdf1_path).parent / "temp_pdf_comparison" + os.makedirs(temp_dir, exist_ok=True) + + temp_pdf1 = temp_dir / Path(pdf1_path).name + temp_pdf2 = temp_dir / Path(pdf2_path).name - def fix_pdf(in_path: Path, out_path: Path) -> None: - doc: typing.Optional[Document] = None - with open(in_path, "rb") as fh: - doc = PDF.loads(fh) - with open(out_path, "wb") as fh: - PDF.dumps(fh, doc) + shutil.copy(pdf1_path, temp_pdf1) + shutil.copy(pdf2_path, temp_pdf2) - fix_pdf(Path(pdf1_path), Path(pdf1_path)) - fix_pdf(Path(pdf2_path), Path(pdf2_path)) + try: + images1 = extract_images_from_pdf(str(temp_pdf1)) + images2 = extract_images_from_pdf(str(temp_pdf2)) + except Exception as e: + logger.error(f"Error extracting images from PDFs: {e}") + shutil.rmtree(temp_dir) + return 0. + finally: + shutil.rmtree(temp_dir) - images1 = extract_images_from_pdf(pdf1_path) - images2 = extract_images_from_pdf(pdf2_path) if len(images1) != len(images2): + logger.info(f"Different number of images found. Gold: {len(images1)}, Pred: {len(images2)}") return 0. - for img1, img2 in zip(images1, images2): - if img1.tobytes() != img2.tobytes(): - return 0. + if not images1: + logger.info("No images found in either PDF. Considering it a match.") + return 1.0 - return 1. + hash_threshold = 5 + total_score = 0 + for i, (img1, img2) in enumerate(zip(images1, images2)): + hash1 = imagehash.phash(img1) + hash2 = imagehash.phash(img2) + hash_diff = hash1 - hash2 + + logger.info(f"Image {i+1}: Gold hash: {hash1}, Pred hash: {hash2}, Hash difference: {hash_diff}") + + if hash_diff <= hash_threshold: + total_score +=1 + + return total_score / len(images1) def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float: diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index 908a387..9397d45 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -86,6 +86,7 @@ def compare_docx_files(file1, file2, **options): ignore_case = options.get('ignore_case', False) ignore_order = options.get('ignore_order', False) content_only = options.get('content_only', False) + fuzzy_match = options.get('fuzzy_match', False) if not file1 or not file2: return 0 @@ -151,29 +152,48 @@ def compare_docx_files(file1, file2, **options): text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip() if ignore_case: text1, text2 = text1.lower(), text2.lower() - if text1 != text2: - return 0 + + if fuzzy_match: + similarity = fuzz.ratio(text1, text2) / 100.0 + return similarity + else: + if text1 != text2: + return 0 else: - print("ignore_blanks=false") if len(doc1_paragraphs) != len(doc2_paragraphs): print(doc1_paragraphs) print(doc2_paragraphs) print(len(doc1_paragraphs)) print(len(doc2_paragraphs)) return 0 - print("in compare") - # Compare each paragraph - for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs): - if ignore_case: - p1, p2 = p1.lower(), p2.lower() - if p1 != p2: - # show the difference - print("=== First Paragraph ===") - print(f"\033[92m{repr(p1)}\033[0m") # Green color for p1, repr() shows hidden chars - print("=== Second Paragraph ===") - print(f"\033[91m{repr(p2)}\033[0m") # Red color for p2, repr() shows hidden chars - print("=" * 50) # Clear boundary - return 0 + + if fuzzy_match: + total_similarity = 0 + if not doc1_paragraphs: + return 1.0 + for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs): + if ignore_case: + p1, p2 = p1.lower(), p2.lower() + total_similarity += fuzz.ratio(p1, p2) / 100.0 + + if len(doc1_paragraphs) == 0: + return 1.0 if len(doc2_paragraphs) == 0 else 0.0 + + avg_similarity = total_similarity / len(doc1_paragraphs) + return avg_similarity + else: + # Compare each paragraph + for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs): + if ignore_case: + p1, p2 = p1.lower(), p2.lower() + if p1 != p2: + # show the difference + print("=== First Paragraph ===") + print(f"\033[92m{repr(p1)}\033[0m") # Green color for p1, repr() shows hidden chars + print("=== Second Paragraph ===") + print(f"\033[91m{repr(p2)}\033[0m") # Red color for p2, repr() shows hidden chars + print("=" * 50) # Clear boundary + return 0 return 1 diff --git a/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json index e60ba1a..deaa36d 100644 --- a/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json +++ b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json @@ -1,7 +1,7 @@ { "id": "82e3c869-49f6-4305-a7ce-f3e64a0618e7", - "snapshot": "libreoffice_calc", - "instruction": "Please sift through the folder with all the event photos taken by our photographer. I need you to extract the photos featuring the presenters and place them in a separate folder named 'presenter'. Then, compress this folder into a zip file named 'presenter.zip' so I can easily share it with others later.", + "snapshot": "multi_apps", + "instruction": "Please sift through the folder with all the event photos taken by our photographer. I need you to extract the photos featuring the presenters (a.k.a. Tao Yu) and place them in a separate folder named 'presenter'. Then, compress this folder into a zip file named 'presenter.zip' so I can easily share it with others later.", "source": "authors", "config": [ { diff --git a/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json b/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json index ac60aa1..239bc46 100644 --- a/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json +++ b/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json @@ -65,6 +65,9 @@ "type": "vm_file", "path": "/home/user/Desktop/notes.docx", "dest": "notes.docx" + }, + "options": { + "fuzzy_match": true } }, "proxy": false