diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index a45c300..3e82e7d 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -1,6 +1,8 @@ import logging import os import xml.etree.ElementTree as ET +import zipfile +import re from typing import List, Dict, Any from docx import Document @@ -247,14 +249,24 @@ def evaluate_colored_words_in_tables(file_path1, file_path2): def check_highlighted_words(file_path1, file_path2): if not compare_docx_files(file_path1, file_path2): return 0 - document = Document(file_path1) + + # Extract content.xml from the .odt file + extract_dir = file_path1 + "_extracted" + with zipfile.ZipFile(file_path1, 'r') as zip_ref: + zip_ref.extractall(extract_dir) + content_xml_path = os.path.join(extract_dir, 'content.xml') + with open(content_xml_path, 'r') as file: + content_xml = file.read() - for paragraph in document.paragraphs: - for run in paragraph.runs: - if run.font.highlight_color is not None: - return 0 # Highlighted words found + # Check for yellow highlights in the content.xml + yellow_highlight_pattern = re.compile(r'(.{0,50}background-color="#ffff00"[^>]*>.{0,50})') + yellow_highlight_matches = yellow_highlight_pattern.findall(content_xml) - return 1 # No highlighted words found + # Return 1 if yellow highlights are NOT found, otherwise 0 + if yellow_highlight_matches: + return 0 + else: + return 1 def evaluate_strike_through_last_paragraph(file_path1, file_path2): @@ -415,4 +427,4 @@ def compare_highlighted_text(file1, file2): doc2_highlighted = extract_highlighted_text(Document(file2)) # Compare the sets of highlighted text to check if they are the same - return set(doc1_highlighted) == set(doc2_highlighted) + return set(doc1_highlighted) == set(doc2_highlighted) \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json 
b/evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json index 3280355..df69215 100644 --- a/evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json +++ b/evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json @@ -9,8 +9,8 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=10hgB73d_DoQXQVgUjvgXFUCP1Hd9YxDb&export=download&authuser=0&confirm=t&uuid=845f9616-2fb7-476a-abab-8b620d482ac2&at=APZUnTXB71PxHF7Dq9TC2OL_cRLm:1706199789147", - "path": "Desktop/sample-recruitment-phone-script.docx" + "url": "https://drive.google.com/uc?id=1Ul4mtQ4SpUNLVDlaiJThPprBe6WTGZ2L&export=download", + "path": "Desktop/sample-recruitment-phone-script.odt" } ] } @@ -18,7 +18,7 @@ { "type": "open", "parameters": { - "path": "Desktop/sample-recruitment-phone-script.docx" + "path": "Desktop/sample-recruitment-phone-script.odt" } } ], @@ -31,7 +31,7 @@ { "type": "activate_window", "parameters": { - "window_name": "sample-recruitment-phone-script.docx - LibreOffice Writer", + "window_name": "sample-recruitment-phone-script.odt - LibreOffice Writer", "strict": true } }, @@ -55,13 +55,13 @@ "func": "check_highlighted_words", "expected": { "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1s9Dsy66-zxbCAgeTyCh0P7AT7P4jF6o3&export=download&authuser=0&confirm=t&uuid=1239f2a1-8c86-45a4-8e7d-36388ac22a69&at=APZUnTVZQzXQAMNsKKQzOw5ppT8A:1706017721589", - "dest": "sample-recruitment-phone-script_Gold.docx" + "path": "https://drive.google.com/uc?id=12iMkgCYuUyhKUXux96kANLIeud0Wz9ct&export=download", + "dest": "sample-recruitment-phone-script_Gold.odt" }, "result": { "type": "vm_file", - "path": "Desktop/sample-recruitment-phone-script.docx", - "dest": "sample-recruitment-phone-script.docx" + "path": "Desktop/sample-recruitment-phone-script.odt", + "dest": "sample-recruitment-phone-script.odt" } } } \ No newline at end of file