check_highlighted_words modified

2024-01-28 14:36:28 +08:00
parent 394d6353fd
commit 4b8cab0805
2 changed files with 27 additions and 15 deletions
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -1,6 +1,8 @@
 import logging
 import os
 import xml.etree.ElementTree as ET
+import zipfile
+import re
 from typing import List, Dict, Any

 from docx import Document
@@ -247,14 +249,24 @@ def evaluate_colored_words_in_tables(file_path1, file_path2):
 def check_highlighted_words(file_path1, file_path2):
    if not compare_docx_files(file_path1, file_path2):
        return 0
-    document = Document(file_path1)
+        
+    # Extract content.xml from the .odt file
+    extract_dir = file_path1 + "_extracted"
+    with zipfile.ZipFile(file_path1, 'r') as zip_ref:
+        zip_ref.extractall(extract_dir)
+    content_xml_path = os.path.join(extract_dir, 'content.xml')
+    with open(content_xml_path, 'r') as file:
+        content_xml = file.read()

-    for paragraph in document.paragraphs:
-        for run in paragraph.runs:
-            if run.font.highlight_color is not None:
-                return 0  # Highlighted words found
+    # Check for yellow highlights in the content.xml
+    yellow_highlight_pattern = re.compile(r'(.{0,50}background-color="#ffff00"[^>]*>.{0,50})')
+    yellow_highlight_matches = yellow_highlight_pattern.findall(content_xml)

-    return 1  # No highlighted words found
+    # Return 1 if yellow highlights are NOT found, otherwise 0
+    if yellow_highlight_matches:
+        return 0
+    else:
+        return 1


 def evaluate_strike_through_last_paragraph(file_path1, file_path2):
@@ -415,4 +427,4 @@ def compare_highlighted_text(file1, file2):
    doc2_highlighted = extract_highlighted_text(Document(file2))

    # Compare the sets of highlighted text to check if they are the same
-    return set(doc1_highlighted) == set(doc2_highlighted)
+    return set(doc1_highlighted) == set(doc2_highlighted)
--- a/evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json
+++ b/evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json
@@ -9,8 +9,8 @@
      "parameters": {
        "files": [
          {
-            "url": "https://drive.usercontent.google.com/download?id=10hgB73d_DoQXQVgUjvgXFUCP1Hd9YxDb&export=download&authuser=0&confirm=t&uuid=845f9616-2fb7-476a-abab-8b620d482ac2&at=APZUnTXB71PxHF7Dq9TC2OL_cRLm:1706199789147",
-            "path": "Desktop/sample-recruitment-phone-script.docx"
+            "url": "https://drive.google.com/uc?id=1Ul4mtQ4SpUNLVDlaiJThPprBe6WTGZ2L&export=download",
+            "path": "Desktop/sample-recruitment-phone-script.odt"
          }
        ]
      }
@@ -18,7 +18,7 @@
    {
      "type": "open",
      "parameters": {
-        "path": "Desktop/sample-recruitment-phone-script.docx"
+        "path": "Desktop/sample-recruitment-phone-script.odt"
      }
    }
  ],
@@ -31,7 +31,7 @@
      {
        "type": "activate_window",
        "parameters": {
-          "window_name": "sample-recruitment-phone-script.docx - LibreOffice Writer",
+          "window_name": "sample-recruitment-phone-script.odt - LibreOffice Writer",
          "strict": true
        }
      },
@@ -55,13 +55,13 @@
    "func": "check_highlighted_words",
    "expected": {
      "type": "cloud_file",
-      "path": "https://drive.usercontent.google.com/download?id=1s9Dsy66-zxbCAgeTyCh0P7AT7P4jF6o3&export=download&authuser=0&confirm=t&uuid=1239f2a1-8c86-45a4-8e7d-36388ac22a69&at=APZUnTVZQzXQAMNsKKQzOw5ppT8A:1706017721589",
-      "dest": "sample-recruitment-phone-script_Gold.docx"
+      "path": "https://drive.google.com/uc?id=12iMkgCYuUyhKUXux96kANLIeud0Wz9ct&export=download",
+      "dest": "sample-recruitment-phone-script_Gold.odt"
    },
    "result": {
      "type": "vm_file",
-      "path": "Desktop/sample-recruitment-phone-script.docx",
-      "dest": "sample-recruitment-phone-script.docx"
+      "path": "Desktop/sample-recruitment-phone-script.odt",
+      "dest": "sample-recruitment-phone-script.odt"
    }
  }
 }