modified libreoffice writer eval examples

2024-01-23 22:02:09 +08:00
parent 42725a00a5
commit 35c4ce99ff
14 changed files with 749 additions and 322 deletions
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -1,7 +1,7 @@
 from .chrome import is_expected_tabs, is_expected_bookmarks, compare_pdfs, is_cookie_deleted, is_shortcut_on_desktop
-from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers
+from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers, compare_docx_lines
 from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
-    compare_insert_equation
+    compare_insert_equation, compare_highlighted_text
 from .docs import is_first_line_centered, check_file_exists, compare_contains_image
 from .docs import evaluate_colored_words_in_tables, check_highlighted_words, evaluate_strike_through_last_paragraph, \
    evaluate_conversion, evaluate_spacing, check_italic_font_size_14, evaluate_alignment, get_unique_train_ids, \
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -357,3 +357,31 @@ def check_no_duplicates(initial_file, processed_file):

    # No duplicates found and at least one valid line was processed
    return 1
+
+def compare_docx_lines(file1, file2):
+    """Check whether two .docx files contain the same set of non-empty lines.
+
+    Every paragraph counts as one line. Lines are stripped of surrounding
+    whitespace and lines that are empty after stripping are dropped. The
+    comparison is set-based, so line order and repeated lines are ignored.
+
+    Args:
+        file1: First document, in any form accepted by ``Document()``.
+        file2: Second document, in any form accepted by ``Document()``.
+
+    Returns:
+        True if both documents yield the same set of lines; False otherwise,
+        including when either argument is None.
+    """
+    # Document(None) does not fail: python-docx silently opens its built-in
+    # default template. Without this guard two missing files would both read
+    # as "no lines" and be reported as equal.
+    if file1 is None or file2 is None:
+        return False
+
+    def read_lines(docx_file):
+        # Set of stripped, non-empty paragraph texts of one document.
+        stripped = (p.text.strip() for p in Document(docx_file).paragraphs)
+        return {line for line in stripped if line}
+
+    return read_lines(file1) == read_lines(file2)
+
+def compare_highlighted_text(file1, file2):
+    """Check whether two .docx files highlight the same set of run texts.
+
+    The stripped text of every run whose ``font.highlight_color`` is truthy
+    is collected from each document, and the two collections are compared
+    as sets (order and repetition are ignored).
+
+    Args:
+        file1: First document, in any form accepted by ``Document()``.
+        file2: Second document, in any form accepted by ``Document()``.
+
+    Returns:
+        True if both documents yield the same set of highlighted run texts;
+        False otherwise, including when either argument is None.
+    """
+    # Document(None) does not fail: python-docx silently opens its built-in
+    # default template. Without this guard two missing files would both read
+    # as "nothing highlighted" and be reported as equal.
+    if file1 is None or file2 is None:
+        return False
+
+    def extract_highlighted_text(doc):
+        # NOTE(review): the comparison is per run, so the same highlighted
+        # passage split into runs differently would not match -- confirm
+        # this is acceptable for the documents being evaluated.
+        found = set()
+        for paragraph in doc.paragraphs:
+            for run in paragraph.runs:
+                if run.font.highlight_color:  # falsy value => not highlighted
+                    found.add(run.text.strip())
+        return found
+
+    # Read the highlighted text from both documents
+    doc1_highlighted = extract_highlighted_text(Document(file1))
+    doc2_highlighted = extract_highlighted_text(Document(file2))
+
+    return doc1_highlighted == doc2_highlighted