Loaded the LibreOffice Writer examples and found a few problems; will do another round tomorrow for the rest

2024-01-02 17:50:05 +08:00
parent 8ac88e9617
commit 03e99a68fb
22 changed files with 191 additions and 84 deletions
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -1,15 +1,20 @@
 import xml.etree.ElementTree as ET
-    
+import os
+from typing import List, Dict, Any
 from docx import Document
+from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

-def find_default_font(expected, config_file_path):
+
+def find_default_font(config_file_path, rules):
    """Find the default font in LibreOffice Writer."""
    default_font = None
+    expected_font = rules["font_name"]
+
    try:
        tree = ET.parse(config_file_path)
        root = tree.getroot()
-        
-       # Define the XML namespace used in the file
+
+        # Define the XML namespace used in the file
        namespace = {'oor': 'http://openoffice.org/2001/registry'}

        # Search for the node containing the default font setting for LibreOffice Writer
@@ -19,24 +24,26 @@ def find_default_font(expected, config_file_path):
                    default_font = value.text
    except Exception as e:
        print(f"Error: {e}")
-    return 1 if default_font == expected else 0
+
+    return 1 if default_font == expected_font else 0


 def contains_page_break(docx_file):
    doc = Document(docx_file)
-    
+
    namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
-    
+
    for paragraph in doc.paragraphs:
        for run in paragraph.runs:
            br_elems = run.element.findall('.//w:br', namespaces)
            for br in br_elems:
-                if br is not None and '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type' in br.attrib and br.attrib['{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type'] == 'page':
+                if br is not None and '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type' in br.attrib and \
+                        br.attrib['{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type'] == 'page':
                    return 1
    return 0

-def compare_docx_files(file1, file2):

+def compare_docx_files(file1, file2):
    doc1 = Document(file1)
    doc2 = Document(file2)

@@ -53,6 +60,7 @@ def compare_docx_files(file1, file2):

    return 1

+
 def compare_docx_tables(docx_file1, docx_file2):
    doc1 = Document(docx_file1)
    doc2 = Document(docx_file2)
@@ -78,8 +86,8 @@ def compare_docx_tables(docx_file1, docx_file2):

    return 1

-def compare_line_spacing(docx_file1, docx_file2):

+def compare_line_spacing(docx_file1, docx_file2):
    doc1 = Document(docx_file1)
    doc2 = Document(docx_file2)

@@ -96,12 +104,12 @@ def compare_line_spacing(docx_file1, docx_file2):
            return 0

    return 1
-    
+
+
 def compare_insert_equation(docx_file1, docx_file2):
-    
    if not compare_docx_files(docx_file1, docx_file2):
        return 0
-    
+
    doc1 = Document(docx_file1)
    doc2 = Document(docx_file2)

@@ -112,8 +120,10 @@ def compare_insert_equation(docx_file1, docx_file2):
                return 1
    return 0

-def compare_font_names(expected_font, docx_file):
+
+def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
    doc = Document(docx_file)
+    expected_font = rules["font_name"]

    for paragraph in doc.paragraphs:
        for run in paragraph.runs:
@@ -122,6 +132,7 @@ def compare_font_names(expected_font, docx_file):
                return 0
    return 1

+
 def compare_subscript_contains(docx_file1, docx_file2):
    doc1 = Document(docx_file1)
    doc2 = Document(docx_file2)
@@ -133,6 +144,7 @@ def compare_subscript_contains(docx_file1, docx_file2):
                return 1
    return 0

+
 def has_page_numbers_in_footers(docx_file):
    doc = Document(docx_file)

@@ -146,7 +158,6 @@ def has_page_numbers_in_footers(docx_file):
            return 0
    return 1

-from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

 def is_first_line_centered(docx_file):
    doc = Document(docx_file)
@@ -155,19 +166,20 @@ def is_first_line_centered(docx_file):
    # check if the first line is center justified
    return 1 if first_paragraph.paragraph_format.alignment == WD_PARAGRAPH_ALIGNMENT.CENTER else 0

-import os
-def check_file_exists(directory, filename):

+def check_file_exists(directory, filename):
    file_path = os.path.join(directory, filename)
    return 1 if os.path.isfile(file_path) else 0

+
 def compare_contains_image(docx_file1, docx_file2):
    doc1 = Document(docx_file1)
    doc2 = Document(docx_file2)

    for para1, para2 in zip(doc1.paragraphs, doc2.paragraphs):
        for run1, run2 in zip(para1.runs, para2.runs):
-            if ('graphicData' in run1._element.xml and 'graphicData' not in run2._element.xml) or ('graphicData' not in run1._element.xml and 'graphicData' in run2._element.xml):
+            if ('graphicData' in run1._element.xml and 'graphicData' not in run2._element.xml) or (
+                    'graphicData' not in run1._element.xml and 'graphicData' in run2._element.xml):
                return 0
    return 1

@@ -178,6 +190,6 @@ def compare_contains_image(docx_file1, docx_file2):
 # Replace 'your_document.docx' with the path to your document
 # result = contains_page_break('your_document.docx')
 # print(result)
-    
-#config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
-#print(find_default_font("Ani", config_path))
+
+# config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
+# print(find_default_font(config_path, {"font_name": "Ani"}))