Merge remote-tracking branch 'origin/main'

2024-01-23 14:38:08 +08:00
parent 303c89411e 42725a00a5
commit 29d12eade4
15 changed files with 540 additions and 19 deletions
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -3,6 +3,9 @@ from .docs import compare_font_names, compare_subscript_contains, has_page_numbe
 from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
    compare_insert_equation
 from .docs import is_first_line_centered, check_file_exists, compare_contains_image
+from .docs import evaluate_colored_words_in_tables, check_highlighted_words, evaluate_strike_through_last_paragraph, \
+    evaluate_conversion, evaluate_spacing, check_italic_font_size_14, evaluate_alignment, get_unique_train_ids, \
+    check_no_duplicates           
 from .general import exact_match, fuzzy_match, check_csv, check_accessibility_tree, check_list
 from .libreoffice import check_libre_locale
 from .pdf import check_pdf_pages
@@ -13,7 +16,7 @@ from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, co
 from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions
 from .general import check_csv, check_accessibility_tree, check_list, run_sqlite3, check_json
 from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter
-from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed
+from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed, check_json_settings, check_json_keybindings
 from .impress import check_image_stretch_and_center, check_slide_numbers_color, compare_pptx_files, check_strikethrough, \
    check_for_audio, check_formula_shape
 from .impress import check_slide_orientation_Portrait, contains_mp4_video
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -5,6 +5,7 @@ from typing import List, Dict, Any

 from docx import Document
 from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
+from docx.shared import RGBColor

 logger = logging.getLogger("desktopenv.metric.docs")

@@ -197,3 +198,162 @@ def compare_contains_image(docx_file1, docx_file2):

 # config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
 # print(find_default_font("Ani", config_path))
+
+
+def evaluate_colored_words_in_tables(file_path):
+    """Check that text inside tables is colored according to its first letter.
+
+    Every non-blank run in a table cell must be red (255, 0, 0) when its first
+    visible character is a vowel, and blue (0, 0, 255) otherwise.
+
+    Args:
+        file_path: path to the .docx file to inspect
+
+    Return:
+        int: 1 if every checked run has the expected color, otherwise 0
+    """
+    red = RGBColor(255, 0, 0)
+    blue = RGBColor(0, 0, 255)
+    document = Document(file_path)
+
+    for table in document.tables:
+        for row in table.rows:
+            for cell in row.cells:
+                for paragraph in cell.paragraphs:
+                    for run in paragraph.runs:
+                        # Judge the run by its first visible character. A run
+                        # that is empty or whitespace-only shows no colored
+                        # text, so it must not be treated as a consonant word.
+                        word = run.text.strip()
+                        if not word:
+                            continue
+
+                        expected = red if word[0].lower() in 'aeiou' else blue
+                        if run.font.color.rgb != expected:
+                            return 0  # wrong (or unset) color for this run
+
+    return 1  # every checked run in every table is correctly colored
+
+
+def check_highlighted_words(file_path):
+    """Return 1 when no run in document.paragraphs is highlighted, else 0.
+
+    Args:
+        file_path: path to the .docx file to inspect
+    """
+    doc = Document(file_path)
+    has_highlight = any(
+        run.font.highlight_color is not None
+        for para in doc.paragraphs
+        for run in para.runs
+    )
+    return 0 if has_highlight else 1
+
+
+def evaluate_strike_through_last_paragraph(file_path):
+    """Check that the last paragraph is entirely struck through.
+
+    Args:
+        file_path: path to the .docx file to inspect
+
+    Return:
+        int: 1 if every run of the last paragraph has strike-through set,
+            0 if any run lacks it or the document has no paragraphs
+    """
+    document = Document(file_path)
+
+    paragraphs = document.paragraphs
+    if not paragraphs:
+        return 0  # nothing to check; avoids IndexError on paragraphs[-1]
+
+    # A falsy strike value (False or unset) counts as "not struck through".
+    if all(run.font.strike for run in paragraphs[-1].runs):
+        return 1
+    return 0
+
+
+def evaluate_conversion(file_path):
+    """Return 1 when no run is still uppercase, else 0.
+
+    Runs in table cells and in document.paragraphs are both inspected; a run
+    counts as uppercase when ``str.isupper()`` is true for its text.
+
+    Args:
+        file_path: path to the .docx file to inspect
+    """
+    doc = Document(file_path)
+
+    # Table cell paragraphs are examined first, then document.paragraphs.
+    table_paragraphs = [
+        para
+        for table in doc.tables
+        for row in table.rows
+        for cell in row.cells
+        for para in cell.paragraphs
+    ]
+
+    for para in table_paragraphs + list(doc.paragraphs):
+        if any(run.text.isupper() for run in para.runs):
+            return 0  # leftover uppercase text
+
+    return 1
+
+
+def evaluate_spacing(file_path):
+    """Check the line spacing of the first three paragraphs.
+
+    The first, second and third paragraphs (introduction, body, conclusion)
+    must have line spacing 1.0, 2.0 and 1.5 respectively.
+
+    Args:
+        file_path: path to the .docx file to inspect
+
+    Return:
+        int: 1 if all three spacings match, 0 otherwise (including when the
+            document has fewer than three paragraphs)
+    """
+    document = Document(file_path)
+
+    paragraphs = document.paragraphs
+    if len(paragraphs) < 3:
+        return 0  # cannot match the layout; avoids IndexError below
+
+    expected = (1.0, 2.0, 1.5)
+    actual = tuple(p.paragraph_format.line_spacing for p in paragraphs[:3])
+    return 1 if actual == expected else 0
+
+
+def check_italic_font_size_14(path):
+    """Return 1 if every italic run in document.paragraphs is 14 pt, else 0.
+
+    Args:
+        path: path to the .docx file to inspect
+    """
+    doc = Document(path)
+    for para in doc.paragraphs:
+        for run in para.runs:
+            if not run.italic:
+                continue
+            # An italic run with no explicit size fails, same as a wrong size.
+            if run.font.size is None or run.font.size.pt != 14:
+                return 0
+    return 1
+
+
+def evaluate_alignment(docx_path):
+    """Check each sentence for a "first three words, then the rest" layout.
+
+    Paragraph text is split on '.' into sentences; sentences with fewer than
+    three whitespace-separated words are ignored.
+
+    Args:
+        docx_path: path to the .docx file to inspect
+
+    Return:
+        int: 1 if every checked sentence passes, 0 at the first one that fails
+    """
+    # Load the document
+    doc = Document(docx_path)
+
+    # Iterate through each paragraph in the document
+    for para in doc.paragraphs:
+        # Split the paragraph into individual sentences
+        sentences = para.text.split('.')
+
+        for sentence in sentences:
+            # Split the sentence into words (on any run of whitespace)
+            words = sentence.strip().split()
+
+            # Check if the sentence has at least three words
+            if len(words) < 3:
+                continue  # Skip sentences with less than three words
+
+            # The first three words should be separated from the rest.
+            # Both parts are re-joined with single spaces, so they are only
+            # found in the sentence if the words inside each part were
+            # separated by exactly one space in the original text.
+            first_part = ' '.join(words[:3])
+            second_part = ' '.join(words[3:])
+
+            # Both parts must occur in the sentence, with the first part starting earlier.
+            # NOTE(review): the gap between the parts (large space/tab) is never inspected.
+            # NOTE(review): with exactly three words second_part is '' and find('') is 0,
+            # so the ordering test cannot hold and the function returns 0 - confirm intent.
+            if not (first_part in sentence and second_part in sentence and sentence.find(first_part) < sentence.find(second_part)):
+                return 0  # The sentence does not meet the alignment criteria
+
+    return 1  # All sentences meet the alignment criteria
+
+
+def get_unique_train_ids(initial_file):
+    """Collect the distinct train ids listed in a document.
+
+    A paragraph is a record when splitting its text on ',' yields exactly four
+    fields; the train id is the second field with whitespace stripped.
+
+    Args:
+        initial_file: path to the .docx file to read
+
+    Return:
+        tuple: (set of train ids, number of ids that were added to the set)
+    """
+    document = Document(initial_file)
+    unique_ids = set()
+    added = 0
+
+    for paragraph in document.paragraphs:
+        fields = paragraph.text.split(',')
+        if len(fields) != 4:
+            continue  # not a record line
+        candidate = fields[1].strip()
+        if candidate in unique_ids:
+            continue  # already counted
+        unique_ids.add(candidate)
+        added += 1
+
+    return unique_ids, added
+
+def check_no_duplicates(initial_file, processed_file):
+    """Check that the processed document is the de-duplicated initial one.
+
+    Record lines have the format: time_HH:MM:SS, train_id, station_id,
+    platform_no. Paragraphs that do not split into exactly four
+    comma-separated fields are ignored.
+
+    Args:
+        initial_file: path to the original .docx file
+        processed_file: path to the .docx file produced by the task
+
+    Return:
+        int: 0 if a train id repeats in the processed file, or if its ids or
+            record count differ from the unique ids of the initial file;
+            1 otherwise
+    """
+    expected_ids, expected_count = get_unique_train_ids(initial_file)
+    seen_ids = set()
+    valid_lines = 0  # record lines accepted from the processed file
+
+    for paragraph in Document(processed_file).paragraphs:
+        fields = paragraph.text.split(',')
+        if len(fields) != 4:
+            continue  # not a record line
+        candidate = fields[1].strip()
+        if candidate in seen_ids:
+            return 0  # duplicate train id
+        seen_ids.add(candidate)
+        valid_lines += 1
+
+    same_as_initial = seen_ids == expected_ids and valid_lines == expected_count
+    return 1 if same_as_initial else 0
--- a/desktop_env/evaluators/metrics/vscode.py
+++ b/desktop_env/evaluators/metrics/vscode.py
@@ -1,5 +1,5 @@
 from typing import Dict
-import json
+import json, copy

 def check_json_keybindings(actual: str, expected: str, **options) -> float:
    """
@@ -10,36 +10,58 @@ def check_json_keybindings(actual: str, expected: str, **options) -> float:
    Return:
        float: the score
    """
+    def direct_load_json(fp):
+        try:
+            with open(fp, 'r') as f:
+                data = json.load(f)
+            return data
+        except:
+            return None
+    
+    def skip_first_line_load_json(fp):
+        try:
+            with open(fp, 'r') as f:
+                f.readline()
+                data = json.load(f)
+            return data
+        except:
+            return None

-    with open(actual) as f:
-        data = json.load(f)
-
+    for func in [direct_load_json, skip_first_line_load_json]:
+        data = func(actual)
+        if data is not None and type(data) == list:
+            break
+    else:
+        return 0.0
+    expected = expected['expect']
    if expected in data:
        return 1.0
    else:
        return 0.0

+
 def check_json_settings(actual: str, expected: str, **options) -> float:
     """
     Args:
         actual (str): path to result text file
-        expected (str): expected dict{}
+        expected (dict): expected dict{}, containing key "expect"

     Return:
         float: the score
     """

-    with open(actual) as f:
+    with open(actual, 'r') as f:
         data = json.load(f)
-        
-    expect = set(expected.items())
-    json = set(data.items())
-
-    if expect.issubset(json):
+    
+    expect = expected['expect']
+    # Subset test: overlaying the expected pairs on a copy of the loaded
+    # settings leaves it equal to the original only if every expected key is
+    # already present with an equal value.
+    data_copy = copy.deepcopy(data)
+    data_copy.update(expect)
+    if data == data_copy:
         return 1.0
     else:
         return 0.0

+
 def compare_text_file(actual: str, expected: str, **options) -> float:
    """
    Args: