Merge branch 'main' of github.com:ztjhz/DesktopEnv

2024-01-23 22:54:57 +08:00
parent 2f8e1b6a50 402d7f8402
commit d757b39985
36 changed files with 945 additions and 473 deletions
--- a/desktop_env/evaluators/metrics/init.py
+++ b/desktop_env/evaluators/metrics/init.py
@@ -1,7 +1,7 @@
 from .chrome import is_expected_tabs, is_expected_bookmarks, compare_pdfs, is_cookie_deleted, is_shortcut_on_desktop
-from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers
+from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers, compare_docx_lines
 from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
-    compare_insert_equation
+    compare_insert_equation, compare_highlighted_text
 from .docs import is_first_line_centered, check_file_exists, compare_contains_image
 from .docs import evaluate_colored_words_in_tables, check_highlighted_words, evaluate_strike_through_last_paragraph, \
    evaluate_conversion, evaluate_spacing, check_italic_font_size_14, evaluate_alignment, get_unique_train_ids, \
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -357,3 +357,31 @@ def check_no_duplicates(initial_file, processed_file):

    # No duplicates found and at least one valid line was processed
    return 1
+
+def compare_docx_lines(file1, file2):
+    """
+    Tell whether two .docx documents hold the same set of non-blank lines.
+    file1, file2: the documents to compare; each is handed to Document() as-is
+    Paragraph text is stripped and paragraphs that end up empty are dropped.
+    The comparison is set-based, so paragraph order and repeated lines do not
+    affect the result.
+    Returns True when both sets are equal, False otherwise.
+    """
+    def _nonblank_lines(source):
+        stripped = (para.text.strip() for para in Document(source).paragraphs)
+        return {line for line in stripped if line}
+
+    # file1 is read completely before file2 is opened
+    return _nonblank_lines(file1) == _nonblank_lines(file2)
+
+def compare_highlighted_text(file1, file2):
+    """
+    Tell whether two .docx documents highlight the same pieces of text.
+    file1, file2: the documents to compare; each is handed to Document() as-is
+    Every run whose font.highlight_color is truthy contributes its stripped
+    text. The collected texts are compared as sets, so order and repetition
+    are ignored.
+    Returns True when both sets are equal, False otherwise.
+    """
+    def _highlighted_texts(source):
+        document = Document(source)
+        return {
+            run.text.strip()
+            for paragraph in document.paragraphs
+            for run in paragraph.runs
+            if run.font.highlight_color  # falsy value means "not highlighted"
+        }
+
+    # file1 is scanned completely before file2 is opened
+    return _highlighted_texts(file1) == _highlighted_texts(file2)
--- a/desktop_env/evaluators/metrics/impress.py
+++ b/desktop_env/evaluators/metrics/impress.py
@@ -1,6 +1,36 @@
+import logging
+import xml.etree.ElementTree as ET
+from math import sqrt
+
 from pptx import Presentation
 from pptx.util import Inches

+logger = logging.getLogger("desktopenv.metric.slides")
+
+
+def check_presenter_console_disable(config_file_path):
+    """
+    Check an XML configuration file for a disabled presenter console.
+    config_file_path: XML file to parse with ElementTree
+    Looks up the first 'EnablePresenterScreen' prop under the
+    '/org.openoffice.Office.Impress/Misc/Start' item and reads its <value>.
+    Returns 1. when that value is 'false' (case-insensitive); returns 0. when
+    the value is anything else, when no such prop exists, or when any error
+    occurs (the error is logged).
+    """
+    ns = {
+        'oor': 'http://openoffice.org/2001/registry'
+    }
+    prop_path = ".//item[@oor:path='/org.openoffice.Office.Impress/Misc/Start']/prop[@oor:name='EnablePresenterScreen']"
+
+    try:
+        # Only the first matching prop decides the outcome
+        prop = ET.parse(config_file_path).getroot().find(prop_path, ns)
+        if prop is None:
+            return 0.
+
+        setting = prop.find('value').text
+        return 1. if setting.lower() == 'false' else 0.
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0.
+

 def check_image_stretch_and_center(modified_ppt, original_ppt):
    # fixme: this func is overfit to this example libreoffice_impress
@@ -72,32 +102,60 @@ def compare_pptx_files(file1_path, file2_path, **options):
    prs1 = Presentation(file1_path)
    prs2 = Presentation(file2_path)

+    examine_number_of_slides = options.get("examine_number_of_slides", True)
+    examine_shape = options.get("examine_shape", True)
+    examine_text = options.get("examine_text", True)
+    examine_font_name = options.get("examine_font_name", True)
+    examine_font_size = options.get("examine_font_size", True)
+    examine_font_bold = options.get("examine_font_bold", True)
+    examine_font_italic = options.get("examine_font_italic", True)
+    examine_color_rgb = options.get("examine_color_rgb", True)
+    examine_font_underline = options.get("examine_font_underline", True)
+
    # compare the number of slides
-    if len(prs1.slides) != len(prs2.slides):
+    if len(prs1.slides) != len(prs2.slides) and examine_number_of_slides:
        return False

    # compare the content of each slide
    for slide1, slide2 in zip(prs1.slides, prs2.slides):
        # check if the shapes are the same
        for shape1, shape2 in zip(slide1.shapes, slide2.shapes):
-            if shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
+            if (
+                    shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height) and examine_shape:
                return False

            if hasattr(shape1, "text") and hasattr(shape2, "text"):
-                if shape1.text != shape2.text:
+                if shape1.text != shape2.text and examine_text:
                    return False

                # check if the paragraphs are the same
                for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs):
                    # check if the runs are the same
                    for run1, run2 in zip(para1.runs, para2.runs):
-                        if run1.text != run2.text:
+                        if run1.text != run2.text and examine_text:
                            return False

                        # check if the font properties are the same
-                        if run1.font.name != run2.font.name or run1.font.size != run2.font.size or run1.font.bold != run2.font.bold or run1.font.italic != run2.font.italic or run1.font.color.rgb != run2.font.color.rgb:
+                        if run1.font.name != run2.font.name and examine_font_name:
                            return False

+                        if run1.font.size != run2.font.size and examine_font_size:
+                            return False
+
+                        if run1.font.bold != run2.font.bold and examine_font_bold:
+                            return False
+
+                        if run1.font.italic != run2.font.italic and examine_font_italic:
+                            return False
+
+                        if run1.font.color.rgb != run2.font.color.rgb and examine_color_rgb:
+                            return False
+
+                        if run1.font.underline != run2.font.underline and examine_font_underline:
+                            return False
+
+                        # fixme: Actually there are more properties to be compared, but we cannot get them through pptx
+
    return True


@@ -126,32 +184,6 @@ def check_strikethrough(pptx_path, rules):
    return True


-def has_audio_on_page(slide):
-    for shape in slide.shapes:
-        if shape.shape_type == 13:
-            return True
-    return False
-
-
-def check_for_audio(prs):
-    prs = Presentation(prs)
-    for i, slide in enumerate(prs.slides):
-        if has_audio_on_page(slide):
-            return 1
-    return 0
-
-
-def check_formula_shape(prs):
-    prs = Presentation(prs)
-    slide = prs.slides[13]
-
-    for shape in slide.shapes:
-
-        if shape.has_text_frame and shape.shape_type == 1:
-            return 1
-    return 0
-
-
 def check_slide_orientation_Portrait(pptx_path):
    presentation = Presentation(pptx_path)

@@ -163,20 +195,40 @@ def check_slide_orientation_Portrait(pptx_path):
    return 0


-def contains_mp4_video(pptx_path):
-    prs = Presentation(pptx_path)
-    for slide in prs.slides:
-        for shape in slide.shapes:
-            if shape.shape_type == 16:
-                if shape.media_type == 3:
-                    return 1
-    return 0
+def evaluate_presentation_fill_to_rgb_distance(pptx_file, rules):
+    """
+    Score how close the slide background fills are to a target colour.
+    pptx_file: presentation to inspect, opened with Presentation()
+    rules: mapping whose "rgb" entry is the target colour; it is unpacked
+        into three numeric components (r, g, b)
+    Each slide contributes a distance: the Euclidean RGB distance between its
+    background fore colour and the target, divided by the black-to-white
+    distance, or 1 when the fill type is not 1.
+    Returns 1 minus the mean distance over all slides. A presentation without
+    slides returns 0. instead of dividing by zero.
+    """
+    rgb = rules["rgb"]
+    # Distance between opposite corners of the 0-255 RGB cube (normalizer)
+    max_distance = sqrt(255 ** 2 + 255 ** 2 + 255 ** 2)
+
+    def slide_fill_distance_to_rgb(_slide, _rgb):
+        fill = _slide.background.fill
+        # NOTE(review): 1 is presumably MSO_FILL.SOLID in python-pptx - confirm
+        if fill.type == 1:
+            r1, g1, b1 = fill.fore_color.rgb
+            r2, g2, b2 = _rgb
+            return sqrt((r1 - r2) ** 2 + (g1 - g2) ** 2 + (b1 - b2) ** 2) / max_distance
+
+        # Any other fill type counts as maximally distant
+        return 1
+
+    prs = Presentation(pptx_file)
+    slide_count = len(prs.slides)
+    if slide_count == 0:
+        # Nothing to average over; avoid ZeroDivisionError
+        return 0.
+
+    similarity = 1 - sum(slide_fill_distance_to_rgb(slide, rgb) for slide in prs.slides) / slide_count
+    return similarity


-if __name__ == "__main__":
-    path1 = "../../任务数据/LibreOffice Impress/Change_Color_Slide_Number_gold_textbox.pptx"
-    presentation = Presentation(path1)
+def check_left_panel(accessibility_tree):
+    """
+    Look for a visible split-pane at parent coordinate "(0, 0)" in an
+    accessibility tree.
+    accessibility_tree: XML text, parsed with ET.fromstring
+    Walks root-pane -> panel -> split-pane elements (each step searches all
+    descendants) and checks the component 'parentcoord' and state 'visible'
+    attributes of every split-pane.
+    Returns 1. on the first split-pane whose parentcoord is "(0, 0)" and whose
+    visible attribute is present and non-empty; returns 0. otherwise.
+    NOTE(review): any non-empty string passes the visible check, so a literal
+    "false" would also count as visible - confirm how the tree encodes states.
+    """
+    namespaces = {
+        'st': 'uri:deskat:state.at-spi.gnome.org',
+        'cp': 'uri:deskat:component.at-spi.gnome.org'
+    }

-    for i, sl in enumerate(presentation.slides):
-        for j, sh in enumerate(sl.shapes):
-            print(i, j, sh, sh.name, sh.shape_type, sh.text)
+    root = ET.fromstring(accessibility_tree)
+
+    for root_pane in root.iter('root-pane'):
+        for panel in root_pane.iter('panel'):
+            for split_pane in panel.iter('split-pane'):
+                # Get the left panel
+                if split_pane.attrib.get("{{{}}}parentcoord".format(namespaces['cp'])) == "(0, 0)":
+                    # Get the visible attribute
+                    visible = split_pane.attrib.get("{{{}}}visible".format(namespaces['st']))
+                    if visible:
+                        # decide if it is left panel
+                        return 1.
+
+    return 0.
--- a/desktop_env/evaluators/metrics/vlc.py
+++ b/desktop_env/evaluators/metrics/vlc.py
@@ -10,6 +10,8 @@ import imagehash
 import librosa
 import numpy as np
 from PIL import Image
+from fastdtw import fastdtw
+from scipy.spatial.distance import cosine
 from skimage.metrics import structural_similarity as ssim

 logger = logging.getLogger("desktopenv.metrics.vlc")
@@ -111,11 +113,10 @@ def compare_images(image1_path, image2_path):
    return similarity_index


-def compare_audios(audio_path_1, audio_path_2, max_distance=1000):
+def compare_audios(audio_path_1, audio_path_2):
    """
    Compare two audio files and return a similarity score in the range [0, 1].
    audio_path_1, audio_path_2: paths to the audio files to compare
-    max_distance: an empirically determined maximum expected DTW distance
    """
    # Example Usage:
    # similarity = compare_audios_simple('path_to_audio1.mp3', 'path_to_audio2.mp3')
@@ -125,21 +126,31 @@ def compare_audios(audio_path_1, audio_path_2, max_distance=1000):
    if not audio_path_1 or not audio_path_2:
        return 0

+    # Load the audio files and extract MFCC features
    y1, sr1 = librosa.load(audio_path_1)
-    y2, sr2 = librosa.load(audio_path_2)
-
-    # Extract MFCC features
    mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1)
+
+    y2, sr2 = librosa.load(audio_path_2)
    mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2)

-    # Compute Dynamic Time Warping distance
-    distance, path = librosa.sequence.dtw(mfcc1.T, mfcc2.T)
+    # Normalize the MFCC features
+    mfcc1 = librosa.util.normalize(mfcc1, axis=1)
+    mfcc2 = librosa.util.normalize(mfcc2, axis=1)

-    # Normalize distance to get a similarity score
-    normalized_distance = np.mean(distance) / max_distance
-    similarity_score = 1 - min(normalized_distance, 1)  # Ensure the score is within [0, 1]
+    # Define a lambda function to compute cosine distance
+    dist_func = lambda x, y: cosine(x, y)

-    return similarity_score
+    # Use the DTW algorithm to find the best alignment path
+    distance, path = fastdtw(mfcc1.T, mfcc2.T, dist=dist_func)
+
+    # Calculate the similarity score, here we use 1/(1+distance) to convert distance to a similarity score
+    similarity = 1 / (1 + distance)
+
+    return similarity
+
+
+def compare_audios_by_dl_model(audio_path_1, audio_path_2):
+    """
+    Placeholder: no comparison is implemented here yet.
+    audio_path_1, audio_path_2: currently unused
+    Always returns None.
+    """
+    pass


 def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=5):