def check_presenter_console_disable(config_file_path):
    """Score whether the Impress presenter console is disabled in a registry file.

    The file is parsed as XML and searched for the first ``prop`` named
    ``EnablePresenterScreen`` under the item whose ``oor:path`` is
    ``/org.openoffice.Office.Impress/Misc/Start``.

    Args:
        config_file_path: Path to the XML registry file to inspect
            (e.g. LibreOffice's ``registrymodifications.xcu``).

    Returns:
        ``1.0`` if the setting's ``<value>`` is ``false`` (case-insensitive,
        surrounding whitespace ignored); ``0.0`` in every other case: the
        setting is absent, has no ``<value>``, holds anything else, or the
        file cannot be opened or parsed.
    """
    namespaces = {
        'oor': 'http://openoffice.org/2001/registry'
    }
    setting_xpath = (
        ".//item[@oor:path='/org.openoffice.Office.Impress/Misc/Start']"
        "/prop[@oor:name='EnablePresenterScreen']"
    )

    # Only the parse step can fail for reasons outside our control (missing
    # file, malformed XML, unusable path object); keep the try body minimal.
    try:
        root = ET.parse(config_file_path).getroot()
    except (ET.ParseError, OSError, TypeError, ValueError) as e:
        logger.error("Error: %s", e)
        return 0.

    # Only the first matching entry decides the score.
    prop = root.find(setting_xpath, namespaces)
    if prop is None:
        return 0.

    # findtext() yields None when <value> is missing and "" when it is empty,
    # so neither case can raise AttributeError.
    presenter_screen_enabled = prop.findtext('value')
    if presenter_screen_enabled is None:
        return 0.

    return 1. if presenter_screen_enabled.strip().lower() == 'false' else 0.
+ def check_image_stretch_and_center(modified_ppt, original_ppt): # fixme: this func is overfit to this example libreoffice_impress @@ -72,32 +102,60 @@ def compare_pptx_files(file1_path, file2_path, **options): prs1 = Presentation(file1_path) prs2 = Presentation(file2_path) + examine_number_of_slides = options.get("examine_number_of_slides", True) + examine_shape = options.get("examine_shape", True) + examine_text = options.get("examine_text", True) + examine_font_name = options.get("examine_font_name", True) + examine_font_size = options.get("examine_font_size", True) + examine_font_bold = options.get("examine_font_bold", True) + examine_font_italic = options.get("examine_font_italic", True) + examine_color_rgb = options.get("examine_color_rgb", True) + examine_font_underline = options.get("examine_font_underline", True) + # compare the number of slides - if len(prs1.slides) != len(prs2.slides): + if len(prs1.slides) != len(prs2.slides) and examine_number_of_slides: return False # compare the content of each slide for slide1, slide2 in zip(prs1.slides, prs2.slides): # check if the shapes are the same for shape1, shape2 in zip(slide1.shapes, slide2.shapes): - if shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height: + if ( + shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height) and examine_shape: return False if hasattr(shape1, "text") and hasattr(shape2, "text"): - if shape1.text != shape2.text: + if shape1.text != shape2.text and examine_text: return False # check if the paragraphs are the same for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs): # check if the runs are the same for run1, run2 in zip(para1.runs, para2.runs): - if run1.text != run2.text: + if run1.text != run2.text and examine_text: return False # check if the font properties are the same - if run1.font.name != run2.font.name or 
run1.font.size != run2.font.size or run1.font.bold != run2.font.bold or run1.font.italic != run2.font.italic or run1.font.color.rgb != run2.font.color.rgb: + if run1.font.name != run2.font.name and examine_font_name: return False + if run1.font.size != run2.font.size and examine_font_size: + return False + + if run1.font.bold != run2.font.bold and examine_font_bold: + return False + + if run1.font.italic != run2.font.italic and examine_font_italic: + return False + + if run1.font.color.rgb != run2.font.color.rgb and examine_color_rgb: + return False + + if run1.font.underline != run2.font.underline and examine_font_underline: + return False + + # fixme: Actually there are more properties to be compared, but we cannot get them through pptx + return True @@ -126,32 +184,6 @@ def check_strikethrough(pptx_path, rules): return True -def has_audio_on_page(slide): - for shape in slide.shapes: - if shape.shape_type == 13: - return True - return False - - -def check_for_audio(prs): - prs = Presentation(prs) - for i, slide in enumerate(prs.slides): - if has_audio_on_page(slide): - return 1 - return 0 - - -def check_formula_shape(prs): - prs = Presentation(prs) - slide = prs.slides[13] - - for shape in slide.shapes: - - if shape.has_text_frame and shape.shape_type == 1: - return 1 - return 0 - - def check_slide_orientation_Portrait(pptx_path): presentation = Presentation(pptx_path) @@ -163,20 +195,40 @@ def check_slide_orientation_Portrait(pptx_path): return 0 -def contains_mp4_video(pptx_path): - prs = Presentation(pptx_path) - for slide in prs.slides: - for shape in slide.shapes: - if shape.shape_type == 16: - if shape.media_type == 3: - return 1 - return 0 +def evaluate_presentation_fill_to_rgb_distance(pptx_file, rules): + rgb = rules["rgb"] + + def slide_fill_distance_to_rgb(_slide, _rgb): + fill = _slide.background.fill + if fill.type == 1: + r1, g1, b1 = fill.fore_color.rgb + r2, g2, b2 = _rgb + return sqrt((r1 - r2) ** 2 + (g1 - g2) ** 2 + (b1 - b2) ** 2) / sqrt(255 
** 2 + 255 ** 2 + 255 ** 2) + + return 1 + + prs = Presentation(pptx_file) + similarity = 1 - sum(slide_fill_distance_to_rgb(slide, rgb) for slide in prs.slides) / len(prs.slides) + return similarity -if __name__ == "__main__": - path1 = "../../任务数据/LibreOffice Impress/Change_Color_Slide_Number_gold_textbox.pptx" - presentation = Presentation(path1) +def check_left_panel(accessibility_tree): + namespaces = { + 'st': 'uri:deskat:state.at-spi.gnome.org', + 'cp': 'uri:deskat:component.at-spi.gnome.org' + } - for i, sl in enumerate(presentation.slides): - for j, sh in enumerate(sl.shapes): - print(i, j, sh, sh.name, sh.shape_type, sh.text) + root = ET.fromstring(accessibility_tree) + + for root_pane in root.iter('root-pane'): + for panel in root_pane.iter('panel'): + for split_pane in panel.iter('split-pane'): + # Get the left panel + if split_pane.attrib.get("{{{}}}parentcoord".format(namespaces['cp'])) == "(0, 0)": + # Get the visible attribute + visible = split_pane.attrib.get("{{{}}}visible".format(namespaces['st'])) + if visible: + # decide if it is left panel + return 1. + + return 0. diff --git a/desktop_env/evaluators/metrics/vlc.py b/desktop_env/evaluators/metrics/vlc.py index ff3bbfa..17c9b40 100644 --- a/desktop_env/evaluators/metrics/vlc.py +++ b/desktop_env/evaluators/metrics/vlc.py @@ -10,6 +10,8 @@ import imagehash import librosa import numpy as np from PIL import Image +from fastdtw import fastdtw +from scipy.spatial.distance import cosine from skimage.metrics import structural_similarity as ssim logger = logging.getLogger("desktopenv.metrics.vlc") @@ -111,11 +113,10 @@ def compare_images(image1_path, image2_path): return similarity_index -def compare_audios(audio_path_1, audio_path_2, max_distance=1000): +def compare_audios(audio_path_1, audio_path_2): """ Compare two audio files and return a similarity score in the range [0, 1]. 
audio_path_1, audio_path_2: paths to the audio files to compare - max_distance: an empirically determined maximum expected DTW distance """ # Example Usage: # similarity = compare_audios_simple('path_to_audio1.mp3', 'path_to_audio2.mp3') @@ -125,21 +126,31 @@ def compare_audios(audio_path_1, audio_path_2, max_distance=1000): if not audio_path_1 or not audio_path_2: return 0 + # Load the audio files and extract MFCC features y1, sr1 = librosa.load(audio_path_1) - y2, sr2 = librosa.load(audio_path_2) - - # Extract MFCC features mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1) + + y2, sr2 = librosa.load(audio_path_2) mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2) - # Compute Dynamic Time Warping distance - distance, path = librosa.sequence.dtw(mfcc1.T, mfcc2.T) + # Normalize the MFCC features + mfcc1 = librosa.util.normalize(mfcc1, axis=1) + mfcc2 = librosa.util.normalize(mfcc2, axis=1) - # Normalize distance to get a similarity score - normalized_distance = np.mean(distance) / max_distance - similarity_score = 1 - min(normalized_distance, 1) # Ensure the score is within [0, 1] + # Define a lambda function to compute cosine distance + dist_func = lambda x, y: cosine(x, y) - return similarity_score + # Use the DTW algorithm to find the best alignment path + distance, path = fastdtw(mfcc1.T, mfcc2.T, dist=dist_func) + + # Calculate the similarity score, here we use 1/(1+distance) to convert distance to a similarity score + similarity = 1 / (1 + distance) + + return similarity + + +def compare_audios_by_dl_model(audio_path_1, audio_path_2): + pass def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=5): diff --git a/evaluation_examples/examples/libreoffice_impress/0f84bef9-9790-432e-92b7-eece357603fb.json b/evaluation_examples/examples/libreoffice_impress/0f84bef9-9790-432e-92b7-eece357603fb.json index 62aedee..4d42b6a 100644 --- a/evaluation_examples/examples/libreoffice_impress/0f84bef9-9790-432e-92b7-eece357603fb.json +++ 
b/evaluation_examples/examples/libreoffice_impress/0f84bef9-9790-432e-92b7-eece357603fb.json @@ -1,12 +1,19 @@ { "id": "0f84bef9-9790-432e-92b7-eece357603fb", "snapshot": "libreoffice_impress", - "instruction": "Could you help me disable the presenter screen mode in my slids?", + "instruction": "On it Whenever I launch a LibreOffice Impress, it uses both screens, one for current slide and next slide and another for actual presentation. What I want is to use only one monitor which shows presentation. I dont want the screen with Current slide and Next slide so that it can be used for other purposes. How should I achieve this?", "source": "https://stackoverflow.com/questions/29036788/how-to-disable-libreoffice-impress-to-use-multiple-display", "config": [], "trajectory": "trajectories/", "related_apps": [ - "" + "libreoffice_impress" ], - "evaluator": "evaluation_dir" + "evaluator": { + "func": "check_presenter_console_disable", + "result": { + "type": "vm_file", + "path": "/home/user/.config/libreoffice/4/user/registrymodifications.xcu", + "dest": "registrymodifications.xcu" + } + } } diff --git a/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json b/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json index 615cabc..6180dd6 100644 --- a/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json +++ b/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json @@ -27,11 +27,16 @@ "libreoffice_impress" ], "evaluator": { - "func": "compare_pptx_files", + "func": "evaluate_presentation_fill_to_rgb_distance", "expected": { - "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1LU-wnmIqMQgwkdAUFBLE1wNkH4gSl3IR&export=download&authuser=0&confirm=t&uuid=74520405-4028-4fbe-bab8-d56dc82ffb6c&at=APZUnTU0dz5ZE5CcQry8IeY5_s1J:1705481009686", - "dest": "lec17-gui-events_Gold.docx" + "type": "rule", + "rules": { + 
"rgb": [ + 0, + 0, + 255 + ] + } }, "result": { "type": "vm_file", diff --git a/evaluation_examples/examples/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab.json b/evaluation_examples/examples/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab.json index c825ce7..7e232f9 100644 --- a/evaluation_examples/examples/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab.json +++ b/evaluation_examples/examples/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab.json @@ -35,7 +35,7 @@ }, "result": { "type": "vm_file", - "path": "Desktop/MLA_Workshop_061X_Works_Cited.pptx", + "path": "/home/user/Desktop/MLA_Workshop_061X_Works_Cited.pptx", "dest": "MLA_Workshop_061X_Works_Cited.pptx" } } diff --git a/evaluation_examples/examples/libreoffice_impress/a097acff-6266-4291-9fbd-137af7ecd439.json b/evaluation_examples/examples/libreoffice_impress/a097acff-6266-4291-9fbd-137af7ecd439.json index 3553fdd..ee8ecd7 100644 --- a/evaluation_examples/examples/libreoffice_impress/a097acff-6266-4291-9fbd-137af7ecd439.json +++ b/evaluation_examples/examples/libreoffice_impress/a097acff-6266-4291-9fbd-137af7ecd439.json @@ -9,8 +9,8 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=1i_-m6mVrdesGJ392bulH5lveHarMwKk_&export=download&authuser=0&confirm=t&uuid=057973d3-52b7-45ac-8151-b2c6a1820f49&at=APZUnTU5SYajgO-YrxdDWSiJRfD4:1705768888387", - "path": "Desktop/Secrets-of-Monetizing-Video.pptx" + "url": "https://drive.usercontent.google.com/download?id=1i_-m6mVrdesGJ392bulH5lveHarMwKk_&export=download&authuser=0&confirm=t&uuid=686367a5-9383-4d60-8beb-108ef7119ffa&at=APZUnTXpMsCkSdM7fb5zdVz3qZUX:1705916772791", + "path": "Downloads/Secrets-of-Monetizing-Video.pptx" } ] } @@ -18,7 +18,7 @@ { "type": "open", "parameters": { - "path": "Desktop/Secrets-of-Monetizing-Video.pptx" + "path": "Downloads/Secrets-of-Monetizing-Video.pptx" } } ], @@ -27,8 +27,19 @@ "libreoffice_impress" ], "evaluator": { - "func": "check_file_exists", - 
"file_name": "pre.pptx", - "directory": "/home/user/Desktop/" + "func": "compare_pptx_files", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1i_-m6mVrdesGJ392bulH5lveHarMwKk_&export=download&authuser=0&confirm=t&uuid=686367a5-9383-4d60-8beb-108ef7119ffa&at=APZUnTXpMsCkSdM7fb5zdVz3qZUX:1705916772791", + "dest": "Secrets-of-Monetizing-Video.pptx" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/pre.pptx", + "dest": "pre.pptx" + }, + "options": { + "examine_shape": false + } } } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_impress/c59742c0-4323-4b9d-8a02-723c251deaa0.json b/evaluation_examples/examples/libreoffice_impress/c59742c0-4323-4b9d-8a02-723c251deaa0.json index 9eb877c..fce805f 100644 --- a/evaluation_examples/examples/libreoffice_impress/c59742c0-4323-4b9d-8a02-723c251deaa0.json +++ b/evaluation_examples/examples/libreoffice_impress/c59742c0-4323-4b9d-8a02-723c251deaa0.json @@ -38,11 +38,17 @@ "libreoffice_impress" ], "evaluator": { - "func": "check_for_audio", + "func": "compare_audios", "result": { + "type": "audio_in_slide", + "ppt_file_path": "/home/user/Desktop/Mady_and_Mia_Baseball.pptx", + "slide_index": 0, + "dest": "Baseball.mp3" + }, + "expected": { "type": "vm_file", - "path": "Desktop/Mady_and_Mia_Baseball.pptx", - "dest": "Mady_and_Mia_Baseball.pptx" + "path": "/home/user/Desktop/Baseball.mp3", + "dest": "Baseball_to_be_placed.mp3" } } } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_impress/ef9d12bd-bcee-4ba0-a40e-918400f43ddf.json b/evaluation_examples/examples/libreoffice_impress/ef9d12bd-bcee-4ba0-a40e-918400f43ddf.json index 8d43d11..457ea47 100644 --- a/evaluation_examples/examples/libreoffice_impress/ef9d12bd-bcee-4ba0-a40e-918400f43ddf.json +++ b/evaluation_examples/examples/libreoffice_impress/ef9d12bd-bcee-4ba0-a40e-918400f43ddf.json @@ -1,12 +1,27 @@ { "id": 
"ef9d12bd-bcee-4ba0-a40e-918400f43ddf", "snapshot": "libreoffice_impress", - "instruction": "Could you help me open the side panel on the left?", + "instruction": "I closed the slide panel on the left and idk how to get it back please help", "source": "https://www.reddit.com/r/libreoffice/comments/18elh3y/i_closed_the_slide_pannel_on_the_left_and_idk_how/", - "config": [], + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--impress" + ] + } + } + ], "trajectory": "trajectories/", "related_apps": [ - "" + "libreoffice_impress" ], - "evaluator": "evaluation_dir" + "evaluator": { + "func": "check_left_panel", + "result": { + "type": "accessibility_tree" + } + } }