From 983283a86a03963510c35ef117fcdfa943d8633f Mon Sep 17 00:00:00 2001 From: MillanK <51349692+MillanK0817@users.noreply.github.com> Date: Sat, 18 Jan 2025 22:25:18 +0800 Subject: [PATCH] patch: minor bug fixes for evaluator and task configurations, documentation update (#121) * fix: /cursor_position api return format fix * chore: update README.md to remove deprecated command * fix: add base score for evaluators and minor bug fixes * fix: add base score for setup configurations --------- Co-authored-by: Jiaqi Deng --- desktop_env/evaluators/getters/chrome.py | 4 +- desktop_env/evaluators/metrics/docs.py | 22 ++++++++-- desktop_env/evaluators/metrics/slides.py | 43 ++++++++++++++++--- desktop_env/evaluators/metrics/vlc.py | 14 ++++-- desktop_env/server/README.md | 3 -- desktop_env/server/main.py | 4 +- .../3b27600c-3668-4abd-8f84-7bcdebbccbdb.json | 5 +++ .../ecc2413d-8a48-416e-a3a2-d30106ca36cb.json | 6 +++ .../2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json | 2 +- .../c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json | 3 ++ .../a4d98375-215b-4a4d-aee9-3d4370fccc41.json | 12 ++++++ .../bedcedc4-4d72-425e-ad62-21960b11fe0d.json | 12 ++++++ .../efcf0d81-0835-4880-b2fd-d866e8bc2294.json | 5 ++- 13 files changed, 115 insertions(+), 20 deletions(-) diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py index ba384de..cdd9a2b 100644 --- a/desktop_env/evaluators/getters/chrome.py +++ b/desktop_env/evaluators/getters/chrome.py @@ -57,6 +57,7 @@ def get_info_from_website(env, config: Dict[Any, Any]) -> Any: port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file server_port = env.server_port remote_debugging_url = f"http://{host}:{port}" + backend_url = f"http://{host}:{server_port}" with sync_playwright() as p: # connect to remote Chrome instance try: @@ -69,7 +70,8 @@ def get_info_from_website(env, config: Dict[Any, Any]) -> Any: "--remote-debugging-port=1337" ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) + #requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) + requests.post(backend_url + "/setup" + "/launch", headers=headers, data=payload) time.sleep(5) browser = p.chromium.connect_over_cdp(remote_debugging_url) diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index eb41e80..58bc4e5 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -47,7 +47,7 @@ def find_default_font(config_file_path, rules): return 1 if default_font == expected_font else 0 -def contains_page_break(docx_file): +def contains_page_break(docx_file, rules): if not docx_file: return 0 @@ -57,6 +57,11 @@ def contains_page_break(docx_file): logger.error(f"Error: {e}") return 0 + try: + expected_page_break_count = rules["page_break_count"] + except Exception as e: + expected_page_break_count = None + namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'} for paragraph in doc.paragraphs: @@ -65,9 +70,15 @@ def contains_page_break(docx_file): for br in br_elems: if br is not None and '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type' in br.attrib and \ br.attrib['{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type'] == 'page': - return 1 - return 0 + page_break_count += 1 + + if expected_page_break_count is not None and page_break_count != expected_page_break_count: + return 0 + if page_break_count > 0: + return 1 + else: + return 0 def compare_docx_files(file1, file2, **options): ignore_blanks = options.get('ignore_blanks', True) @@ -867,7 +878,10 @@ def compare_references(file1, file2, **options): total_similarity += similarity result = total_similarity / len(ref1) - if result >= reference_base_result: + + epsilon = 0.01 + + if result >= reference_base_result + epsilon: return (result - reference_base_result) / (1 - reference_base_result) else: return 0 diff --git a/desktop_env/evaluators/metrics/slides.py b/desktop_env/evaluators/metrics/slides.py index 974246b..dac4fd1 100644 --- a/desktop_env/evaluators/metrics/slides.py +++ b/desktop_env/evaluators/metrics/slides.py @@ -167,9 +167,20 @@ def compare_pptx_files(file1_path, file2_path, **options): slide_idx += 1 def get_slide_background_color(slide): - background = slide.background - if background.fill.background(): - return background.fill.fore_color.rgb + # background = slide.background + # if background.fill.background(): + # return background.fill.fore_color.rgb + # else: + # return None + fill = slide.background.fill + if fill.type == 1: + return fill.fore_color.rgb + elif fill.type == 5: + master_fill = slide.slide_layout.slide_master.background.fill + if master_fill.type == 1: + return master_fill.fore_color.rgb + else: + return None else: return None @@ -185,6 +196,11 @@ def compare_pptx_files(file1_path, file2_path, **options): if get_slide_notes(slide1).strip() != get_slide_notes(slide2).strip() and examine_note: return 0 + + # check if the number of slides is the same + if len(slide1.shapes) != len(slide2.shapes): + return 0 + # check if the shapes are the same for shape1, shape2 in zip(slide1.shapes, slide2.shapes): if examine_title_bottom_position: @@ -363,11 +379,22 @@ def check_slide_orientation_Portrait(pptx_path): def evaluate_presentation_fill_to_rgb_distance(pptx_file, rules): rgb = rules["rgb"] - def slide_fill_distance_to_rgb(_slide, _rgb): + try: + original_rgb = rules["original_rgb"] + except: + original_rgb = None + + def slide_fill_distance_to_rgb(_slide, _rgb, _original_rgb): fill = _slide.background.fill if fill.type == 1: r1, g1, b1 = fill.fore_color.rgb r2, g2, b2 = _rgb + + if _original_rgb is not None: + r3, g3, b3 = _original_rgb + if r1 == r3 and g1 == g3 and b1 == b3: + return 1 + return sqrt((r1 - r2) ** 2 + (g1 - g2) ** 2 + (b1 - b2) ** 2) / sqrt(255 ** 2 + 255 ** 2 + 255 ** 2) elif fill.type == 5: master_fill = _slide.slide_layout.slide_master.background.fill @@ -376,12 +403,18 @@ def evaluate_presentation_fill_to_rgb_distance(pptx_file, rules): else: return 1 r2, g2, b2 = _rgb + + if _original_rgb is not None: + r3, g3, b3 = _original_rgb + if r1 == r3 and g1 == g3 and b1 == b3: + return 1 + return sqrt((r1 - r2) ** 2 + (g1 - g2) ** 2 + (b1 - b2) ** 2) / sqrt(255 ** 2 + 255 ** 2 + 255 ** 2) return 1 prs = Presentation(pptx_file) - similarity = 1 - sum(slide_fill_distance_to_rgb(slide, rgb) for slide in prs.slides) / len(prs.slides) + similarity = 1 - sum(slide_fill_distance_to_rgb(slide, rgb, original_rgb) for slide in prs.slides) / len(prs.slides) return similarity diff --git a/desktop_env/evaluators/metrics/vlc.py b/desktop_env/evaluators/metrics/vlc.py index 3aaa75a..a4f6abc 100644 --- a/desktop_env/evaluators/metrics/vlc.py +++ b/desktop_env/evaluators/metrics/vlc.py @@ -83,7 +83,7 @@ def is_vlc_fullscreen(actual_window_size, screen_size): return 0 -def compare_images(image1_path, image2_path): +def compare_images(image1_path, image2_path, **options): # You would call this function with the paths to the two images you want to compare: # score = compare_images('path_to_image1', 'path_to_image2') # print("Similarity score:", score) @@ -91,6 +91,8 @@ def compare_images(image1_path, image2_path): if not image1_path or not image2_path: return 0 + base_score = options.get("reference_base_result", None) + # Open the images and convert to grayscale image1 = Image.open(image1_path).convert('L') image2 = Image.open(image2_path).convert('L') @@ -110,8 +112,14 @@ def compare_images(image1_path, image2_path): # Calculate SSIM between two images similarity_index = ssim(image1_array, image2_array) - return similarity_index - + epsilon = 0.01 + if base_score is None: + if similarity_index >= base_score + epsilon: + return (similarity_index - base_score) / (1 - base_score) + else: + return 0 + else: + return similarity_index def compare_audios(audio_path_1, audio_path_2): """ diff --git a/desktop_env/server/README.md b/desktop_env/server/README.md index ebbc4bf..6d1dccd 100644 --- a/desktop_env/server/README.md +++ b/desktop_env/server/README.md @@ -249,9 +249,6 @@ echo -n "" >> ~/.local/share/keyrings/login.keyring # Restart Chrome after applying changes -# Alternative Method: Disable keyring service -sudo apt remove gnome-keyring - # Or just prevent Chrome from using keyring mkdir -p ~/.local/share/keyrings touch ~/.local/share/keyrings/login.keyring diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index f4107d2..77ed4d4 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -1051,8 +1051,8 @@ def get_platform(): @app.route('/cursor_position', methods=['GET']) def get_cursor_position(): - return pyautogui.position().x, pyautogui.position().y - + pos = pyautogui.position() + return jsonify(pos.x, pos.y) @app.route("/setup/change_wallpaper", methods=['POST']) def change_wallpaper(): diff --git a/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json b/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json index 591bbd0..b61526a 100644 --- a/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json +++ b/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json @@ -66,6 +66,11 @@ 0, 0, 255 + ], + "original_rgb": [ + 255, + 255, + 255 ] } }, diff --git a/evaluation_examples/examples/libreoffice_writer/ecc2413d-8a48-416e-a3a2-d30106ca36cb.json b/evaluation_examples/examples/libreoffice_writer/ecc2413d-8a48-416e-a3a2-d30106ca36cb.json index 87128a2..3f4ea14 100644 --- a/evaluation_examples/examples/libreoffice_writer/ecc2413d-8a48-416e-a3a2-d30106ca36cb.json +++ b/evaluation_examples/examples/libreoffice_writer/ecc2413d-8a48-416e-a3a2-d30106ca36cb.json @@ -53,6 +53,12 @@ } ], "func": "contains_page_break", + "expected": { + "type": "rule", + "rules": { + "page_break_count": 5 + } + }, "result": { "type": "vm_file", "path": "/home/user/Desktop/Sample_Statutory_Declaration.docx", diff --git a/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json index 808bbc0..55517d9 100644 --- a/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json +++ b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json @@ -128,7 +128,7 @@ }, "options": { "content_only": true, - "reference_base_result": 0.92 + "reference_base_result": 0.93 } } } diff --git a/evaluation_examples/examples/multi_apps/c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json b/evaluation_examples/examples/multi_apps/c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json index dcd790e..1ccc311 100644 --- a/evaluation_examples/examples/multi_apps/c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json +++ b/evaluation_examples/examples/multi_apps/c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json @@ -53,6 +53,9 @@ "type": "cloud_file", "path": "https://drive.usercontent.google.com/download?id=1iIdjmXD6fRSYtpqxujTGzx8agvIgXsqz&export=download&authuser=0&confirm=t", "dest": "gold_background.png" + }, + "options": { + "reference_base_result": 0.54 } } } \ No newline at end of file diff --git a/evaluation_examples/examples/os/a4d98375-215b-4a4d-aee9-3d4370fccc41.json b/evaluation_examples/examples/os/a4d98375-215b-4a4d-aee9-3d4370fccc41.json index 750fea6..969808e 100644 --- a/evaluation_examples/examples/os/a4d98375-215b-4a4d-aee9-3d4370fccc41.json +++ b/evaluation_examples/examples/os/a4d98375-215b-4a4d-aee9-3d4370fccc41.json @@ -5,6 +5,18 @@ "source": "https://help.ubuntu.com/lts/ubuntu-help/privacy-screen-lock.html.en", "trajectory": "trajectories/", "config": [ + { + "type": "execute", + "parameters": { + "command": [ + "gsettings", + "set", + "org.gnome.desktop.screensaver", + "lock-enabled", + "false" + ] + } + }, { "type": "execute", "parameters": { diff --git a/evaluation_examples/examples/os/bedcedc4-4d72-425e-ad62-21960b11fe0d.json b/evaluation_examples/examples/os/bedcedc4-4d72-425e-ad62-21960b11fe0d.json index 97e8545..d795c21 100644 --- a/evaluation_examples/examples/os/bedcedc4-4d72-425e-ad62-21960b11fe0d.json +++ b/evaluation_examples/examples/os/bedcedc4-4d72-425e-ad62-21960b11fe0d.json @@ -5,6 +5,18 @@ "source": "https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s", "trajectory": "trajectories/", "config": [ + { + "type": "execute", + "parameters": { + "command": [ + "gsettings", + "set", + "org.gnome.desktop.session", + "idle-delay", + "1" + ] + } + }, { "type": "execute", "parameters": { diff --git a/evaluation_examples/examples/vlc/efcf0d81-0835-4880-b2fd-d866e8bc2294.json b/evaluation_examples/examples/vlc/efcf0d81-0835-4880-b2fd-d866e8bc2294.json index c1ffc75..ed40c37 100644 --- a/evaluation_examples/examples/vlc/efcf0d81-0835-4880-b2fd-d866e8bc2294.json +++ b/evaluation_examples/examples/vlc/efcf0d81-0835-4880-b2fd-d866e8bc2294.json @@ -52,6 +52,9 @@ "result": { "type": "vm_wallpaper", "dest": "result_wallpaper.png" + }, + "options": { + "reference_base_result": 0.11 } } -} +} \ No newline at end of file