From a68d6f7ab60ba3ac318b1728c4d98e84d9ec36e5 Mon Sep 17 00:00:00 2001 From: yuanmengqi Date: Sun, 6 Jul 2025 19:38:22 +0000 Subject: [PATCH] Enhance GIMP metrics evaluator with logging and transparency handling - Replaced print statements with logging for better traceability in gimp.py. - Added handling for transparent images in structure checks and size evaluations. - Updated JSON examples to include delays in pyautogui commands for improved execution reliability. - Changed image URL in example to a more accessible source. --- desktop_env/evaluators/metrics/gimp.py | 75 +++++++++++++++++-- desktop_env/providers/aws/manager.py | 2 +- .../2a729ded-3296-423d-aec4-7dd55ed5fbb3.json | 6 +- .../554785e9-4523-4e7a-b8e1-8016f565f56a.json | 2 +- .../72f83cdc-bf76-4531-9a1b-eb893a13f8aa.json | 4 +- .../734d6579-c07d-47a8-9ae2-13339795476b.json | 4 +- .../7767eef2-56a3-4cea-8c9f-48c070c7d65b.json | 2 +- .../7a4deb26-d57d-4ea9-9a73-630f66a7b568.json | 4 +- .../7b7617bd-57cc-468e-9c91-40c4ec2bcb3d.json | 2 +- .../a746add2-cab0-4740-ac36-c3769d9bfb46.json | 6 +- .../b148e375-fe0b-4bec-90e7-38632b0d73c2.json | 2 +- .../d16c99dc-2a1e-46f2-b350-d97c86c85c15.json | 8 +- .../e2dd0213-26db-4349-abe5-d5667bfd725c.json | 2 +- .../f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce.json | 4 +- .../f723c744-e62c-4ae6-98d1-750d3cd7d79d.json | 6 +- 15 files changed, 95 insertions(+), 34 deletions(-) diff --git a/desktop_env/evaluators/metrics/gimp.py b/desktop_env/evaluators/metrics/gimp.py index 9f374e1..5dddd78 100644 --- a/desktop_env/evaluators/metrics/gimp.py +++ b/desktop_env/evaluators/metrics/gimp.py @@ -1,4 +1,5 @@ import os +import logging from typing import List, Union from skimage.metrics import structural_similarity as ssim from PIL import Image, ImageChops, ImageStat @@ -39,7 +40,7 @@ def get_gimp_export_path(): return current_path except FileNotFoundError: # Handle the case where the configuration file is not found - print("GIMP configuration file not found") + logging.debug("GIMP configuration file not found") return False @@ -193,14 +194,14 @@ def structure_check_by_mse(img1, img2, threshold=0.03): (np.array(img1, dtype=np.float32) / 255 - np.array(img2, dtype=np.float32) / 255) ** 2) structure_same = True if mse < threshold else False - print(f"MSE: {mse}, threshold: {threshold}") + logging.debug(f"MSE: {mse}, threshold: {threshold}") return structure_same def structure_check_by_ssim(img1, img2, threshold=0.9): """Check if two images are approximately the same by SSIM""" similarity = ssim(np.array(img1), np.array(img2), multichannel=True, channel_axis=-1) - print("SSIM: ", similarity) + logging.debug("SSIM: %s", similarity) return similarity >= threshold @@ -364,11 +365,37 @@ def check_structure_sim_resized(src_path, tgt_path): img_src = Image.open(src_path) img_tgt = Image.open(tgt_path) - # Resize the images to the same size - img_src = img_src.resize(img_tgt.size) + # Check if source image has transparency and extract content area + if img_src.mode in ('RGBA', 'LA') or 'transparency' in img_src.info: + if img_src.mode != 'RGBA': + img_src = img_src.convert('RGBA') + + # Get alpha channel and find bounding box of non-transparent pixels + alpha = img_src.split()[-1] + bbox = alpha.getbbox() + + if bbox is None: + # Image is completely transparent + logging.debug("Source image is completely transparent") + return 0. + + # Crop to content area only + img_src_content = img_src.crop(bbox) + logging.debug(f"Source image cropped from {img_src.size} to {img_src_content.size}") + + # Convert to RGB for comparison + img_src_content = img_src_content.convert('RGB') + img_src_resized = img_src_content.resize(img_tgt.size) + else: + # No transparency, resize normally + img_src_resized = img_src.resize(img_tgt.size) + + # Ensure target image is RGB for comparison + if img_tgt.mode != 'RGB': + img_tgt = img_tgt.convert('RGB') # Check if the structure is similar - structure_same = structure_check_by_ssim(img_src, img_tgt) + structure_same = structure_check_by_ssim(img_src_resized, img_tgt) return structure_same @@ -433,20 +460,52 @@ def check_image_size(src_path, rule): # Load the image img = Image.open(src_path) + + # Check if we should ignore transparent parts + ignore_transparent = rule.get("ignore_transparent", False) + + if ignore_transparent and img.mode in ('RGBA', 'LA') or 'transparency' in img.info: + # Calculate bounding box of non-transparent pixels + if img.mode != 'RGBA': + img = img.convert('RGBA') + + # Get alpha channel + alpha = img.split()[-1] + + # Find bounding box of non-transparent pixels + bbox = alpha.getbbox() + + if bbox is None: + # Image is completely transparent + actual_width = 0 + actual_height = 0 + else: + # Calculate actual content size + actual_width = bbox[2] - bbox[0] + actual_height = bbox[3] - bbox[1] + + logging.debug(f"Original size: {img.size}, Content size: {actual_width}x{actual_height}") + else: + # Use original image size + actual_width = img.size[0] + actual_height = img.size[1] + logging.debug(f"Image size: {img.size}") # Check the size if rule.get("height", None) is not None: - height_same = img.size[1] == rule["height"] + height_same = actual_height == rule["height"] else: height_same = True if rule.get("width", None) is not None: - width_same = img.size[0] == rule["width"] + width_same = actual_width == rule["width"] else: width_same = True if height_same and width_same: + logging.debug(f"height_same: {height_same}, width_same: {width_same}") return 1. else: + logging.debug(f"height_same: {height_same}, width_same: {width_same}") return 0. diff --git a/desktop_env/providers/aws/manager.py b/desktop_env/providers/aws/manager.py index 01b904f..287327d 100644 --- a/desktop_env/providers/aws/manager.py +++ b/desktop_env/providers/aws/manager.py @@ -36,7 +36,7 @@ DEFAULT_REGION = "us-east-1" # todo: Add doc for the configuration of image, security group and network interface # todo: public the AMI images IMAGE_ID_MAP = { - "us-east-1": "ami-0a6a55a75d04c8888", + "us-east-1": "ami-09138bff939f82bd8", "ap-east-1": "ami-0c092a5b8be4116f5", } diff --git a/evaluation_examples/examples/gimp/2a729ded-3296-423d-aec4-7dd55ed5fbb3.json b/evaluation_examples/examples/gimp/2a729ded-3296-423d-aec4-7dd55ed5fbb3.json index ae7a6c9..5940380 100644 --- a/evaluation_examples/examples/gimp/2a729ded-3296-423d-aec4-7dd55ed5fbb3.json +++ b/evaluation_examples/examples/gimp/2a729ded-3296-423d-aec4-7dd55ed5fbb3.json @@ -41,7 +41,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.hotkey([\"shift\", \"ctrl\", \"e\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.hotkey([\"shift\", \"ctrl\", \"e\"]);" ] } }, @@ -57,7 +57,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.write(\"dog_without_background\");pyautogui.press([\"enter\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.write(\"dog_without_background\");time.sleep(1);pyautogui.press([\"enter\"]);" ] } }, @@ -73,7 +73,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.press([\"enter\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.press([\"enter\"]);" ] } }, diff --git a/evaluation_examples/examples/gimp/554785e9-4523-4e7a-b8e1-8016f565f56a.json b/evaluation_examples/examples/gimp/554785e9-4523-4e7a-b8e1-8016f565f56a.json index 916bc46..2b11f55 100644 --- a/evaluation_examples/examples/gimp/554785e9-4523-4e7a-b8e1-8016f565f56a.json +++ b/evaluation_examples/examples/gimp/554785e9-4523-4e7a-b8e1-8016f565f56a.json @@ -53,7 +53,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.write(\"edited_colorful\");pyautogui.press([\"enter\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.write(\"edited_colorful\");time.sleep(1);pyautogui.press([\"enter\"]);" ] } }, diff --git a/evaluation_examples/examples/gimp/72f83cdc-bf76-4531-9a1b-eb893a13f8aa.json b/evaluation_examples/examples/gimp/72f83cdc-bf76-4531-9a1b-eb893a13f8aa.json index d9f00f4..e4f9083 100644 --- a/evaluation_examples/examples/gimp/72f83cdc-bf76-4531-9a1b-eb893a13f8aa.json +++ b/evaluation_examples/examples/gimp/72f83cdc-bf76-4531-9a1b-eb893a13f8aa.json @@ -37,7 +37,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.hotkey([\"shift\", \"ctrl\", \"e\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.hotkey([\"shift\", \"ctrl\", \"e\"]);" ] } }, @@ -53,7 +53,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.write(\"berry_mirror\");pyautogui.press([\"enter\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.write(\"berry_mirror\");time.sleep(1);pyautogui.press([\"enter\"]);" ] } }, diff --git a/evaluation_examples/examples/gimp/734d6579-c07d-47a8-9ae2-13339795476b.json b/evaluation_examples/examples/gimp/734d6579-c07d-47a8-9ae2-13339795476b.json index d5c06b8..7b9a8b5 100644 --- a/evaluation_examples/examples/gimp/734d6579-c07d-47a8-9ae2-13339795476b.json +++ b/evaluation_examples/examples/gimp/734d6579-c07d-47a8-9ae2-13339795476b.json @@ -41,7 +41,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.hotkey([\"shift\", \"ctrl\", \"e\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.hotkey([\"shift\", \"ctrl\", \"e\"]);" ] } }, @@ -57,7 +57,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.write(\"green_background_with_object\");pyautogui.press([\"enter\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.write(\"green_background_with_object\");time.sleep(1);pyautogui.press([\"enter\"]);" ] } }, diff --git a/evaluation_examples/examples/gimp/7767eef2-56a3-4cea-8c9f-48c070c7d65b.json b/evaluation_examples/examples/gimp/7767eef2-56a3-4cea-8c9f-48c070c7d65b.json index b412d54..51fb60c 100644 --- a/evaluation_examples/examples/gimp/7767eef2-56a3-4cea-8c9f-48c070c7d65b.json +++ b/evaluation_examples/examples/gimp/7767eef2-56a3-4cea-8c9f-48c070c7d65b.json @@ -25,7 +25,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.hotkey([\"ctrl\", \"q\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.hotkey([\"ctrl\", \"q\"]);" ] } }, diff --git a/evaluation_examples/examples/gimp/7a4deb26-d57d-4ea9-9a73-630f66a7b568.json b/evaluation_examples/examples/gimp/7a4deb26-d57d-4ea9-9a73-630f66a7b568.json index 558bf24..2bc51b7 100644 --- a/evaluation_examples/examples/gimp/7a4deb26-d57d-4ea9-9a73-630f66a7b568.json +++ b/evaluation_examples/examples/gimp/7a4deb26-d57d-4ea9-9a73-630f66a7b568.json @@ -37,7 +37,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.hotkey([\"shift\", \"ctrl\", \"e\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.hotkey([\"shift\", \"ctrl\", \"e\"]);" ] } }, @@ -53,7 +53,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.write(\"edited_darker\");pyautogui.press([\"enter\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.write(\"edited_darker\");time.sleep(1);pyautogui.press([\"enter\"]);" ] } }, diff --git a/evaluation_examples/examples/gimp/7b7617bd-57cc-468e-9c91-40c4ec2bcb3d.json b/evaluation_examples/examples/gimp/7b7617bd-57cc-468e-9c91-40c4ec2bcb3d.json index 39ab7f0..c120347 100644 --- a/evaluation_examples/examples/gimp/7b7617bd-57cc-468e-9c91-40c4ec2bcb3d.json +++ b/evaluation_examples/examples/gimp/7b7617bd-57cc-468e-9c91-40c4ec2bcb3d.json @@ -25,7 +25,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.hotkey([\"ctrl\", \"q\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.hotkey([\"ctrl\", \"q\"]);" ] } }, diff --git a/evaluation_examples/examples/gimp/a746add2-cab0-4740-ac36-c3769d9bfb46.json b/evaluation_examples/examples/gimp/a746add2-cab0-4740-ac36-c3769d9bfb46.json index 557e13c..15d926d 100644 --- a/evaluation_examples/examples/gimp/a746add2-cab0-4740-ac36-c3769d9bfb46.json +++ b/evaluation_examples/examples/gimp/a746add2-cab0-4740-ac36-c3769d9bfb46.json @@ -9,7 +9,7 @@ "parameters": { "files": [ { - "url": "https://agent-files.deva.msh.team/osworld/benchmark_files/gimp/a746add2-cab0-4740-ac36-c3769d9bfb46_dog_with_background.png", + "url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/gimp/a746add2-cab0-4740-ac36-c3769d9bfb46/dog_with_background.png", "path": "/home/user/Desktop/dog_with_background.png" } ] @@ -37,7 +37,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.press([\"esc\"]);pyautogui.hotkey([\"ctrl\", \"q\"]);" + "import time; import pyautogui; pyautogui.press([\"esc\"]);time.sleep(1);pyautogui.hotkey([\"ctrl\", \"q\"]);time.sleep(1);" ] } }, @@ -47,7 +47,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.press([\"right\"]);pyautogui.press([\"enter\"]);" + "import time; import pyautogui; pyautogui.press([\"right\"]);time.sleep(1);pyautogui.press([\"enter\"])" ] } }, diff --git a/evaluation_examples/examples/gimp/b148e375-fe0b-4bec-90e7-38632b0d73c2.json b/evaluation_examples/examples/gimp/b148e375-fe0b-4bec-90e7-38632b0d73c2.json index 984e6f2..5e3baf9 100644 --- a/evaluation_examples/examples/gimp/b148e375-fe0b-4bec-90e7-38632b0d73c2.json +++ b/evaluation_examples/examples/gimp/b148e375-fe0b-4bec-90e7-38632b0d73c2.json @@ -37,7 +37,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.hotkey([\"ctrl\", \"q\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.hotkey([\"ctrl\", \"q\"]);" ] } }, diff --git a/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json b/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json index e41d581..b95ee95 100644 --- a/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json +++ b/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json @@ -41,7 +41,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.hotkey([\"shift\", \"ctrl\", \"e\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.hotkey([\"shift\", \"ctrl\", \"e\"]);time.sleep(1);" ] } }, @@ -57,7 +57,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.write(\"resized\");pyautogui.press([\"enter\"]);" + "import time; import pyautogui; pyautogui.write(\"resized\");time.sleep(1);pyautogui.press([\"enter\"]);" ] } }, @@ -88,11 +88,13 @@ "check_image_size", "check_structure_sim_resized" ], + "conj": "and", "expected": [ { "type": "rule", "rules": { - "height": 512 + "height": 512, + "ignore_transparent": true } }, { diff --git a/evaluation_examples/examples/gimp/e2dd0213-26db-4349-abe5-d5667bfd725c.json b/evaluation_examples/examples/gimp/e2dd0213-26db-4349-abe5-d5667bfd725c.json index 2eac273..6474dfe 100644 --- a/evaluation_examples/examples/gimp/e2dd0213-26db-4349-abe5-d5667bfd725c.json +++ b/evaluation_examples/examples/gimp/e2dd0213-26db-4349-abe5-d5667bfd725c.json @@ -53,7 +53,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.write(\"leftside_textbox\");pyautogui.press([\"enter\"]);" + "import time; import pyautogui; pyautogui.write(\"leftside_textbox\");time.sleep(1);pyautogui.press([\"enter\"]);" ] } }, diff --git a/evaluation_examples/examples/gimp/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce.json b/evaluation_examples/examples/gimp/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce.json index d69be03..d8b103d 100644 --- a/evaluation_examples/examples/gimp/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce.json +++ b/evaluation_examples/examples/gimp/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce.json @@ -57,7 +57,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.write(\"Triangle_In_The_Middle\");pyautogui.press([\"enter\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.write(\"Triangle_In_The_Middle\");time.sleep(1);pyautogui.press([\"enter\"]);" ] } }, @@ -73,7 +73,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.press([\"enter\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.press([\"enter\"]);" ] } }, diff --git a/evaluation_examples/examples/gimp/f723c744-e62c-4ae6-98d1-750d3cd7d79d.json b/evaluation_examples/examples/gimp/f723c744-e62c-4ae6-98d1-750d3cd7d79d.json index 0ec6072..c5fc0f5 100644 --- a/evaluation_examples/examples/gimp/f723c744-e62c-4ae6-98d1-750d3cd7d79d.json +++ b/evaluation_examples/examples/gimp/f723c744-e62c-4ae6-98d1-750d3cd7d79d.json @@ -37,7 +37,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.hotkey([\"shift\", \"ctrl\", \"e\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.hotkey([\"shift\", \"ctrl\", \"e\"]);" ] } }, @@ -53,7 +53,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.write(\"berries_contrast\");pyautogui.press([\"enter\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.write(\"berries_contrast\");time.sleep(1);pyautogui.press([\"enter\"]);" ] } }, @@ -69,7 +69,7 @@ "command": [ "python3", "-c", - "import pyautogui; pyautogui.press([\"enter\"]);" + "import time; import pyautogui; time.sleep(1);pyautogui.press([\"enter\"]);" ] } },