Enhance GIMP metrics evaluator with logging and transparency handling

- Replaced print statements with logging in gimp.py for better traceability.
- Added handling for transparent images in structure checks and size evaluations.
- Updated JSON examples to include delays in pyautogui commands for improved execution reliability.
- Changed the image URL in the example to a more accessible source.
2025-07-06 19:38:22 +00:00
parent 8facb285a1
commit a68d6f7ab6
15 changed files with 95 additions and 34 deletions
--- a/desktop_env/evaluators/metrics/gimp.py
+++ b/desktop_env/evaluators/metrics/gimp.py
@@ -1,4 +1,5 @@
 import os
+import logging
 from typing import List, Union
 from skimage.metrics import structural_similarity as ssim
 from PIL import Image, ImageChops, ImageStat
@@ -39,7 +40,7 @@ def get_gimp_export_path():
                    return current_path
    except FileNotFoundError:
        # Handle the case where the configuration file is not found
-        print("GIMP configuration file not found")
+        logging.debug("GIMP configuration file not found")
        return False


@@ -193,14 +194,14 @@ def structure_check_by_mse(img1, img2, threshold=0.03):
        (np.array(img1, dtype=np.float32) / 255
         - np.array(img2, dtype=np.float32) / 255) ** 2)
    structure_same = True if mse < threshold else False
-    print(f"MSE: {mse}, threshold: {threshold}")
+    logging.debug(f"MSE: {mse}, threshold: {threshold}")
    return structure_same


 def structure_check_by_ssim(img1, img2, threshold=0.9):
    """Check if two images are approximately the same by SSIM"""
    similarity = ssim(np.array(img1), np.array(img2), multichannel=True, channel_axis=-1)
-    print("SSIM: ", similarity)
+    logging.debug("SSIM: %s", similarity)
    return similarity >= threshold


@@ -364,11 +365,37 @@ def check_structure_sim_resized(src_path, tgt_path):
    img_src = Image.open(src_path)
    img_tgt = Image.open(tgt_path)

-    # Resize the images to the same size
-    img_src = img_src.resize(img_tgt.size)
+    # Check if source image has transparency and extract content area
+    if img_src.mode in ('RGBA', 'LA') or 'transparency' in img_src.info:
+        if img_src.mode != 'RGBA':
+            img_src = img_src.convert('RGBA')
+        
+        # Get alpha channel and find bounding box of non-transparent pixels
+        alpha = img_src.split()[-1]
+        bbox = alpha.getbbox()
+        
+        if bbox is None:
+            # Image is completely transparent
+            logging.debug("Source image is completely transparent")
+            return 0.
+        
+        # Crop to content area only
+        img_src_content = img_src.crop(bbox)
+        logging.debug(f"Source image cropped from {img_src.size} to {img_src_content.size}")
+        
+        # Convert to RGB for comparison
+        img_src_content = img_src_content.convert('RGB')
+        img_src_resized = img_src_content.resize(img_tgt.size)
+    else:
+        # No transparency, resize normally
+        img_src_resized = img_src.resize(img_tgt.size)
+    
+    # Ensure target image is RGB for comparison
+    if img_tgt.mode != 'RGB':
+        img_tgt = img_tgt.convert('RGB')

    # Check if the structure is similar
-    structure_same = structure_check_by_ssim(img_src, img_tgt)
+    structure_same = structure_check_by_ssim(img_src_resized, img_tgt)
    return structure_same


@@ -433,20 +460,52 @@ def check_image_size(src_path, rule):

    # Load the image
    img = Image.open(src_path)
+    
+    # Check if we should ignore transparent parts
+    ignore_transparent = rule.get("ignore_transparent", False)
+    
+    if ignore_transparent and img.mode in ('RGBA', 'LA') or 'transparency' in img.info:
+        # Calculate bounding box of non-transparent pixels
+        if img.mode != 'RGBA':
+            img = img.convert('RGBA')
+        
+        # Get alpha channel
+        alpha = img.split()[-1]
+        
+        # Find bounding box of non-transparent pixels
+        bbox = alpha.getbbox()
+        
+        if bbox is None:
+            # Image is completely transparent
+            actual_width = 0
+            actual_height = 0
+        else:
+            # Calculate actual content size
+            actual_width = bbox[2] - bbox[0]
+            actual_height = bbox[3] - bbox[1]
+        
+        logging.debug(f"Original size: {img.size}, Content size: {actual_width}x{actual_height}")
+    else:
+        # Use original image size
+        actual_width = img.size[0]
+        actual_height = img.size[1]
+        logging.debug(f"Image size: {img.size}")

    # Check the size
    if rule.get("height", None) is not None:
-        height_same = img.size[1] == rule["height"]
+        height_same = actual_height == rule["height"]
    else:
        height_same = True
    if rule.get("width", None) is not None:
-        width_same = img.size[0] == rule["width"]
+        width_same = actual_width == rule["width"]
    else:
        width_same = True

    if height_same and width_same:
+        logging.debug(f"height_same: {height_same}, width_same: {width_same}")
        return 1.
    else:
+        logging.debug(f"height_same: {height_same}, width_same: {width_same}")
        return 0.


--- a/desktop_env/providers/aws/manager.py
+++ b/desktop_env/providers/aws/manager.py
@@ -36,7 +36,7 @@ DEFAULT_REGION = "us-east-1"
 # todo: Add doc for the configuration of image, security group and network interface
 # todo: public the AMI images
 IMAGE_ID_MAP = {
-    "us-east-1": "ami-0a6a55a75d04c8888",
+    "us-east-1": "ami-09138bff939f82bd8",
    "ap-east-1": "ami-0c092a5b8be4116f5",
 }