feat: enhance image comparison functionality in gimp.py

- Added resizing logic to handle images of different sizes before comparison: the predicted image is resized to the gold image's size (LANCZOS resampling), ensuring consistent evaluation.
- Implemented mode conversion so that both images are in the same mode (format) before they are compared, for accurate comparison.
- Enhanced the structure check by MSE (`structure_check_by_mse`) to support converting numpy arrays to PIL Images, improving compatibility.
- Maintained the existing logic while improving the robustness and accuracy of the image comparison methods.
2025-07-30 06:07:49 +00:00
parent 4ae9d41da4
commit dd488c7294
1 changed files with 31 additions and 0 deletions
--- a/desktop_env/evaluators/metrics/gimp.py
+++ b/desktop_env/evaluators/metrics/gimp.py
@@ -17,6 +17,16 @@ def compare_image_list(pred_img_path_list: Union[str, List[str]],
            return 0.0
        pred_img = Image.open(pred_img_path)
        gold_img = Image.open(gold_img_path)
        # Check if images have different sizes and resize if necessary
        if pred_img.size != gold_img.size:
            logging.debug(f"Images have different sizes: {pred_img.size} vs {gold_img.size}, resizing predicted image to match gold image")
            pred_img = pred_img.resize(gold_img.size, Image.Resampling.LANCZOS)
        # Ensure both images are in the same mode for comparison
        if pred_img.mode != gold_img.mode:
            pred_img = pred_img.convert(gold_img.mode)
        diff = ImageChops.difference(pred_img, gold_img)
        if diff.getbbox():
            return 0.0
@@ -190,6 +200,27 @@ def calculate_image_sharpness(image_path):
 def structure_check_by_mse(img1, img2, threshold=0.03):
    """Check if two images are approximately the same by MSE"""
    # Ensure both images are PIL Image objects
    if not hasattr(img1, 'size') or not hasattr(img2, 'size'):
        # Convert numpy arrays to PIL Images if needed
        if hasattr(img1, 'shape'):
            img1 = Image.fromarray(img1)
        if hasattr(img2, 'shape'):
            img2 = Image.fromarray(img2)
    # Check if images have different sizes and resize if necessary
    if img1.size != img2.size:
        logging.debug(f"Images have different sizes: {img1.size} vs {img2.size}, resizing first image to match second")
        img1 = img1.resize(img2.size, Image.Resampling.LANCZOS)
    # Ensure both images are in RGB mode for consistent comparison
    if img1.mode != 'RGB':
        img1 = img1.convert('RGB')
    if img2.mode != 'RGB':
        img2 = img2.convert('RGB')
    # Now calculate MSE with properly sized images
    mse = np.mean(
        (np.array(img1, dtype=np.float32) / 255
         - np.array(img2, dtype=np.float32) / 255) ** 2)