feat: enhance image comparison functionality in gimp.py

- Added resizing logic for images of different sizes: one image is resized to the other's dimensions (LANCZOS resampling) before comparison, ensuring consistent evaluation.
- Implemented mode conversion so that both images are in the same PIL mode (e.g. RGB) before they are compared, for accurate comparison.
- Enhanced structure check by MSE to support conversion of numpy arrays to PIL Images, improving compatibility.
- Maintained existing logic while improving robustness and accuracy of image comparison methods.
This commit is contained in:
yuanmengqi
2025-07-30 06:07:49 +00:00
parent 4ae9d41da4
commit dd488c7294

View File

@@ -17,6 +17,16 @@ def compare_image_list(pred_img_path_list: Union[str, List[str]],
return 0.0
pred_img = Image.open(pred_img_path)
gold_img = Image.open(gold_img_path)
# Check if images have different sizes and resize if necessary
if pred_img.size != gold_img.size:
logging.debug(f"Images have different sizes: {pred_img.size} vs {gold_img.size}, resizing predicted image to match gold image")
pred_img = pred_img.resize(gold_img.size, Image.Resampling.LANCZOS)
# Ensure both images are in the same mode for comparison
if pred_img.mode != gold_img.mode:
pred_img = pred_img.convert(gold_img.mode)
diff = ImageChops.difference(pred_img, gold_img)
if diff.getbbox():
return 0.0
@@ -190,6 +200,27 @@ def calculate_image_sharpness(image_path):
def structure_check_by_mse(img1, img2, threshold=0.03):
"""Check if two images are approximately the same by MSE"""
# Ensure both images are PIL Image objects
if not hasattr(img1, 'size') or not hasattr(img2, 'size'):
# Convert numpy arrays to PIL Images if needed
if hasattr(img1, 'shape'):
img1 = Image.fromarray(img1)
if hasattr(img2, 'shape'):
img2 = Image.fromarray(img2)
# Check if images have different sizes and resize if necessary
if img1.size != img2.size:
logging.debug(f"Images have different sizes: {img1.size} vs {img2.size}, resizing first image to match second")
img1 = img1.resize(img2.size, Image.Resampling.LANCZOS)
# Ensure both images are in RGB mode for consistent comparison
if img1.mode != 'RGB':
img1 = img1.convert('RGB')
if img2.mode != 'RGB':
img2 = img2.convert('RGB')
# Now calculate MSE with properly sized images
mse = np.mean(
(np.array(img1, dtype=np.float32) / 255
- np.array(img2, dtype=np.float32) / 255) ** 2)