Merge remote-tracking branch 'upstream/main' into fix_chrome
This commit is contained in:
@@ -2,6 +2,7 @@ import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import io
|
||||
from itertools import product
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
@@ -200,6 +201,7 @@ import fitz
|
||||
from PIL import Image
|
||||
from borb.pdf import Document
|
||||
from borb.pdf import PDF
|
||||
import imagehash
|
||||
|
||||
from pathlib import Path
|
||||
import typing
|
||||
@@ -208,6 +210,9 @@ import typing
|
||||
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
|
||||
if not pdf1_path or not pdf2_path:
|
||||
return 0.
|
||||
if not all(map(os.path.exists, [pdf1_path, pdf2_path])):
|
||||
logger.warning(f"PDF file does not exist: {pdf1_path} or {pdf2_path}")
|
||||
return 0.
|
||||
|
||||
def extract_images_from_pdf(pdf_path):
|
||||
pdf_document = fitz.open(pdf_path)
|
||||
@@ -215,35 +220,61 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
|
||||
|
||||
for page_number in range(pdf_document.page_count):
|
||||
page = pdf_document[page_number]
|
||||
pixmap = page.get_pixmap()
|
||||
|
||||
img = Image.frombytes("RGB", [pixmap.width, pixmap.height], pixmap.samples)
|
||||
|
||||
images.append(img)
|
||||
for img_index, img in enumerate(page.get_images(full=True)):
|
||||
xref = img[0]
|
||||
base_image = pdf_document.extract_image(xref)
|
||||
image_bytes = base_image["image"]
|
||||
|
||||
# convert to PIL Image
|
||||
try:
|
||||
pil_image = Image.open(io.BytesIO(image_bytes))
|
||||
images.append(pil_image)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to process image in {pdf_path} on page {page_number}: {e}")
|
||||
|
||||
return images
|
||||
|
||||
temp_dir = Path(pdf1_path).parent / "temp_pdf_comparison"
|
||||
os.makedirs(temp_dir, exist_ok=True)
|
||||
|
||||
temp_pdf1 = temp_dir / Path(pdf1_path).name
|
||||
temp_pdf2 = temp_dir / Path(pdf2_path).name
|
||||
|
||||
def fix_pdf(in_path: Path, out_path: Path) -> None:
    """
    Round-trip a PDF through borb: load it from in_path and write it to out_path.

    Args:
        in_path (Path): PDF file to read.
        out_path (Path): file the re-serialized document is written to.
    """
    # The source handle is closed before the destination is opened, so the
    # document is fully loaded even when both paths name the same file.
    with open(in_path, "rb") as source:
        document: typing.Optional[Document] = PDF.loads(source)
    with open(out_path, "wb") as target:
        PDF.dumps(target, document)
|
||||
shutil.copy(pdf1_path, temp_pdf1)
|
||||
shutil.copy(pdf2_path, temp_pdf2)
|
||||
|
||||
fix_pdf(Path(pdf1_path), Path(pdf1_path))
|
||||
fix_pdf(Path(pdf2_path), Path(pdf2_path))
|
||||
try:
|
||||
images1 = extract_images_from_pdf(str(temp_pdf1))
|
||||
images2 = extract_images_from_pdf(str(temp_pdf2))
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting images from PDFs: {e}")
|
||||
shutil.rmtree(temp_dir)
|
||||
return 0.
|
||||
finally:
|
||||
shutil.rmtree(temp_dir)
|
||||
|
||||
images1 = extract_images_from_pdf(pdf1_path)
|
||||
images2 = extract_images_from_pdf(pdf2_path)
|
||||
|
||||
if len(images1) != len(images2):
|
||||
logger.info(f"Different number of images found. Gold: {len(images1)}, Pred: {len(images2)}")
|
||||
return 0.
|
||||
|
||||
for img1, img2 in zip(images1, images2):
|
||||
if img1.tobytes() != img2.tobytes():
|
||||
return 0.
|
||||
if not images1:
|
||||
logger.info("No images found in either PDF. Considering it a match.")
|
||||
return 1.0
|
||||
|
||||
return 1.
|
||||
hash_threshold = 5
|
||||
total_score = 0
|
||||
for i, (img1, img2) in enumerate(zip(images1, images2)):
|
||||
hash1 = imagehash.phash(img1)
|
||||
hash2 = imagehash.phash(img2)
|
||||
hash_diff = hash1 - hash2
|
||||
|
||||
logger.info(f"Image {i+1}: Gold hash: {hash1}, Pred hash: {hash2}, Hash difference: {hash_diff}")
|
||||
|
||||
if hash_diff <= hash_threshold:
|
||||
total_score +=1
|
||||
|
||||
return total_score / len(images1)
|
||||
|
||||
|
||||
def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
|
||||
|
||||
@@ -86,6 +86,7 @@ def compare_docx_files(file1, file2, **options):
|
||||
ignore_case = options.get('ignore_case', False)
|
||||
ignore_order = options.get('ignore_order', False)
|
||||
content_only = options.get('content_only', False)
|
||||
fuzzy_match = options.get('fuzzy_match', False)
|
||||
delete_empty_lines = options.get('delete_empty_lines', False)
|
||||
|
||||
if not file1 or not file2:
|
||||
@@ -158,29 +159,48 @@ def compare_docx_files(file1, file2, **options):
|
||||
text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
|
||||
if ignore_case:
|
||||
text1, text2 = text1.lower(), text2.lower()
|
||||
if text1 != text2:
|
||||
return 0
|
||||
|
||||
if fuzzy_match:
|
||||
similarity = fuzz.ratio(text1, text2) / 100.0
|
||||
return similarity
|
||||
else:
|
||||
if text1 != text2:
|
||||
return 0
|
||||
else:
|
||||
print("ignore_blanks=false")
|
||||
if len(doc1_paragraphs) != len(doc2_paragraphs):
|
||||
print(doc1_paragraphs)
|
||||
print(doc2_paragraphs)
|
||||
print(len(doc1_paragraphs))
|
||||
print(len(doc2_paragraphs))
|
||||
return 0
|
||||
print("in compare")
|
||||
# Compare each paragraph
|
||||
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
|
||||
if ignore_case:
|
||||
p1, p2 = p1.lower(), p2.lower()
|
||||
if p1 != p2:
|
||||
# show the difference
|
||||
print("=== First Paragraph ===")
|
||||
print(f"\033[92m{repr(p1)}\033[0m") # Green color for p1, repr() shows hidden chars
|
||||
print("=== Second Paragraph ===")
|
||||
print(f"\033[91m{repr(p2)}\033[0m") # Red color for p2, repr() shows hidden chars
|
||||
print("=" * 50) # Clear boundary
|
||||
return 0
|
||||
|
||||
if fuzzy_match:
|
||||
total_similarity = 0
|
||||
if not doc1_paragraphs:
|
||||
return 1.0
|
||||
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
|
||||
if ignore_case:
|
||||
p1, p2 = p1.lower(), p2.lower()
|
||||
total_similarity += fuzz.ratio(p1, p2) / 100.0
|
||||
|
||||
if len(doc1_paragraphs) == 0:
|
||||
return 1.0 if len(doc2_paragraphs) == 0 else 0.0
|
||||
|
||||
avg_similarity = total_similarity / len(doc1_paragraphs)
|
||||
return avg_similarity
|
||||
else:
|
||||
# Compare each paragraph
|
||||
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
|
||||
if ignore_case:
|
||||
p1, p2 = p1.lower(), p2.lower()
|
||||
if p1 != p2:
|
||||
# show the difference
|
||||
print("=== First Paragraph ===")
|
||||
print(f"\033[92m{repr(p1)}\033[0m") # Green color for p1, repr() shows hidden chars
|
||||
print("=== Second Paragraph ===")
|
||||
print(f"\033[91m{repr(p2)}\033[0m") # Red color for p2, repr() shows hidden chars
|
||||
print("=" * 50) # Clear boundary
|
||||
return 0
|
||||
|
||||
return 1
|
||||
|
||||
|
||||
@@ -178,18 +178,43 @@ def check_list(result: str, rules: Dict[str, List[str]]) -> float:
|
||||
return float(all(expect_metrics) and unexpect_metric)
|
||||
|
||||
|
||||
_accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org"
|
||||
, "attr": "uri:deskat:attributes.at-spi.gnome.org"
|
||||
, "cp": "uri:deskat:component.at-spi.gnome.org"
|
||||
, "doc": "uri:deskat:document.at-spi.gnome.org"
|
||||
, "docattr": "uri:deskat:attributes.document.at-spi.gnome.org"
|
||||
, "txt": "uri:deskat:text.at-spi.gnome.org"
|
||||
, "val": "uri:deskat:value.at-spi.gnome.org"
|
||||
, "act": "uri:deskat:action.at-spi.gnome.org"
|
||||
}
|
||||
_accessibility_ns_map = {
|
||||
"ubuntu": {
|
||||
"st": "https://accessibility.ubuntu.example.org/ns/state",
|
||||
"attr": "https://accessibility.ubuntu.example.org/ns/attributes",
|
||||
"cp": "https://accessibility.ubuntu.example.org/ns/component",
|
||||
"doc": "https://accessibility.ubuntu.example.org/ns/document",
|
||||
"docattr": "https://accessibility.ubuntu.example.org/ns/document/attributes",
|
||||
"txt": "https://accessibility.ubuntu.example.org/ns/text",
|
||||
"val": "https://accessibility.ubuntu.example.org/ns/value",
|
||||
"act": "https://accessibility.ubuntu.example.org/ns/action",
|
||||
},
|
||||
"windows": {
|
||||
"st": "https://accessibility.windows.example.org/ns/state",
|
||||
"attr": "https://accessibility.windows.example.org/ns/attributes",
|
||||
"cp": "https://accessibility.windows.example.org/ns/component",
|
||||
"doc": "https://accessibility.windows.example.org/ns/document",
|
||||
"docattr": "https://accessibility.windows.example.org/ns/document/attributes",
|
||||
"txt": "https://accessibility.windows.example.org/ns/text",
|
||||
"val": "https://accessibility.windows.example.org/ns/value",
|
||||
"act": "https://accessibility.windows.example.org/ns/action",
|
||||
"class": "https://accessibility.windows.example.org/ns/class"
|
||||
},
|
||||
"macos": {
|
||||
"st": "https://accessibility.macos.example.org/ns/state",
|
||||
"attr": "https://accessibility.macos.example.org/ns/attributes",
|
||||
"cp": "https://accessibility.macos.example.org/ns/component",
|
||||
"doc": "https://accessibility.macos.example.org/ns/document",
|
||||
"txt": "https://accessibility.macos.example.org/ns/text",
|
||||
"val": "https://accessibility.macos.example.org/ns/value",
|
||||
"act": "https://accessibility.macos.example.org/ns/action",
|
||||
"role": "https://accessibility.macos.example.org/ns/role",
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float:
|
||||
def check_accessibility_tree(result: str, rules: List[Dict[str, Any]], osname: str = "ubuntu") -> float:
|
||||
"""
|
||||
Args:
|
||||
result (str): XML of GNOME Accessibility Tree
|
||||
@@ -205,18 +230,21 @@ def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float:
|
||||
"exact": bool specifying whether exact match or fuzzy match should
|
||||
be performed. defaults to True.
|
||||
}
|
||||
osname (str): "ubuntu" | "windows" | "macos". "ubuntu" by default.
|
||||
|
||||
Returns:
|
||||
float
|
||||
"""
|
||||
|
||||
a11y_ns_map = _accessibility_ns_map[osname]
|
||||
|
||||
at: _Element = lxml.etree.fromstring(result)
|
||||
total_match_score = 1.
|
||||
for r in rules:
|
||||
if "xpath" in r:
|
||||
elements: List[_Element] = at.xpath(r["xpath"], namespaces=_accessibility_ns_map)
|
||||
elements: List[_Element] = at.xpath(r["xpath"], namespaces=a11y_ns_map)
|
||||
elif "selectors" in r:
|
||||
selector = CSSSelector(", ".join(r["selectors"]), namespaces=_accessibility_ns_map)
|
||||
selector = CSSSelector(", ".join(r["selectors"]), namespaces=a11y_ns_map)
|
||||
elements: List[_Element] = selector(at)
|
||||
else:
|
||||
raise ValueError("At least one of xpath and selectors is required")
|
||||
@@ -307,6 +335,9 @@ def check_direct_json_object(result, rules) -> float:
|
||||
One of the most commonly used functions to evaluate.
|
||||
Compare two json objects directly.
|
||||
"""
|
||||
logger.info(f"[DEBUG] check_direct_json_object called with result: {result}")
|
||||
logger.info(f"[DEBUG] check_direct_json_object called with rules: {rules}")
|
||||
|
||||
if isinstance(result, str):
|
||||
# remove blanks before and after result
|
||||
result = result.strip()
|
||||
@@ -314,45 +345,84 @@ def check_direct_json_object(result, rules) -> float:
|
||||
result = result.replace("'", '"')
|
||||
# load json object
|
||||
result = json.loads(result)
|
||||
|
||||
logger.info(f"[DEBUG] Processed result: {result}")
|
||||
|
||||
if result is None:
|
||||
logger.info("[DEBUG] Result is None, returning 0.0")
|
||||
return 0.
|
||||
|
||||
# Check if expected value contains evaluation failure indicator
|
||||
try:
|
||||
expected_json = rules.get("expected", {})
|
||||
if expected_json:
|
||||
for key, value in expected_json.items():
|
||||
if value == "__EVALUATION_FAILED__":
|
||||
logger.error(f"[DEBUG] Expected value for key '{key}' indicates evaluation failure, returning 0.0")
|
||||
return 0.
|
||||
except Exception as e:
|
||||
logger.error(f"[DEBUG] Error checking for evaluation failure indicator: {e}")
|
||||
return 0.
|
||||
try:
|
||||
expect_in_result = rules.get("expect_in_result", False)
|
||||
logger.info(f"[DEBUG] expect_in_result: {expect_in_result}")
|
||||
|
||||
if not expect_in_result:
|
||||
expected_json = rules["expected"]
|
||||
logger.info(f"[DEBUG] Expected JSON: {expected_json}")
|
||||
|
||||
for key in expected_json.keys():
|
||||
expected_value = expected_json.get(key)
|
||||
actual_value = result.get(key)
|
||||
logger.info(f"[DEBUG] Checking key '{key}': expected='{expected_value}', actual='{actual_value}'")
|
||||
|
||||
if expected_json.get("ignore_list_order", False):
|
||||
expected_value = sorted(expected_value)
|
||||
result_value = sorted(result.get(key))
|
||||
logger.info(f"[DEBUG] Comparing lists (sorted): expected={expected_value}, actual={result_value}")
|
||||
if expected_value != result_value:
|
||||
logger.info(f"[DEBUG] List comparison failed for key '{key}', returning 0.0")
|
||||
return 0.
|
||||
else:
|
||||
if expected_value != result.get(key):
|
||||
if expected_value != actual_value:
|
||||
logger.info(f"[DEBUG] Value comparison failed for key '{key}': expected='{expected_value}', actual='{actual_value}', returning 0.0")
|
||||
return 0.
|
||||
else:
|
||||
logger.info(f"[DEBUG] Value comparison passed for key '{key}'")
|
||||
|
||||
logger.info("[DEBUG] All comparisons passed, returning 1.0")
|
||||
return 1.0
|
||||
else:
|
||||
expected_json = rules["expected"]
|
||||
logger.info(f"[DEBUG] Expected JSON (expect_in_result mode): {expected_json}")
|
||||
|
||||
for key in expected_json.keys():
|
||||
if isinstance(expected_json.get(key), list):
|
||||
flag = 0
|
||||
expected_value_list = expected_json.get(key)
|
||||
logger.info(f"[DEBUG] Checking list key '{key}': expected_list={expected_value_list}, actual='{result.get(key)}'")
|
||||
for each_expected_value in expected_value_list:
|
||||
if isinstance(result.get(key), list) and each_expected_value in result.get(key):
|
||||
flag = 1
|
||||
logger.info(f"[DEBUG] Found expected value '{each_expected_value}' in result list for key '{key}'")
|
||||
break
|
||||
if flag == 0:
|
||||
logger.info(f"[DEBUG] No expected values found in result list for key '{key}', returning 0.0")
|
||||
return 0.
|
||||
elif isinstance(expected_json.get(key), str):
|
||||
if expected_json.get(key) not in result.get(key):
|
||||
expected_str = expected_json.get(key)
|
||||
actual_str = result.get(key)
|
||||
logger.info(f"[DEBUG] Checking string key '{key}': expected='{expected_str}', actual='{actual_str}'")
|
||||
if expected_str not in actual_str:
|
||||
logger.info(f"[DEBUG] Expected string '{expected_str}' not found in actual string '{actual_str}' for key '{key}', returning 0.0")
|
||||
return 0.
|
||||
else:
|
||||
logger.debug("check_direct_json_object: expected value type not supported")
|
||||
return 0.
|
||||
logger.info("[DEBUG] All expect_in_result comparisons passed, returning 1.0")
|
||||
return 1.0
|
||||
except:
|
||||
logger.debug("check_direct_json_object: result is not a valid json object")
|
||||
except Exception as e:
|
||||
logger.debug(f"check_direct_json_object: result is not a valid json object, error: {e}")
|
||||
return 0.
|
||||
|
||||
|
||||
@@ -361,7 +431,7 @@ def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
|
||||
return 0
|
||||
|
||||
# open the speedtest results file(csv)
|
||||
date_col = None
|
||||
#date_col = None
|
||||
try:
|
||||
with open(speedtest_result_path, 'r') as f:
|
||||
for i, line in enumerate(f):
|
||||
@@ -476,37 +546,66 @@ def compare_terminal_and_txt(txt_file_path, terminal_output):
|
||||
|
||||
def compare_python_pure_text(py_file_path, gold_file_path):
|
||||
if not py_file_path or not gold_file_path:
|
||||
return 0
|
||||
return 0.0
|
||||
|
||||
# first, change the suffix of gold_file from .txt to .py
|
||||
print("py_file_path: ")
|
||||
print(py_file_path)
|
||||
print("gold_file_path: ")
|
||||
print(gold_file_path)
|
||||
def _normalize(text):
|
||||
"""
|
||||
Minimal normalization - only handle basic formatting:
|
||||
- Skip obvious file metadata (encoding, shebang) at the beginning
|
||||
- Normalize whitespace and indentation
|
||||
- Remove empty lines
|
||||
|
||||
This preserves any content that shouldn't be there (like markdown)
|
||||
so it can be detected as an error.
|
||||
"""
|
||||
lines = text.splitlines()
|
||||
result_lines = []
|
||||
i = 0
|
||||
|
||||
# Only skip obvious metadata at the very beginning
|
||||
while i < len(lines) and i < 3: # Check only first 3 lines
|
||||
stripped = lines[i].strip()
|
||||
|
||||
if (stripped.startswith('#!') or
|
||||
stripped.startswith('# -*- coding:') or
|
||||
stripped.startswith('# coding:') or
|
||||
stripped.startswith('# coding=')):
|
||||
i += 1
|
||||
continue
|
||||
|
||||
break
|
||||
|
||||
# Process all remaining lines with minimal filtering
|
||||
while i < len(lines):
|
||||
line = lines[i]
|
||||
stripped = line.strip()
|
||||
|
||||
if stripped: # Keep all non-empty lines
|
||||
normalized = line.expandtabs(4).rstrip()
|
||||
result_lines.append(normalized)
|
||||
|
||||
i += 1
|
||||
|
||||
return '\n'.join(result_lines)
|
||||
|
||||
# gold_file_path = gold_file_path.replace('.txt', '.py')
|
||||
def remove_whitespace(text):
    """Return text with every run of whitespace removed."""
    fragments = text.split()
    return ''.join(fragments)
|
||||
|
||||
with open(py_file_path, 'r') as file1:
|
||||
content1 = file1.read()
|
||||
with open(gold_file_path, 'r') as file2:
|
||||
content2 = file2.read()
|
||||
content1_no_whitespace = remove_whitespace(content1)
|
||||
content2_no_whitespace = remove_whitespace(content2)
|
||||
if content1_no_whitespace == content2_no_whitespace:
|
||||
return 1
|
||||
else:
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
print(check_direct_json_object([], rules={
|
||||
"relativeTime": {
|
||||
"from": "5th next month"
|
||||
},
|
||||
"expected": {
|
||||
"start": "SEA",
|
||||
"end": "NYC",
|
||||
"time": "{DoW}, {Month} {DayD}, {Year}",
|
||||
"category": "Miles"
|
||||
}}))
|
||||
try:
|
||||
with open(py_file_path, 'r', encoding='utf-8') as file1:
|
||||
user_content = file1.read()
|
||||
with open(gold_file_path, 'r', encoding='utf-8') as file2:
|
||||
gold_content = file2.read()
|
||||
|
||||
# Apply different normalization strategies
|
||||
user_normalized = _normalize(user_content)
|
||||
gold_normalized = _normalize(gold_content)
|
||||
|
||||
if user_normalized == gold_normalized:
|
||||
return 1.0
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
except (FileNotFoundError, IOError, UnicodeDecodeError) as e:
|
||||
logger.debug(f"compare_python_pure_text: Error reading files - {e}")
|
||||
return 0.0
|
||||
except Exception as e:
|
||||
logger.debug(f"compare_python_pure_text: Unexpected error - {e}")
|
||||
return 0.0
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
import logging
|
||||
from typing import List, Union
|
||||
from skimage.metrics import structural_similarity as ssim
|
||||
from PIL import Image, ImageChops, ImageStat
|
||||
@@ -39,7 +40,7 @@ def get_gimp_export_path():
|
||||
return current_path
|
||||
except FileNotFoundError:
|
||||
# Handle the case where the configuration file is not found
|
||||
print("GIMP configuration file not found")
|
||||
logging.debug("GIMP configuration file not found")
|
||||
return False
|
||||
|
||||
|
||||
@@ -193,18 +194,18 @@ def structure_check_by_mse(img1, img2, threshold=0.03):
|
||||
(np.array(img1, dtype=np.float32) / 255
|
||||
- np.array(img2, dtype=np.float32) / 255) ** 2)
|
||||
structure_same = True if mse < threshold else False
|
||||
print("MSE: ", mse)
|
||||
logging.debug(f"MSE: {mse}, threshold: {threshold}")
|
||||
return structure_same
|
||||
|
||||
|
||||
def structure_check_by_ssim(img1, img2, threshold=0.9):
|
||||
"""Check if two images are approximately the same by SSIM"""
|
||||
similarity = ssim(np.array(img1), np.array(img2), multichannel=True, channel_axis=-1)
|
||||
print("SSIM: ", similarity)
|
||||
logging.debug("SSIM: %s", similarity)
|
||||
return similarity >= threshold
|
||||
|
||||
|
||||
def check_brightness_decrease_and_structure_sim(src_path, tgt_path):
|
||||
def check_brightness_decrease_and_structure_sim(src_path, tgt_path, threshold=0.03):
|
||||
"""
|
||||
Check the brightness of src is lower than tgt and the structures are similar
|
||||
gimp:7a4deb26-d57d-4ea9-9a73-630f66a7b568
|
||||
@@ -219,13 +220,15 @@ def check_brightness_decrease_and_structure_sim(src_path, tgt_path):
|
||||
brightness_src = calculate_brightness(img_src)
|
||||
brightness_tgt = calculate_brightness(img_tgt)
|
||||
brightness_reduced = brightness_tgt > brightness_src
|
||||
|
||||
# print(f"Brightness src: {brightness_src}, tgt: {brightness_tgt}, reduced: {brightness_reduced}")
|
||||
|
||||
# Normalize and compare images
|
||||
target_brightness = 128
|
||||
img_src_normalized = normalize_brightness(img_src, target_brightness)
|
||||
img_tgt_normalized = normalize_brightness(img_tgt, target_brightness)
|
||||
|
||||
structure_same = structure_check_by_mse(img_src_normalized, img_tgt_normalized)
|
||||
structure_same = structure_check_by_mse(img_src_normalized, img_tgt_normalized, threshold=threshold)
|
||||
if brightness_reduced and structure_same:
|
||||
return 1.
|
||||
else:
|
||||
@@ -362,11 +365,37 @@ def check_structure_sim_resized(src_path, tgt_path):
|
||||
img_src = Image.open(src_path)
|
||||
img_tgt = Image.open(tgt_path)
|
||||
|
||||
# Resize the images to the same size
|
||||
img_src = img_src.resize(img_tgt.size)
|
||||
# Check if source image has transparency and extract content area
|
||||
if img_src.mode in ('RGBA', 'LA') or 'transparency' in img_src.info:
|
||||
if img_src.mode != 'RGBA':
|
||||
img_src = img_src.convert('RGBA')
|
||||
|
||||
# Get alpha channel and find bounding box of non-transparent pixels
|
||||
alpha = img_src.split()[-1]
|
||||
bbox = alpha.getbbox()
|
||||
|
||||
if bbox is None:
|
||||
# Image is completely transparent
|
||||
logging.debug("Source image is completely transparent")
|
||||
return 0.
|
||||
|
||||
# Crop to content area only
|
||||
img_src_content = img_src.crop(bbox)
|
||||
logging.debug(f"Source image cropped from {img_src.size} to {img_src_content.size}")
|
||||
|
||||
# Convert to RGB for comparison
|
||||
img_src_content = img_src_content.convert('RGB')
|
||||
img_src_resized = img_src_content.resize(img_tgt.size)
|
||||
else:
|
||||
# No transparency, resize normally
|
||||
img_src_resized = img_src.resize(img_tgt.size)
|
||||
|
||||
# Ensure target image is RGB for comparison
|
||||
if img_tgt.mode != 'RGB':
|
||||
img_tgt = img_tgt.convert('RGB')
|
||||
|
||||
# Check if the structure is similar
|
||||
structure_same = structure_check_by_ssim(img_src, img_tgt)
|
||||
structure_same = structure_check_by_ssim(img_src_resized, img_tgt)
|
||||
return structure_same
|
||||
|
||||
|
||||
@@ -431,20 +460,52 @@ def check_image_size(src_path, rule):
|
||||
|
||||
# Load the image
|
||||
img = Image.open(src_path)
|
||||
|
||||
# Check if we should ignore transparent parts
|
||||
ignore_transparent = rule.get("ignore_transparent", False)
|
||||
|
||||
if ignore_transparent and img.mode in ('RGBA', 'LA') or 'transparency' in img.info:
|
||||
# Calculate bounding box of non-transparent pixels
|
||||
if img.mode != 'RGBA':
|
||||
img = img.convert('RGBA')
|
||||
|
||||
# Get alpha channel
|
||||
alpha = img.split()[-1]
|
||||
|
||||
# Find bounding box of non-transparent pixels
|
||||
bbox = alpha.getbbox()
|
||||
|
||||
if bbox is None:
|
||||
# Image is completely transparent
|
||||
actual_width = 0
|
||||
actual_height = 0
|
||||
else:
|
||||
# Calculate actual content size
|
||||
actual_width = bbox[2] - bbox[0]
|
||||
actual_height = bbox[3] - bbox[1]
|
||||
|
||||
logging.debug(f"Original size: {img.size}, Content size: {actual_width}x{actual_height}")
|
||||
else:
|
||||
# Use original image size
|
||||
actual_width = img.size[0]
|
||||
actual_height = img.size[1]
|
||||
logging.debug(f"Image size: {img.size}")
|
||||
|
||||
# Check the size
|
||||
if rule.get("height", None) is not None:
|
||||
height_same = img.size[1] == rule["height"]
|
||||
height_same = actual_height == rule["height"]
|
||||
else:
|
||||
height_same = True
|
||||
if rule.get("width", None) is not None:
|
||||
width_same = img.size[0] == rule["width"]
|
||||
width_same = actual_width == rule["width"]
|
||||
else:
|
||||
width_same = True
|
||||
|
||||
if height_same and width_same:
|
||||
logging.debug(f"height_same: {height_same}, width_same: {width_same}")
|
||||
return 1.
|
||||
else:
|
||||
logging.debug(f"height_same: {height_same}, width_same: {width_same}")
|
||||
return 0.
|
||||
|
||||
|
||||
|
||||
@@ -63,11 +63,13 @@ def compare_epub(result: str, expected: str) -> float:
|
||||
result_files: List[str] = process_epub(result)
|
||||
expected_files: List[str] = process_epub(expected)
|
||||
|
||||
metric: float = 1.
|
||||
metric: float = 0.
|
||||
for f1, f2 in zip(result_files, expected_files):
|
||||
current_metric: float = diff_text_file(f1, f2)
|
||||
logger.debug("%s vs %s: %f", f1, f2, current_metric)
|
||||
metric *= current_metric
|
||||
metric += current_metric
|
||||
if len(result_files) > 0:
|
||||
metric /= len(result_files)
|
||||
return metric
|
||||
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ from math import sqrt
|
||||
|
||||
from pptx import Presentation
|
||||
from pptx.util import Inches
|
||||
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
||||
|
||||
logger = logging.getLogger("desktopenv.metric.slides")
|
||||
|
||||
@@ -139,6 +140,17 @@ def compare_pptx_files(file1_path, file2_path, **options):
|
||||
prs1 = Presentation(file1_path)
|
||||
prs2 = Presentation(file2_path)
|
||||
|
||||
approximately_tolerance = options.get("approximately_tolerance", 0.005)
|
||||
def is_approximately_equal(val1, val2, tolerance=approximately_tolerance):
    """
    Check whether two values are equal within a relative tolerance.

    The difference is measured relative to the larger magnitude of the two
    values.

    Args:
        val1: first value (number or None)
        val2: second value (number or None)
        tolerance (float): maximum allowed relative difference. Defaults to
          approximately_tolerance from the enclosing scope, i.e. 0.005
          (0.5%) unless the "approximately_tolerance" option overrides it.

    Returns:
        bool
    """
    if val1 == val2:
        return True
    # A missing value can never match a present one; without this guard the
    # arithmetic below raises TypeError.
    # NOTE(review): python-pptx appears to report None for shapes without an
    # explicit position/size - confirm.
    if val1 is None or val2 is None:
        return False
    # Exactly one value is zero at this point (both-zero was handled by the
    # equality check), so they are treated as different regardless of tolerance.
    if val1 == 0 or val2 == 0:
        return False
    return abs(val1 - val2) / max(abs(val1), abs(val2)) <= tolerance
|
||||
|
||||
examine_number_of_slides = options.get("examine_number_of_slides", True)
|
||||
examine_shape = options.get("examine_shape", True)
|
||||
examine_text = options.get("examine_text", True)
|
||||
@@ -212,14 +224,20 @@ def compare_pptx_files(file1_path, file2_path, **options):
|
||||
if hasattr(shape1, "text") and hasattr(shape2, "text") and shape1.text == shape2.text:
|
||||
if shape1.text == "Product Comparison" and (shape1.top <= shape2.top or shape1.top < 3600000):
|
||||
return 0
|
||||
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
|
||||
elif (not is_approximately_equal(shape1.left, shape2.left) or
|
||||
not is_approximately_equal(shape1.top, shape2.top) or
|
||||
not is_approximately_equal(shape1.width, shape2.width) or
|
||||
not is_approximately_equal(shape1.height, shape2.height)):
|
||||
return 0
|
||||
|
||||
if examine_table_bottom_position:
|
||||
if slide_idx == 3 and shape1.shape_type == 19 and shape2.shape_type == 19:
|
||||
if shape1.top <= shape2.top or shape1.top < 3600000:
|
||||
return 0
|
||||
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
|
||||
elif (not is_approximately_equal(shape1.left, shape2.left) or
|
||||
not is_approximately_equal(shape1.top, shape2.top) or
|
||||
not is_approximately_equal(shape1.width, shape2.width) or
|
||||
not is_approximately_equal(shape1.height, shape2.height)):
|
||||
return 0
|
||||
|
||||
if examine_right_position:
|
||||
@@ -231,34 +249,62 @@ def compare_pptx_files(file1_path, file2_path, **options):
|
||||
if slide_idx == 2 and shape1.shape_type == 13 and shape2.shape_type == 13:
|
||||
if shape1.top >= shape2.top or shape1.top > 1980000:
|
||||
return 0
|
||||
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
|
||||
elif (not is_approximately_equal(shape1.left, shape2.left) or
|
||||
not is_approximately_equal(shape1.top, shape2.top) or
|
||||
not is_approximately_equal(shape1.width, shape2.width) or
|
||||
not is_approximately_equal(shape1.height, shape2.height)):
|
||||
return 0
|
||||
|
||||
if examine_shape_for_shift_size:
|
||||
if shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
|
||||
if (not is_approximately_equal(shape1.left, shape2.left) or
|
||||
not is_approximately_equal(shape1.top, shape2.top) or
|
||||
not is_approximately_equal(shape1.width, shape2.width) or
|
||||
not is_approximately_equal(shape1.height, shape2.height)):
|
||||
if not (hasattr(shape1, "text") and hasattr(shape2,
|
||||
"text") and shape1.text == shape2.text and shape1.text == "Elaborate on what you want to discuss."):
|
||||
return 0
|
||||
|
||||
if (
|
||||
shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height) and examine_shape:
|
||||
not is_approximately_equal(shape1.left, shape2.left) or
|
||||
not is_approximately_equal(shape1.top, shape2.top) or
|
||||
not is_approximately_equal(shape1.width, shape2.width) or
|
||||
not is_approximately_equal(shape1.height, shape2.height)) and examine_shape:
|
||||
return 0
|
||||
|
||||
if examine_image_size:
|
||||
if shape1.shape_type == 13 and shape2.shape_type == 13:
|
||||
if shape1.width != shape2.width or shape1.height != shape2.height:
|
||||
if not is_approximately_equal(shape1.width, shape2.width) or not is_approximately_equal(shape1.height, shape2.height):
|
||||
return 0
|
||||
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
|
||||
elif (not is_approximately_equal(shape1.left, shape2.left) or
|
||||
not is_approximately_equal(shape1.top, shape2.top) or
|
||||
not is_approximately_equal(shape1.width, shape2.width) or
|
||||
not is_approximately_equal(shape1.height, shape2.height)):
|
||||
return 0
|
||||
|
||||
if examine_modify_height:
|
||||
if not hasattr(shape1, "text") and not hasattr(shape2,
|
||||
"text") or shape1.shape_type == 5 and shape2.shape_type == 5:
|
||||
if shape1.height != shape2.height:
|
||||
if not is_approximately_equal(shape1.height, shape2.height):
|
||||
return 0
|
||||
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
|
||||
elif (not is_approximately_equal(shape1.left, shape2.left) or
|
||||
not is_approximately_equal(shape1.top, shape2.top) or
|
||||
not is_approximately_equal(shape1.width, shape2.width) or
|
||||
not is_approximately_equal(shape1.height, shape2.height)):
|
||||
return 0
|
||||
|
||||
if shape1.shape_type == MSO_SHAPE_TYPE.TABLE:
|
||||
table1 = shape1.table
|
||||
table2 = shape2.table
|
||||
for row_idx in range(len(table1.rows)):
|
||||
for col_idx in range(len(table1.columns)):
|
||||
cell1 = table1.cell(row_idx, col_idx)
|
||||
cell2 = table2.cell(row_idx, col_idx)
|
||||
|
||||
for para1, para2 in zip(cell1.text_frame.paragraphs, cell2.text_frame.paragraphs):
|
||||
for run1, run2 in zip(para1.runs, para2.runs):
|
||||
if run1.font.color.rgb != run2.font.color.rgb:
|
||||
return 0
|
||||
|
||||
if hasattr(shape1, "text") and hasattr(shape2, "text"):
|
||||
if shape1.text.strip() != shape2.text.strip() and examine_text:
|
||||
return 0
|
||||
@@ -288,15 +334,19 @@ def compare_pptx_files(file1_path, file2_path, **options):
|
||||
return 0
|
||||
|
||||
if run1.font.italic != run2.font.italic and examine_font_italic:
|
||||
return 0
|
||||
if run1.font.italic is not None and run2.font.italic is not None:
|
||||
return 0
|
||||
|
||||
if hasattr(run1.font.color, "rgb") and hasattr(run2.font.color, "rgb"):
|
||||
if run1.font.color.rgb != run2.font.color.rgb and examine_color_rgb:
|
||||
return 0
|
||||
|
||||
if run1.font.underline != run2.font.underline and examine_font_underline:
|
||||
return 0
|
||||
|
||||
if run1.font.underline is not None and run2.font.underline is not None:
|
||||
return 0
|
||||
if (run1.font.underline is None and run2.font.underline is True) or (run1.font.underline is True and run2.font.underline is None):
|
||||
return 0
|
||||
|
||||
if run1.font._element.attrib.get('strike', 'noStrike') != run2.font._element.attrib.get(
|
||||
'strike', 'noStrike') and examine_strike_through:
|
||||
return 0
|
||||
@@ -325,14 +375,20 @@ def compare_pptx_files(file1_path, file2_path, **options):
|
||||
color = "No Color"
|
||||
|
||||
text = "".join(t.text for t in paragraph.findall('.//a:t', namespaces))
|
||||
|
||||
bullets.append((lvl, char, text, color))
|
||||
|
||||
# Only add non-empty paragraphs to bullets list
|
||||
if text.strip():
|
||||
bullets.append((lvl, char, text, color))
|
||||
|
||||
return bullets
|
||||
|
||||
if examine_bullets and _extract_bullets(run1.part.blob.decode('utf-8')) != _extract_bullets(
|
||||
run2.part.blob.decode('utf-8')):
|
||||
return 0
|
||||
if examine_bullets:
|
||||
bullets1 = _extract_bullets(run1.part.blob.decode('utf-8'))
|
||||
bullets2 = _extract_bullets(run2.part.blob.decode('utf-8'))
|
||||
|
||||
# Compare only non-empty bullets
|
||||
if bullets1 != bullets2:
|
||||
return 0
|
||||
|
||||
# fixme: Actually there are more properties to be compared, we can add them later via parsing the xml data
|
||||
|
||||
@@ -446,17 +502,13 @@ def check_left_panel(accessibility_tree):
|
||||
|
||||
root = ET.fromstring(accessibility_tree)
|
||||
|
||||
for root_pane in root.iter('root-pane'):
|
||||
for panel in root_pane.iter('panel'):
|
||||
for split_pane in panel.iter('split-pane'):
|
||||
# Get the left panel
|
||||
if split_pane.attrib.get("{{{}}}parentcoord".format(namespaces['cp'])) == "(0, 0)":
|
||||
# Get the visible attribute
|
||||
visible = split_pane.attrib.get("{{{}}}visible".format(namespaces['st']))
|
||||
if visible:
|
||||
# decide if it is left panel
|
||||
return 1.
|
||||
# Iterate over all document-frame nodes
|
||||
for doc_frame in root.iter('document-frame'):
|
||||
if doc_frame.attrib.get("name") == "Slides View":
|
||||
# Slides View exists, which means the left panel is open
|
||||
return 1.
|
||||
|
||||
# Slides View was not found, so the left panel is considered closed
|
||||
return 0.
|
||||
|
||||
|
||||
|
||||
@@ -308,7 +308,7 @@ def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=
|
||||
|
||||
# If a video ends, then check if both ended to confirm they are of the same length
|
||||
if not ret1 or not ret2:
|
||||
return ret1 == ret2
|
||||
return 1. if ret1 == ret2 else 0. # return float only
|
||||
|
||||
# Convert frames to PIL Images
|
||||
frame1 = Image.fromarray(cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB))
|
||||
|
||||
Reference in New Issue
Block a user