Merge branch 'main' into xiaochuanli/addChromeExtensions

This commit is contained in:
Tianbao Xie
2024-03-08 20:45:49 +08:00
committed by GitHub
109 changed files with 7196 additions and 172 deletions

View File

@@ -21,6 +21,7 @@ from .chrome import (
is_expected_url_pattern_match,
is_added_to_steam_cart,
is_expected_installed_extensions
compare_pdf_images
)
from .docs import (
compare_font_names,
@@ -49,6 +50,9 @@ from .docs import (
check_tabstops,
compare_contains_image,
compare_docx_files_and_ignore_new_lines
compare_docx_images,
compare_image_text,
compare_references
)
from .general import (
check_csv,
@@ -69,12 +73,14 @@ from .general import (
compare_terminal_and_txt,
fuzzy_place_math,
compare_python_pure_text
diff_text_file,
literal_match
)
from .gimp import (
check_brightness_decrease_and_structure_sim,
check_contrast_increase_and_structure_sim,
check_saturation_increase_and_structure_sim,
check_image_size_and_structure_sim,
check_image_size,
check_image_mirror,
check_palette_and_structure_sim,
check_textbox_on_leftside,
@@ -87,7 +93,9 @@ from .gimp import (
increase_saturation,
decrease_brightness,
check_file_exists,
compare_triangle_positions
compare_triangle_positions,
check_sharper,
check_image_file_size
)
from .libreoffice import check_libre_locale
from .pdf import check_pdf_pages
@@ -131,11 +139,17 @@ from .vscode import (
compare_text_file,
compare_config,
compare_answer,
compare_result_files,
is_extension_installed,
check_json_settings,
check_json_keybindings
check_json_keybindings,
check_python_file_by_test_suite,
check_python_file_by_gold_file,
check_html_background_image,
compare_zip_files
)
from .calc import compare_conference_city_in_order
from .others import compare_epub, check_mp3_meta
def infeasible():
pass

View File

@@ -2,9 +2,9 @@ import logging
import os
import re
import shutil
from itertools import product
from typing import Any, Dict, List, Union
import fitz # PyMuPDF
import rapidfuzz.fuzz as fuzz
from bs4 import BeautifulSoup, Tag
@@ -97,6 +97,29 @@ def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float:
bookmark_bar_websites_urls = [bookmark['url'] for bookmark in bookmarks['bookmark_bar']['children'] if
bookmark['type'] == 'url']
return 1. if set(bookmark_bar_websites_urls) == set(rule['urls']) else 0.
elif rule['type'] == "liked_authors_websites_urls":
# Check if "liked authors" folder exists
liked_authors_folder = next((bookmark for bookmark in bookmarks['bookmark_bar']['children'] if
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
if liked_authors_folder:
# Check if it contains the specified URLs
liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if
bookmark['type'] == 'url']
urls = rule['urls']
for idx, url in enumerate(urls):
if isinstance(url, str):
urls[idx] = [url]
combinations = product(*urls)
for combination in combinations:
if set(combination) == set(liked_authors_urls):
return 1.
return 0.
else:
return 0.
else:
raise TypeError(f"{rule['type']} not support yet!")
@@ -136,6 +159,53 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st
return score / len(pdf2_path)
import fitz
from PIL import Image
from borb.pdf import Document
from borb.pdf import PDF
from pathlib import Path
import typing
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
    """Return 1.0 iff both PDFs render to pixel-identical page images.

    Each file is first round-tripped through borb (normalising the PDF
    serialisation), then every page is rasterised with PyMuPDF and the raw
    RGB bytes are compared page by page.

    NOTE(review): the borb round-trip overwrites both input files in place.
    """
    def normalize_pdf(path: Path) -> None:
        # Load + dump through borb so both files share one serialisation.
        with open(path, "rb") as src:
            document: typing.Optional[Document] = PDF.loads(src)
        with open(path, "wb") as dst:
            PDF.dumps(dst, document)

    def render_pages(path: str) -> list:
        # Rasterise every page at PyMuPDF's default resolution.
        doc = fitz.open(path)
        pages = []
        for index in range(doc.page_count):
            pix = doc[index].get_pixmap()
            pages.append(Image.frombytes("RGB", [pix.width, pix.height], pix.samples))
        return pages

    normalize_pdf(Path(pdf1_path))
    normalize_pdf(Path(pdf2_path))

    pages1 = render_pages(pdf1_path)
    pages2 = render_pages(pdf2_path)
    if len(pages1) != len(pages2):
        return 0.
    all_equal = all(a.tobytes() == b.tobytes() for a, b in zip(pages1, pages2))
    return 1. if all_equal else 0.
def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
"""
Compare two archives. Note that the files in the archives should be of the same type.

View File

@@ -11,6 +11,7 @@ from docx.shared import RGBColor
from odf.opendocument import load
from odf.text import P
from odf.text import Span
from rapidfuzz import fuzz
from skimage.color import deltaE_ciede2000
from skimage.color import rgb2lab
@@ -57,6 +58,10 @@ def contains_page_break(docx_file):
def compare_docx_files(file1, file2, **options):
ignore_blanks = options.get('ignore_blanks', True)
ignore_case = options.get('ignore_case', False)
ignore_order = options.get('ignore_order', False)
content_only = options.get('content_only', False)
def get_paragraph_texts_odt(document):
paragraphs = document.getElementsByType(P)
paragraph_texts = []
@@ -79,20 +84,37 @@ def compare_docx_files(file1, file2, **options):
doc2 = Document(file2)
doc1_paragraphs = [p.text for p in doc1.paragraphs]
doc2_paragraphs = [p.text for p in doc2.paragraphs]
if ignore_order:
doc1_paragraphs = sorted(doc1_paragraphs)
doc2_paragraphs = sorted(doc2_paragraphs)
elif file1.endswith('.odt') and file2.endswith('.odt'):
doc1 = load(file1)
doc2 = load(file2)
doc1_paragraphs = get_paragraph_texts_odt(doc1)
doc2_paragraphs = get_paragraph_texts_odt(doc2)
if ignore_order:
doc1_paragraphs = sorted(doc1_paragraphs)
doc2_paragraphs = sorted(doc2_paragraphs)
else:
# Unsupported file types or mismatch
print("Unsupported file types or mismatch between file types.")
return 0
if content_only:
# Compare the content of the documents
text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
if ignore_case:
text1, text2 = text1.lower(), text2.lower()
similarity = fuzz.ratio(text1, text2) / 100.0
return similarity
# Process and compare documents
if ignore_blanks:
text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
if ignore_case:
text1, text2 = text1.lower(), text2.lower()
if text1 != text2:
return 0
else:
@@ -106,6 +128,8 @@ def compare_docx_files(file1, file2, **options):
print("in compare")
# Compare each paragraph
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
if ignore_case:
p1, p2 = p1.lower(), p2.lower()
if p1 != p2:
print(p1)
print(p2)
@@ -157,6 +181,44 @@ def compare_docx_tables(docx_file1, docx_file2):
return 1
from io import BytesIO
from PIL import Image
def compare_docx_images(docx_file1, docx_file2):
    """Return 1 iff the two .docx files embed pixel-identical images.

    Images are collected from each document's relationship parts (in
    relationship order) and compared pairwise on their decoded pixel bytes.
    """
    def embedded_images(path):
        # Every embedded picture lives behind a relationship whose type
        # contains "image"; the blob is the raw image file content.
        document = Document(path)
        return [BytesIO(rel.target_part.blob)
                for rel in document.part.rels.values()
                if "image" in rel.reltype]

    blobs1 = embedded_images(docx_file1)
    blobs2 = embedded_images(docx_file2)
    if len(blobs1) != len(blobs2):
        return 0
    pairs_match = all(Image.open(a).tobytes() == Image.open(b).tobytes()
                      for a, b in zip(blobs1, blobs2))
    return 1 if pairs_match else 0
import pytesseract
def compare_image_text(image_path, rule):
    """OCR the image at ``image_path`` and check ``rule`` against the text.

    Supported rule: ``{"type": "text", "text": <substring>}`` — scores 1
    when the substring occurs in the recognised text, 0 otherwise.

    Raises:
        ValueError: for any other ``rule['type']``.
    """
    recognised_text = pytesseract.image_to_string(Image.open(image_path))
    if rule['type'] != 'text':
        raise ValueError("Unsupported rule type")
    return 1 if rule['text'] in recognised_text else 0
def compare_line_spacing(docx_file1, docx_file2):
if not compare_docx_files(docx_file1, docx_file2):
return 0
@@ -263,7 +325,7 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
section = doc2.sections[0]
paragraph_width = section.page_width - section.left_margin - section.right_margin
ignore_tabs = lambda x: x.alignment == WD_TAB_ALIGNMENT.CLEAR or (
x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0)
x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0)
minus = .0
for p1, p2 in zip(para1, para2):
# filter CLEAR tabstop and default left-0 tabstop
@@ -566,3 +628,95 @@ def compare_highlighted_text(file1, file2):
return 1
else:
return 0
def compare_references(file1, file2, **options):
    """Compare the trailing reference sections of two .docx documents.

    Args:
        file1: path to the produced document (.docx only).
        file2: path to the gold document (.docx only).
        **options:
            reference_indicator (str): heading paragraph that starts the
                reference section (default 'References').
            reference_base_result (float): minimum average fuzzy similarity
                needed to score above 0; scores are rescaled linearly from
                [base, 1] to [0, 1] (default 0.5).

    Returns:
        Rescaled similarity in [0, 1]; 1 if neither document has a
        reference section, 0 on any structural mismatch.
    """
    reference_indicator = options.get('reference_indicator', 'References')
    reference_base_result = options.get('reference_base_result', 0.5)

    # Determine file types and load documents
    if file1.endswith('.docx') and file2.endswith('.docx'):
        doc1_paragraphs = [p.text for p in Document(file1).paragraphs]
        doc2_paragraphs = [p.text for p in Document(file2).paragraphs]
    else:
        # Unsupported file types or mismatch
        print("Unsupported file types or mismatch between file types.")
        return 0

    def _last_index(paragraphs):
        # Bug fix: the intent (per the original comment) is the LAST
        # occurrence of the indicator, but list.index returns the first one.
        for idx in range(len(paragraphs) - 1, -1, -1):
            if paragraphs[idx] == reference_indicator:
                return idx
        return -1

    ref1_idx = _last_index(doc1_paragraphs)
    ref2_idx = _last_index(doc2_paragraphs)
    if ref1_idx == -1 and ref2_idx == -1:
        return 1
    if ref1_idx == -1 or ref2_idx == -1:
        return 0

    # Split the reference section into items, dropping blank paragraphs.
    ref1 = [p for p in doc1_paragraphs[ref1_idx + 1:] if p.strip()]
    ref2 = [p for p in doc2_paragraphs[ref2_idx + 1:] if p.strip()]
    if len(ref1) != len(ref2):
        return 0
    if not ref1:
        # Both sections empty: identical, and avoids dividing by zero below.
        return 1

    # Average pairwise fuzzy similarity of the reference items.
    total_similarity = sum(fuzz.ratio(r1, r2) / 100.0 for r1, r2 in zip(ref1, ref2))
    result = total_similarity / len(ref1)
    if result >= reference_base_result:
        # Rescale [base, 1] -> [0, 1] so weak matches are not over-rewarded.
        return (result - reference_base_result) / (1 - reference_base_result)
    else:
        return 0
def compare_answer(file1, file2, **options):
    """Compare the trailing answer/reference section of two .docx documents.

    Same contract as ``compare_references``: paragraphs after the last
    occurrence of the indicator heading are fuzzy-matched pairwise and the
    average similarity is rescaled from [base, 1] to [0, 1].

    Args:
        file1: path to the produced document (.docx only).
        file2: path to the gold document (.docx only).
        **options:
            reference_indicator (str): heading that starts the section
                (default 'References').
            reference_base_result (float): minimum average similarity needed
                to score above 0 (default 0.5).
    """
    # Bug fix: these two options were read but never parsed from **options,
    # so the original raised NameError on every call.
    reference_indicator = options.get('reference_indicator', 'References')
    reference_base_result = options.get('reference_base_result', 0.5)

    # Determine file types and load documents
    if file1.endswith('.docx') and file2.endswith('.docx'):
        doc1_paragraphs = [p.text for p in Document(file1).paragraphs]
        doc2_paragraphs = [p.text for p in Document(file2).paragraphs]
    else:
        # Unsupported file types or mismatch
        print("Unsupported file types or mismatch between file types.")
        return 0

    def _last_index(paragraphs):
        # The intent (per the original comment) is the LAST occurrence of the
        # indicator; list.index would return the first one.
        for idx in range(len(paragraphs) - 1, -1, -1):
            if paragraphs[idx] == reference_indicator:
                return idx
        return -1

    ref1_idx = _last_index(doc1_paragraphs)
    ref2_idx = _last_index(doc2_paragraphs)
    if ref1_idx == -1 and ref2_idx == -1:
        return 1
    if ref1_idx == -1 or ref2_idx == -1:
        return 0

    # Split the section into items, dropping blank paragraphs.
    ref1 = [p for p in doc1_paragraphs[ref1_idx + 1:] if p.strip()]
    ref2 = [p for p in doc2_paragraphs[ref2_idx + 1:] if p.strip()]
    if len(ref1) != len(ref2):
        return 0
    if not ref1:
        # Both sections empty: identical, and avoids dividing by zero below.
        return 1

    total_similarity = sum(fuzz.ratio(r1, r2) / 100.0 for r1, r2 in zip(ref1, ref2))
    result = total_similarity / len(ref1)
    if result >= reference_base_result:
        return (result - reference_base_result) / (1 - reference_base_result)
    else:
        return 0

View File

@@ -1,6 +1,7 @@
import csv
import functools
import json
import yaml
import operator
import re
import pdfplumber
@@ -15,9 +16,13 @@ from lxml.cssselect import CSSSelector
from lxml.etree import _Element
from rapidfuzz import fuzz
from docx import Document
import difflib
from .utils import _match_record, _match_value_to_rule
import logging
logger = logging.getLogger("desktopenv.metric.general")
def check_include_exclude(result: str, rules: Dict[str, List[str]]) -> float:
if result is None:
@@ -41,6 +46,24 @@ def exact_match(result, rules) -> float:
else:
return 0.
def literal_match(result: Any, expected: Any, **options) -> float:
    """Compare ``result`` with ``expected`` literally after str-coercion.

    Args:
        result: observed value; coerced to str (or list of str).
        expected: gold value; coerced the same way.
        **options:
            type (str): 'str' (default) or 'list'.
            ignore_case (bool): case-insensitive comparison (default False).

    Returns:
        float: 1.0 on an exact (possibly case-folded) match, else 0.0.

    Raises:
        NotImplementedError: for an unsupported ``type`` option.
    """
    literal_type = options.get('type', 'str')
    ignore_case = options.get('ignore_case', False)
    if literal_type == 'str':
        lhs, rhs = str(result), str(expected)
        if ignore_case:
            lhs, rhs = lhs.lower(), rhs.lower()
        return float(lhs == rhs)
    elif literal_type == 'list':
        # Only exactly-list/tuple pairs of equal length can match.
        if type(result) not in (list, tuple) or type(expected) not in (list, tuple) \
                or len(result) != len(expected):
            return .0
        lhs = [str(s).lower() if ignore_case else str(s) for s in result]
        rhs = [str(s).lower() if ignore_case else str(s) for s in expected]
        return float(lhs == rhs)
    else:
        # Bug fix: the message previously interpolated the builtin `type`
        # (rendering "<class 'type'>") instead of the requested option value.
        raise NotImplementedError(f"Type {literal_type} not supported")
def is_in_list(result, rules) -> float:
expect = rules["expected"]
if expect in result:
@@ -48,6 +71,15 @@ def is_in_list(result, rules) -> float:
else:
return 0.
def diff_text_file(result: str, expect: str) -> float:
    """Score the line-level similarity of two text files.

    Args:
        result: path to the produced file; ``None`` scores 0.
        expect: path to the reference file.

    Returns:
        float: ``difflib.SequenceMatcher`` ratio over the files' lines
        (1.0 for identical content, down to 0.0).
    """
    if result is None:
        return 0.

    def read_lines(path: str) -> List[str]:
        # Comparison is line-wise, so trailing-newline style does not matter.
        with open(path) as fp:
            return fp.read().splitlines()

    matcher = difflib.SequenceMatcher(a=read_lines(result), b=read_lines(expect))
    return matcher.ratio()
def fuzzy_match(result, rules) -> float:
expect = rules["expected"]
@@ -62,7 +94,7 @@ def fuzzy_place_math(result_file_path, rules) -> float:
words_list = []
for para in doc.paragraphs:
words_list.extend(para.text.split())
# 打印出提取的单词列表
# Print out the list of extracted words
print(words_list)
for word in words_list:
if not any(ans in word for ans in expect):
@@ -140,11 +172,11 @@ _accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org"
}
def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float:
def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float:
"""
Args:
result (str): XML of GNOME Accessibility Tree
rules (Dict[str, Any]): dict like
rules (List[Dict[str, Any]]): list of dict like
{
"selectors": list of str as CSS selectors, will be connected by ", "
to form a composite selector. Only one from `selectors` and
@@ -162,30 +194,33 @@ def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float:
"""
at: _Element = lxml.etree.fromstring(result)
if "xpath" in rules:
elements: List[_Element] = at.xpath(rules["xpath"], namespaces=_accessibility_ns_map)
elif "selectors" in rules:
selector = CSSSelector(", ".join(rules["selectors"]), namespaces=_accessibility_ns_map)
elements: List[_Element] = selector(at)
else:
raise ValueError("At least one of xpath and selectors is required")
total_match_score = 1.
for r in rules:
if "xpath" in r:
elements: List[_Element] = at.xpath(r["xpath"], namespaces=_accessibility_ns_map)
elif "selectors" in r:
selector = CSSSelector(", ".join(r["selectors"]), namespaces=_accessibility_ns_map)
elements: List[_Element] = selector(at)
else:
raise ValueError("At least one of xpath and selectors is required")
if len(elements) == 0:
print("no elements")
return 0.
if len(elements) == 0:
logger.info("No elements: %s", r["xpath"] if "xpath" in r else r["selectors"])
return 0.
if "text" in rules:
match_func: Callable[[str], Number] = functools.partial(operator.eq if rules["exact"] \
else (lambda a, b: fuzz.ratio(a, b) / 100.)
, rules["text"]
)
match_score: Number = 0
for elm in elements:
match_score = max(match_score, match_func(elm.text or None))
else:
match_score = 1.
if "text" in r:
match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \
else (lambda a, b: fuzz.ratio(a, b) / 100.)
, r["text"]
)
match_score: Number = 0
for elm in elements:
match_score = max(match_score, match_func(elm.text or None))
else:
match_score = 1.
total_match_score *= match_score
return float(match_score)
return float(total_match_score)
# def check_existence(result: str, *args) -> float:
@@ -197,7 +232,7 @@ def run_sqlite3(result: str, rules: Dict[str, Any]) -> float:
return float(cursor.fetchone()[0] or 0)
def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]]) -> float:
def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]], is_yaml: bool = False) -> float:
"""
Args:
result (str): path to json file
@@ -212,6 +247,7 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
],
"unexpect": <the same as `expect`
}
is_yaml (bool): yaml rather than json
Returns:
float
@@ -220,7 +256,10 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
if result is None:
return 0.
with open(result) as f:
result: Dict[str, Any] = json.load(f)
if is_yaml:
result: Dict[str, Any] = yaml.load(f, Loader=yaml.Loader)
else:
result: Dict[str, Any] = json.load(f)
expect_rules = rules.get("expect", {})
unexpect_rules = rules.get("unexpect", {})
@@ -229,14 +268,21 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
for r in expect_rules:
value = result
for k in r["key"]:
value = value[k]
try:
value = value[k]
except KeyError:
return 0.
metric = metric and _match_value_to_rule(value, r)
for r in unexpect_rules:
value = result
for k in r["key"]:
value = value[k]
try:
value = value[k]
except KeyError:
value = None
break
metric = metric and not _match_value_to_rule(value, r)
return metric
return float(metric)
def check_direct_json_object(result, rules)->float:
@@ -257,6 +303,7 @@ def check_direct_json_object(result, rules)->float:
print(rules["expected"])
if result is None:
return 0.
expect_in_result = rules.get("expect_in_result", False)
if not expect_in_result:
expected_json = rules["expected"]
@@ -374,8 +421,6 @@ def compare_python_pure_text(py_file_path, gold_file_path):
content1 = file1.read()
with open(gold_file_path, 'r') as file2:
content2 = file2.read()
# 移除文件内容中的所有空白字符
content1_no_whitespace = remove_whitespace(content1)
content2_no_whitespace = remove_whitespace(content2)
# 比较处理后的文件内容
return content1_no_whitespace == content2_no_whitespace

View File

@@ -5,7 +5,7 @@ from PIL import Image, ImageChops, ImageStat
def compare_image_list(pred_img_path_list: Union[str, List[str]],
gold_img_path_list: Union[str, List[str]]) -> float:
gold_img_path_list: Union[str, List[str]]) -> float:
""" Compare two image lists, only if all images are the same, return 1.0, otherwise return 0.0
"""
if type(pred_img_path_list) != list:
@@ -177,6 +177,16 @@ def calculate_contrast(image):
return np.std(pixels)
def calculate_image_sharpness(image_path):
    """Sharpness metric: variance of the Laplacian of the grayscale image.

    Higher variance means more high-frequency edge response, i.e. a
    sharper image.
    """
    grayscale = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    edge_response = cv2.Laplacian(grayscale, cv2.CV_64F)
    return np.var(edge_response)
def structure_check_by_mse(img1, img2, threshold=0.03):
"""Check if two images are approximately the same by MSE"""
mse = np.mean(
@@ -295,7 +305,8 @@ def check_triangle_position(tgt_path):
# We assume the triangle is a different color from the background
# Find the unique colors
unique_colors, counts = np.unique(img_array.reshape(-1, img_array.shape[2]), axis=0, return_counts=True)
unique_colors, counts = np.unique(img_array.reshape(-1, img_array.shape[2]), axis=0,
return_counts=True)
unique_colors_sorted = unique_colors[np.argsort(counts)]
# Assuming the background is the most common color and the triangle is a different color
@@ -337,6 +348,25 @@ def check_structure_sim(src_path, tgt_path):
return structure_same
def check_structure_sim_resized(src_path, tgt_path):
    """
    Check if the structure of the two images are similar after resizing.
    gimp:d16c99dc-2a1e-46f2-b350-d97c86c85c15
    """
    if src_path is None or tgt_path is None:
        return 0.
    target = Image.open(tgt_path)
    # Bring the source to the target's size before the SSIM-based check.
    source = Image.open(src_path).resize(target.size)
    return structure_check_by_ssim(source, target)
def check_contrast_increase_and_structure_sim(src_path, tgt_path):
"""
Check if the src image has higher contrast than the tgt image and the structures are similar
@@ -388,34 +418,28 @@ def check_config_status(actual_config_path, rule):
return 0.
def check_image_size_and_structure_sim(src_path, tgt_path, height=512, width=None):
def check_image_size(src_path, rule):
"""
Check if the size of the src image is correct and the structure of the two images are similar.
gimp:d16c99dc-2a1e-46f2-b350-d97c86c85c15
Check if the size of the src image is correct
multi-apps:42f4d1c7-4521-4161-b646-0a8934e36081
"""
if src_path is None or tgt_path is None:
if src_path is None:
return 0.
# Load images
source_image = Image.open(src_path)
target_image = Image.open(tgt_path)
# Load the image
img = Image.open(src_path)
# Check size
if width is not None:
width_same = source_image.size[0] == width
else:
width_same = True
if height is not None:
height_same = source_image.size[1] == height
# Check the size
if rule["height"] is not None:
height_same = img.size[1] == rule["height"]
else:
height_same = True
if rule["width"] is not None:
width_same = img.size[0] == rule["width"]
else:
width_same = True
# Check structure
resized_target_image = target_image.resize(source_image.size)
structure_same = structure_check_by_ssim(source_image, resized_target_image)
if width_same and height_same and structure_same:
if height_same and width_same:
return 1.
else:
return 0.
@@ -521,6 +545,31 @@ def check_green_background(src_path, tgt_path):
return 1.
def check_sharper(src_path, tgt_path):
    """
    Check if the source image is sharper than the target image.
    multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108
    """
    src_sharpness = calculate_image_sharpness(src_path)
    tgt_sharpness = calculate_image_sharpness(tgt_path)
    # Strictly greater: equal sharpness does not count as "sharper".
    return 1.0 if src_sharpness > tgt_sharpness else 0.0
def check_image_file_size(src_path, rule):
    """
    Check that the file at ``src_path`` is smaller than ``rule["max_size"]``.

    Args:
        src_path (str): path of the image file to check; ``None`` scores 0.
        rule (dict): must contain ``"max_size"``, the exclusive upper bound
            on the file size in bytes (e.g. 500000 for ~500 KB).

    Returns:
        float: 1.0 when the file size is strictly below the limit, else 0.0.
    """
    # Doc fix: the limit comes from the rule, not a hard-coded 500 KB.
    if src_path is None:
        return 0.0
    # os.path.getsize reports the on-disk size in bytes.
    file_size = os.path.getsize(src_path)
    return 1.0 if file_size < rule["max_size"] else 0.0
if __name__ == "__main__":
actual_config_path = "../../../cache/sessionrc_test"
rule = {
@@ -550,3 +599,12 @@ if __name__ == "__main__":
tgt_path = "../../../cache/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce/Triangle_In_The_Middle.png"
print(check_triangle_position(tgt_path))
src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi_sharper.png"
tgt_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi.png"
print(check_sharper(src_path, tgt_path))
src_path = "../../../cache/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f/compressed.jpeg"
rule = {
"max_size": 500000
}
print(check_image_file_size(src_path, rule))

View File

@@ -0,0 +1,128 @@
import zipfile
import os.path
import os
import lxml.html
from lxml.html import HtmlElement
from typing import List, Dict
from typing import Union, TypeVar
from mutagen.easyid3 import EasyID3
from .general import diff_text_file
from .utils import _match_value_to_rule
import logging
logger = logging.getLogger("desktopenv.metric.others")
def process_epub(filename: str) -> List[str]:
    """Unpack an EPUB into normalised text files for later diffing.

    Selected members are extracted next to the archive (``<filename>.dir``)
    with volatile content stripped so that two logically-equal EPUBs
    produce identical files:
      * ``toc.ncx``     — lines containing "navPoint" are dropped
        (presumably because they carry generated ids — TODO confirm);
      * ``content.opf`` — lines containing "dc:identifier" are dropped;
      * ``*.html``      — newlines removed, then re-serialised
        pretty-printed through lxml to normalise formatting.

    Returns:
        Sorted list of the paths written, or ``[]`` when ``filename`` is
        not a valid zip archive.
    """
    file_list: List[str] = []
    # Extraction directory lives beside the archive itself.
    base_dir: str = filename + ".dir"
    os.makedirs(base_dir, exist_ok=True)
    try:
        with zipfile.ZipFile(filename, "r") as z_f:
            # toc.ncx: copy through, skipping navPoint lines.
            with z_f.open("toc.ncx") as in_f\
                    , open(os.path.join(base_dir, "toc.ncx"), "w") as out_f:
                contents: str = in_f.read().decode()
                contents = contents.splitlines()
                for l in contents:
                    if "navPoint" not in l:
                        out_f.write(l + "\n")
            file_list.append(os.path.join(base_dir, "toc.ncx"))
            # content.opf: copy through, skipping the dc:identifier line.
            with z_f.open("content.opf") as in_f\
                    , open(os.path.join(base_dir, "content.opf"), "w") as out_f:
                contents: str = in_f.read().decode()
                contents = contents.splitlines()
                for l in contents:
                    if "dc:identifier" not in l:
                        out_f.write(l + "\n")
            file_list.append(os.path.join(base_dir, "content.opf"))
            # HTML members: strip newlines, then pretty-print via lxml so
            # whitespace-only differences disappear.
            for f_n in z_f.namelist():
                if f_n.endswith(".html"):
                    with z_f.open(f_n) as in_f\
                            , open(os.path.join(base_dir, f_n), "w") as out_f:
                        html: HtmlElement = lxml.html.fromstring(
                            ''.join( filter( lambda ch: ch!="\n" and ch!="\r"
                                           , in_f.read().decode()
                                           )
                                   ).encode()
                        )
                        out_f.write(lxml.html.tostring(html, pretty_print=True, encoding="unicode"))
                    file_list.append(os.path.join(base_dir, f_n))
        logger.debug("%s: %s", filename, file_list)
        # Sorted so two archives pair up deterministically in compare_epub.
        return list(sorted(file_list))
    except zipfile.BadZipFile:
        # Not a zip archive at all — treat as "nothing to compare".
        return []
def compare_epub(result: str, expected: str) -> float:
    """Multiply per-file similarity ratios of two normalised EPUBs.

    Both archives are unpacked via ``process_epub``; corresponding files
    (paired in sorted order) are scored with ``diff_text_file`` and the
    scores multiplied, so a single completely-different file zeroes the
    result. NOTE(review): extra files in the longer archive are silently
    ignored by ``zip`` — confirm both archives share the same member set.
    """
    if result is None:
        return 0.
    score: float = 1.
    for res_file, exp_file in zip(process_epub(result), process_epub(expected)):
        file_score: float = diff_text_file(res_file, exp_file)
        logger.debug("%s vs %s: %f", res_file, exp_file, file_score)
        score *= file_score
    return score
V = TypeVar("Value")
def check_mp3_meta(result: str, meta: Dict[str, Dict[str, Union[str, V]]]) -> bool:
# checks using _match_value_to_rule
if result is None:
return 0.
id3_dict = EasyID3(result)
metric: bool = True
for k, r in meta.items():
value = id3_dict.get(k, "")
if isinstance(value, list):
value: str = ",".join(value)
logger.debug("%s.%s: %s", result, k, value)
metric = metric and _match_value_to_rule(value, r)
return float(metric)
if __name__ == "__main__":
    # Ad-hoc manual test for check_mp3_meta: configures verbose logging to
    # ./logs/ (directory must already exist) and stdout, then scores a
    # cached MP3 against two exact-match tag rules.
    import datetime
    import sys

    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    # Timestamp shared by all log files of this run.
    datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")

    file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
    debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
    stdout_handler = logging.StreamHandler(sys.stdout)
    sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))

    file_handler.setLevel(logging.INFO)
    debug_handler.setLevel(logging.DEBUG)
    stdout_handler.setLevel(logging.INFO)
    sdebug_handler.setLevel(logging.DEBUG)

    # ANSI-coloured format for readable console/file output.
    formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
    file_handler.setFormatter(formatter)
    debug_handler.setFormatter(formatter)
    stdout_handler.setFormatter(formatter)
    sdebug_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(debug_handler)
    logger.addHandler(stdout_handler)
    logger.addHandler(sdebug_handler)

    # Expect 1.0 when the cached file's tags match exactly.
    metric = check_mp3_meta( "snapshots/test/cache/3f05f3b9-29ba-4b6b-95aa-2204697ffc06/Cheng Xiang - Missing You - gt.mp3"
                           , { "title": { "method": "eq"
                                        , "ref": "Missing You"
                                        }
                             , "artist": { "method": "eq"
                                         , "ref": "Cheng Xiang"
                                         }
                             }
                           )
    print(metric)

View File

@@ -182,7 +182,7 @@ def compare_pptx_files(file1_path, file2_path, **options):
else:
return None
if get_slide_notes(slide1) != get_slide_notes(slide2) and examine_note:
if get_slide_notes(slide1).strip() != get_slide_notes(slide2).strip() and examine_note:
return 0
# check if the shapes are the same
for shape1, shape2 in zip(slide1.shapes, slide2.shapes):
@@ -235,7 +235,7 @@ def compare_pptx_files(file1_path, file2_path, **options):
return 0
if hasattr(shape1, "text") and hasattr(shape2, "text"):
if shape1.text != shape2.text and examine_text:
if shape1.text.strip() != shape2.text.strip() and examine_text:
return 0
# check if the paragraphs are the same

View File

@@ -5,19 +5,21 @@ import os.path
# import operator
from numbers import Number
from typing import Any, Union, cast, Callable, Iterable
from typing import Dict, List, Tuple
from typing import Dict, List, Tuple, Set
import openpyxl
import pandas as pd
from openpyxl import Workbook
from openpyxl.cell.cell import Cell
# from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl.worksheet.worksheet import Worksheet
from .utils import _match_value_to_rule, _read_cell_style, read_cell_value
from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles\
, load_filters, load_pivot_tables
from desktop_env.evaluators.metrics.utils import _match_value_to_rule, _read_cell_style, read_cell_value
from desktop_env.evaluators.metrics.utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles \
, load_filters, load_pivot_tables
from rapidfuzz import fuzz
# from openpyxl.utils import coordinate_to_tuple
@@ -122,7 +124,6 @@ def compare_table(result: str, expected: str = None, **options) -> float:
worksheetr_names: List[str] = pdworkbookr.sheet_names
if expected is not None:
xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected)
pdworkbooke = pd.ExcelFile(expected)
worksheete_names: List[str] = pdworkbooke.sheet_names
@@ -158,8 +159,8 @@ def compare_table(result: str, expected: str = None, **options) -> float:
return 0.
sheet2: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke))
sheet1 = sheet1.round()
sheet2 = sheet2.round()
sheet1 = sheet1.round(error_limit)
sheet2 = sheet2.round(error_limit)
metric: bool = sheet1.equals(sheet2)
logger.debug("Sheet1: \n%s", str(sheet1))
logger.debug("Sheet2: \n%s", str(sheet2))
@@ -187,6 +188,63 @@ def compare_table(result: str, expected: str = None, **options) -> float:
logger.debug("Assertion: %s =p= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Compare Sheet Data by Printed Value #
elif r["type"] == "sheet_fuzzy":
# Fuzzy Match for Ranges {{{ #
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
# sheet_idx1: as sheet_idx0
# rules: list of dict, each dict is like
# { "range": ["A1:B6", "C2:E5"],
# "type": "includes" | "included_by" | "fuzzy_match" | "exact_match", # 0 includes 1, 0 includes_by 1
# "threshold": 85, // for fuzzy match
# "ignore_case": true | false,
# "ignore_chars": " ()", # filtered out
# "trim_leadings": "+ ", # filtered by lstrip
# "trim_trailings": "", # filtered by rstrip
# "normalization": [["Rd", "Road"]], # filtered by replace
# }
sheet1: Tuple[BOOK, str] = parse_idx(r["sheet_idx0"], result, expected)
sheet2: Tuple[BOOK, str] = parse_idx(r["sheet_idx1"], result, expected)
total_metric = True
for rl in r["rules"]:
for rng in MultiCellRange(rl["range"]):
for cdn in rng.cells:
coordinate: str = "{:}{:d}".format(get_column_letter(cdn[1]), cdn[0])
value1: str = str(read_cell_value(*sheet1, coordinate))
value2: str = str(read_cell_value(*sheet2, coordinate))
logger.debug("%s: %s vs %s", cdn, value1, value2)
for rplc in rl.get("normalization", []):
value1 = value1.replace(rplc[0], rplc[1])
value2 = value2.replace(rplc[0], rplc[1])
if "trim_leadings" in rl:
value1 = value1.lstrip(rl["trim_leadings"])
value2 = value2.lstrip(rl["trim_leadings"])
if "trim_trailings" in rl:
value1 = value1.rstrip(rl["trim_trailings"])
value2 = value2.rstrip(rl["trim_trailings"])
if "ignore_chars" in rl:
ignore_chars: Set[str] = set(rl["ignore_chars"])
value1 = "".join(filter(lambda ch: ch not in ignore_chars, value1))
value2 = "".join(filter(lambda ch: ch not in ignore_chars, value2))
if rl.get("ignore_case", False):
value1 = value1.lower()
value2 = value2.lower()
if rl["type"]=="includes":
metric: bool = value2 in value1
elif rl["type"]=="included_by":
metric: bool = value1 in value2
elif rl["type"]=="fuzzy_match":
metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.)
elif rl["type"]=="exact_match":
metric: bool = value1==value2
total_metric = total_metric and metric
metric: bool = total_metric
logger.debug("Assertion: %s =~= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Fuzzy Match for Ranges #
elif r["type"] == "sparkline":
# Compare Sparklines {{{ #
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"

View File

@@ -126,10 +126,14 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
value_str: str = ser.val.numRef.f
elif hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f"):
value_str: str = ser.val.strRef.f
else:
value_str: str = ""
if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f"):
categ_str: str = ser.cat.numRef.f
elif hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f"):
categ_str: str = ser.cat.strRef.f
else:
categ_str: str = ""
series.append("{:},{:}".format(value_str, categ_str))
series: str = ";".join(series)
@@ -272,7 +276,8 @@ def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[s
# }}} function load_pivot_tables #
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping)
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si", namespaces=_xlsx_ns_mapping)
_shared_str_value_selector = lxml.cssselect.CSSSelector("oo|t", namespaces=_xlsx_ns_mapping)
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
@@ -283,7 +288,9 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
with z_f.open("xl/sharedStrings.xml") as f:
shared_str_xml: _Element = lxml.etree.fromstring(f.read())
str_elements: List[_Element] = _shared_str_selector(shared_str_xml)
shared_strs: List[str] = [elm.text for elm in str_elements]
shared_strs: List[str] = [ "".join(t.text for t in _shared_str_value_selector(elm))\
for elm in str_elements
]
except:
logger.debug("Read shared strings error: %s", xlsx_file)
@@ -309,14 +316,15 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
, namespaces=_xlsx_ns_imapping
)
logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell))
if "@t" not in cell["c"]:
try:
if "@t" not in cell["c"] or cell["c"]["@t"] == "n":
return float(cell["c"]["v"])
if cell["c"]["@t"] == "s":
return shared_strs[int(cell["c"]["v"])]
if cell["c"]["@t"] == "str":
return cell["c"]["v"]
except (KeyError, ValueError):
return None
if cell["c"]["@t"] == "s":
return shared_strs[int(cell["c"]["v"])]
if cell["c"]["@t"] == "n":
return float(cell["c"]["v"])
if cell["c"]["@t"] == "str":
return cell["c"]["v"]
# }}} read_cell_value #
@@ -589,7 +597,7 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
bool
"""
if rule["method"].startswith("re"):
if rule["method"].startswith("re"): # re.FLAGs
flags: List[str] = rule["method"].split(".")[1:]
flags: Iterable[re.RegexFlag] = (getattr(re, fl) for fl in flags)
flag: re.RegexFlag = functools.reduce(operator.or_, flags, re.RegexFlag(0))
@@ -602,7 +610,7 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
, "ge", "gt"
}:
return getattr(operator, rule["method"])(value, rule["ref"])
if rule["method"].startswith("approx"):
if rule["method"].startswith("approx"): # approx:THRESHOLD
threshold: float = float(rule["method"].split(":")[1])
logger.debug("Approx: TH%f, REF%f, VAL%s", threshold, rule["ref"], repr(value))
try:

View File

@@ -1,5 +1,10 @@
import copy
import importlib.util
import json
import sys
import re
from typing import Dict
import json, copy
def check_json_keybindings(actual: str, expected: str, **options) -> float:
"""
@@ -10,6 +15,7 @@ def check_json_keybindings(actual: str, expected: str, **options) -> float:
Return:
float: the score
"""
def direct_load_json(fp):
try:
with open(fp, 'r') as f:
@@ -17,7 +23,7 @@ def check_json_keybindings(actual: str, expected: str, **options) -> float:
return data
except:
return None
def skip_first_line_load_json(fp):
try:
with open(fp, 'r') as f:
@@ -54,7 +60,7 @@ def check_json_settings(actual: str, expected: str, **options) -> float:
with open(actual, 'r') as f:
data = json.load(f)
expect = expected['expected']
data_copy = copy.deepcopy(data)
data_copy.update(expect)
@@ -81,10 +87,51 @@ def compare_text_file(actual: str, expected: str, **options) -> float:
with open(expected) as f2:
expected_text = f2.read()
ignore_blanks = options.get('ignore_blanks', False)
if ignore_blanks:
actual_text = re.sub(r'[\t\n]', ' ', actual_text).strip()
actual_text = re.sub(r'\s+', ' ', actual_text)
expected_text = re.sub(r'[\t\n]', ' ', expected_text).strip()
expected_text = re.sub(r'\s+', ' ', expected_text)
ignore_case = options.get('ignore_case', False)
if ignore_case:
actual_text = actual_text.lower()
expected_text = expected_text.lower()
if actual_text == expected_text:
return 1.0
return 0.0
import zipfile
def compare_zip_files(actual: str, expected: str, **options) -> float:
    """Compare two zip archives entry by entry.

    Args:
        actual (str): path to result zip file (may be None/empty if the task
            produced nothing)
        expected (str): path to gold zip file

    Return:
        float: 1.0 iff both archives contain exactly the same member names
            with byte-identical decompressed contents, else 0.0
    """
    if not actual:
        return 0.
    try:
        with zipfile.ZipFile(actual, 'r') as zip_file1, zipfile.ZipFile(expected, 'r') as zip_file2:
            file_list1 = set(zip_file1.namelist())
            file_list2 = set(zip_file2.namelist())

            # Member name sets must match exactly; extra or missing files fail.
            if file_list1 != file_list2:
                return 0.0

            # Compare the decompressed payload of every member.
            for file_name in file_list1:
                if zip_file1.read(file_name) != zip_file2.read(file_name):
                    return 0.0
    except (FileNotFoundError, zipfile.BadZipFile):
        # A missing or corrupt result archive is a failed task,
        # not a crash of the scorer.
        return 0.0
    return 1.0
def compare_config(actual: str, rules: Dict, **options) -> float:
if not actual:
@@ -128,3 +175,82 @@ def is_extension_installed(actual: str, rules: Dict, **options):
return 0.0
else:
raise NotImplementedError
def check_python_file_by_test_suite(actual_files, test_file, **options) -> float:
    """Score a task by running a test suite shipped as a python file.

    The module at ``test_file`` is imported dynamically and the function named
    by ``options['test_function_name']`` (default ``'test'``) is called; a
    truthy return value scores 1.0.

    Args:
        actual_files: unused here; kept so all scorers share a uniform signature
        test_file (str): path to the python file containing the test function

    Return:
        float: 1.0 if the test function runs and returns truthy, else 0.0
    """
    test_function_name = options.get('test_function_name', 'test')

    # Derive the module name from the file path so concurrent use with
    # different test files never clobbers another entry in sys.modules
    # (a fixed name like 'dynamic_module' would).
    module_name = 'dynamic_module_%x' % (hash(test_file) & 0xffffffff)

    spec = importlib.util.spec_from_file_location(module_name, test_file)
    if spec is None or spec.loader is None:
        # Unreadable/unrecognized file: failed task, not a scorer crash.
        return 0.0
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module  # register so intra-module imports work

    try:
        spec.loader.exec_module(module)  # execute to populate its namespace
        test_function = getattr(module, test_function_name)
        return 1.0 if test_function() else 0.0
    except Exception:
        # Import errors, a missing test function, or assertions raised inside
        # the suite all count as a failed task.
        return 0.0
def check_python_file_by_gold_file(actual_files, gold_file: str, **options) -> float:
    """Placeholder scorer: comparison against a gold python file is not yet
    implemented; currently falls through and returns None.
    """
    pass
def check_html_background_image(src_path: str, rule: Dict = None) -> float:
    """Check if the background image is correctly set in an HTML file.

    Scans the character data of every ``<style>`` element for the literal
    declaration ``background-image: url('<value>')`` where ``<value>`` is
    taken from ``rule["value"]``.

    multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108

    Args:
        src_path (str): path to the HTML file to inspect
        rule (Dict): must contain key "value" (expected image file name)

    Return:
        float: 1.0 if the declaration appears in any <style> block, else 0.0
    """
    # stdlib parser instead of bs4; it is all we need to read <style> text
    from html.parser import HTMLParser

    # Guard the documented default: rule=None (or a rule without "value")
    # cannot match anything, so score 0 instead of raising TypeError/KeyError.
    if not src_path or not rule or "value" not in rule:
        return 0.0

    class _StyleCollector(HTMLParser):
        # Accumulates the text content of every <style> element.
        def __init__(self):
            super().__init__()
            self._in_style = False
            self.styles = []

        def handle_starttag(self, tag, attrs):
            if tag == "style":
                self._in_style = True
                self.styles.append("")

        def handle_endtag(self, tag):
            if tag == "style":
                self._in_style = False

        def handle_data(self, data):
            if self._in_style:
                self.styles[-1] += data

    with open(src_path, 'r') as f:
        html_content = f.read()

    collector = _StyleCollector()
    collector.feed(html_content)

    needle = f'background-image: url(\'{rule["value"]}\')'
    for style_text in collector.styles:
        if needle in style_text:
            return 1.0
    return 0.0
def compare_result_files(src_path, tgt_path):
    """Compare whether the contents of two result files agree.

    A numeric gold value matches either by literal substring (the produced
    file may wrap the number in a sentence) or by value within an absolute
    tolerance of 1e-4; non-numeric contents must match exactly after
    stripping surrounding whitespace.

    multi-app:7f35355e-02a6-45b5-b140-f0be698bcf85

    Args:
        src_path (str): path to the produced result file
        tgt_path (str): path to the gold result file

    Return:
        float: 1.0 on a match, else 0.0
    """
    with open(src_path, 'r') as f:
        src_content = f.read().strip()
    with open(tgt_path, 'r') as f:
        tgt_content = f.read().strip()

    try:
        # Interpret the gold content as a number.
        tgt_content_num = float(tgt_content)
    except ValueError:
        # Non-numeric gold value: require exact (stripped) text equality.
        return 1.0 if src_content == tgt_content else 0.0

    if tgt_content in src_content:
        # The produced file may be a superset (prose + number) of the gold
        # value, so accept a literal substring hit.
        return 1.0
    try:
        src_content_num = float(src_content)
    except ValueError:
        return 0.0
    return 1.0 if abs(src_content_num - tgt_content_num) < 1e-4 else 0.0
if __name__ == "__main__":
    # Ad-hoc smoke test against a locally cached task's HTML output.
    src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/index.html"
    rule = {
        # The key was misspelled as "type:" before; the scorer only reads
        # "value", but fix the key for consistency with other rule dicts.
        "type": "value",
        "value": "anmi_sharper.png"
    }
    print(check_html_background_image(src_path, rule))