merge
This commit is contained in:
@@ -24,6 +24,7 @@ from .chrome import (
|
||||
get_gotoRecreationPage_and_get_html_content,
|
||||
get_url_dashPart,
|
||||
get_active_url_from_accessTree,
|
||||
get_find_installed_extension_name,
|
||||
get_info_from_website
|
||||
)
|
||||
from .file import get_cloud_file, get_vm_file, get_cache_file, get_content_from_vm_file
|
||||
@@ -31,7 +32,8 @@ from .general import get_vm_command_line, get_vm_terminal_output, get_vm_command
|
||||
from .gimp import get_gimp_config_file
|
||||
from .impress import get_audio_in_slide, get_background_image_in_slide
|
||||
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper, get_list_directory
|
||||
from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime
|
||||
from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime, get_time_diff_range
|
||||
from .replay import get_replay
|
||||
from .vlc import get_vlc_playing_info, get_vlc_config, get_default_video_player
|
||||
from .vscode import get_vscode_config
|
||||
from .calc import get_conference_city_in_order
|
||||
15
desktop_env/evaluators/getters/calc.py
Normal file
15
desktop_env/evaluators/getters/calc.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import csv
|
||||
|
||||
# I want to write a function, reads a csv file, and get all the contents in the third column in the order of rows
|
||||
def get_conference_city_in_order(env, config):
|
||||
# read the csv file
|
||||
csv_path = config['csv_path']
|
||||
print(f"Reading csv file from {csv_path}")
|
||||
with open(csv_path, 'r') as f:
|
||||
reader = csv.reader(f)
|
||||
# skip the header row
|
||||
next(reader)
|
||||
# get the third column in the order of rows
|
||||
conference_city_list = [row[2] for row in reader]
|
||||
return conference_city_list
|
||||
|
||||
@@ -4,6 +4,7 @@ import os
|
||||
import platform
|
||||
import sqlite3
|
||||
import time
|
||||
from urllib.parse import unquote
|
||||
from typing import Dict, Any, List
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
@@ -1010,6 +1011,43 @@ def get_find_unpacked_extension_path(env, config: Dict[str, str]):
|
||||
return "Google"
|
||||
|
||||
|
||||
def get_find_installed_extension_name(env, config: Dict[str, str]):
|
||||
os_type = env.vm_platform
|
||||
if os_type == 'Windows':
|
||||
preference_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
|
||||
'Google\\Chrome\\User Data\\Default\\Preferences'))""")['output'].strip()
|
||||
elif os_type == 'Darwin':
|
||||
preference_file_path = env.controller.execute_python_command(
|
||||
"import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Preferences'))")[
|
||||
'output'].strip()
|
||||
elif os_type == 'Linux':
|
||||
if "arm" in platform.machine():
|
||||
preference_file_path = env.controller.execute_python_command(
|
||||
"import os; print(os.path.join(os.getenv('HOME'), 'snap/chromium/common/chromium/Default/Preferences'))")[
|
||||
'output'].strip()
|
||||
else:
|
||||
preference_file_path = env.controller.execute_python_command(
|
||||
"import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Preferences'))")[
|
||||
'output'].strip()
|
||||
|
||||
else:
|
||||
raise Exception('Unsupported operating system')
|
||||
|
||||
try:
|
||||
content = env.controller.get_file(preference_file_path)
|
||||
data = json.loads(content)
|
||||
# Preferences store all the path of installed extensions, return them all and let metrics try to find one matches the targeted extension path
|
||||
all_extensions_name = []
|
||||
all_extensions = data.get('extensions', {}).get('settings', {})
|
||||
for id in all_extensions.keys():
|
||||
name = all_extensions[id]["manifest"]["name"]
|
||||
all_extensions_name.append(name)
|
||||
return all_extensions_name
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return "Google"
|
||||
|
||||
|
||||
def get_data_delete_automacally(env, config: Dict[str, str]):
|
||||
"""
|
||||
This function is used to open th "auto-delete" mode of chromium
|
||||
@@ -1037,8 +1075,8 @@ def get_data_delete_automacally(env, config: Dict[str, str]):
|
||||
try:
|
||||
content = env.controller.get_file(preference_file_path)
|
||||
data = json.loads(content)
|
||||
data_delete_state = data["profile"]["exit_type"]
|
||||
return data_delete_state
|
||||
data_delete_state = data["profile"].get("default_content_setting_values", None)
|
||||
return "true" if data_delete_state is not None else "false"
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return "Google"
|
||||
@@ -1077,6 +1115,7 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]):
|
||||
"""
|
||||
active_tab_url = get_active_url_from_accessTree(env, config)
|
||||
if not isinstance(active_tab_url, str):
|
||||
logger.error("active_tab_url is not a string")
|
||||
return None
|
||||
host = env.vm_ip
|
||||
port = 9222 # fixme: this port is hard-coded, need to be changed from config file
|
||||
@@ -1109,12 +1148,14 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]):
|
||||
for context in browser.contexts:
|
||||
for page in context.pages:
|
||||
page.wait_for_load_state("networkidle")
|
||||
if page.url == active_tab_url:
|
||||
# the accTree and playwright can get encoding(percent-encoding) characters, we need to convert them to normal characters
|
||||
if unquote(page.url) == unquote(active_tab_url):
|
||||
target_page = page
|
||||
print("tartget page url: ", target_page.url)
|
||||
print("tartget page title: ", target_page.title())
|
||||
print("\33[32mtartget page url: ", target_page.url, "\33[0m")
|
||||
print("\33[32mtartget page title: ", target_page.title(), "\33[0m")
|
||||
break
|
||||
if target_page is None:
|
||||
logger.error("Your tab is not the target tab.")
|
||||
return {}
|
||||
return_json = {}
|
||||
if config["category"] == "class":
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
from typing import Dict, List, Set
|
||||
from typing import Optional, Any, Union
|
||||
from datetime import datetime
|
||||
import requests
|
||||
import pandas as pd
|
||||
|
||||
@@ -77,21 +78,37 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
|
||||
gives (List[int]): optional. defaults to [0]. which files are directly
|
||||
returned to the metric. if len==1, str is returned; else, list is
|
||||
returned.
|
||||
only support for single file now:
|
||||
time_suffix(bool): optional. defaults to False. if True, append the current time in required format.
|
||||
time_format(str): optional. defaults to "%Y_%m_%d". format of the time suffix.
|
||||
"""
|
||||
|
||||
time_format = "%Y_%m_%d"
|
||||
if not config.get("multi", False):
|
||||
paths: List[str] = [config["path"]]
|
||||
dests: List[str] = [config["dest"]]
|
||||
print(config)
|
||||
if "time_suffix" in config.keys() and config["time_suffix"]:
|
||||
if "time_format" in config.keys():
|
||||
time_format = config["time_format"]
|
||||
# Insert time before . in file type suffix
|
||||
paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths]
|
||||
dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests]
|
||||
else:
|
||||
paths: List[str] = config["path"]
|
||||
dests: List[str] = config["dest"]
|
||||
print(paths)
|
||||
print(dests)
|
||||
|
||||
cache_paths: List[str] = []
|
||||
|
||||
gives: Set[int] = set(config.get("gives", [0]))
|
||||
|
||||
for i, (p, d) in enumerate(zip(paths, dests)):
|
||||
print("env cache_dir: ")
|
||||
print(env.cache_dir)
|
||||
_path = os.path.join(env.cache_dir, d)
|
||||
|
||||
print("_path: ")
|
||||
print(_path)
|
||||
file = env.controller.get_file(p)
|
||||
if file is None:
|
||||
#return None
|
||||
@@ -104,7 +121,9 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
|
||||
cache_paths.append(_path)
|
||||
with open(_path, "wb") as f:
|
||||
f.write(file)
|
||||
|
||||
# debug
|
||||
print("cache_paths")
|
||||
print(cache_paths)
|
||||
return cache_paths[0] if len(cache_paths)==1 else cache_paths
|
||||
|
||||
|
||||
|
||||
@@ -195,3 +195,10 @@ def get_accessibility_tree(env, *args) -> str:
|
||||
accessibility_tree: str = env.controller.get_accessibility_tree()
|
||||
logger.debug("AT@eval: %s", accessibility_tree)
|
||||
return accessibility_tree
|
||||
|
||||
def get_time_diff_range(env, config) -> str:
|
||||
try:
|
||||
return config["diff_range_in_minutes"]
|
||||
except:
|
||||
logger.error("diff_range_in_minutes not found in config.")
|
||||
return None
|
||||
@@ -2,7 +2,8 @@ from .basic_os import (
|
||||
check_gnome_favorite_apps,
|
||||
is_utc_0,
|
||||
check_text_enlarged,
|
||||
check_moved_jpgs
|
||||
check_moved_jpgs,
|
||||
is_in_vm_clickboard
|
||||
)
|
||||
from .chrome import (
|
||||
is_expected_tabs,
|
||||
@@ -19,6 +20,7 @@ from .chrome import (
|
||||
is_expected_active_tab,
|
||||
is_expected_url_pattern_match,
|
||||
is_added_to_steam_cart,
|
||||
is_expected_installed_extensions,
|
||||
compare_pdf_images
|
||||
)
|
||||
from .docs import (
|
||||
@@ -47,6 +49,7 @@ from .docs import (
|
||||
check_file_exists,
|
||||
check_tabstops,
|
||||
compare_contains_image,
|
||||
compare_docx_files_and_ignore_new_lines,
|
||||
compare_docx_images,
|
||||
compare_image_text,
|
||||
compare_references
|
||||
@@ -62,6 +65,14 @@ from .general import (
|
||||
fuzzy_match,
|
||||
check_include_exclude,
|
||||
check_direct_json_object,
|
||||
compare_time_in_speedtest_results,
|
||||
is_included_all_json_objects,
|
||||
is_gold_text_included_in_pdf,
|
||||
check_csv_line_number,
|
||||
file_contains,
|
||||
compare_terminal_and_txt,
|
||||
fuzzy_place_math,
|
||||
compare_python_pure_text,
|
||||
diff_text_file,
|
||||
literal_match
|
||||
)
|
||||
@@ -69,7 +80,7 @@ from .gimp import (
|
||||
check_brightness_decrease_and_structure_sim,
|
||||
check_contrast_increase_and_structure_sim,
|
||||
check_saturation_increase_and_structure_sim,
|
||||
check_image_size_and_structure_sim,
|
||||
check_image_size,
|
||||
check_image_mirror,
|
||||
check_palette_and_structure_sim,
|
||||
check_textbox_on_leftside,
|
||||
@@ -82,7 +93,9 @@ from .gimp import (
|
||||
increase_saturation,
|
||||
decrease_brightness,
|
||||
check_file_exists,
|
||||
compare_triangle_positions
|
||||
compare_triangle_positions,
|
||||
check_sharper,
|
||||
check_image_file_size
|
||||
)
|
||||
from .libreoffice import check_libre_locale
|
||||
from .pdf import check_pdf_pages
|
||||
@@ -126,13 +139,16 @@ from .vscode import (
|
||||
compare_text_file,
|
||||
compare_config,
|
||||
compare_answer,
|
||||
compare_result_files,
|
||||
is_extension_installed,
|
||||
check_json_settings,
|
||||
check_json_keybindings,
|
||||
check_python_file_by_test_suite,
|
||||
check_python_file_by_gold_file,
|
||||
check_html_background_image,
|
||||
compare_zip_files
|
||||
)
|
||||
from .calc import compare_conference_city_in_order
|
||||
from .others import compare_epub, check_mp3_meta
|
||||
|
||||
def infeasible():
|
||||
|
||||
@@ -56,3 +56,15 @@ def check_moved_jpgs(directory_list, rule):
|
||||
return 1
|
||||
else:
|
||||
return 0
|
||||
|
||||
def is_in_vm_clickboard(config, terminal_output):
|
||||
print("terminal_output: ")
|
||||
print(terminal_output)
|
||||
print("config: ")
|
||||
print(config)
|
||||
expected_results = config["expected"]
|
||||
# check if terminal_output has expected results
|
||||
if not isinstance(expected_results, list):
|
||||
return 1 if expected_results in terminal_output else 0
|
||||
else:
|
||||
return 1 if all(result in terminal_output for result in expected_results) else 0
|
||||
27
desktop_env/evaluators/metrics/calc.py
Normal file
27
desktop_env/evaluators/metrics/calc.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import openpyxl
|
||||
|
||||
def compare_conference_city_in_order( actual_city_list_path, expected_city):
|
||||
expected_city_list = expected_city["expected"]
|
||||
print(f"Reading csv file from {actual_city_list_path}")
|
||||
wb = openpyxl.load_workbook(actual_city_list_path)
|
||||
sheet = wb.active
|
||||
actual_city_list = []
|
||||
for row in sheet["C2:C22"]:
|
||||
for cell in row:
|
||||
actual_city_list.append(cell.value)
|
||||
# expected_city is the city that we want to compare with the actual city list
|
||||
# must in order index
|
||||
# debug
|
||||
print("expected_city_list:")
|
||||
print(expected_city_list)
|
||||
print("actual_city_list_path:")
|
||||
print(actual_city_list)
|
||||
wrong_list = []
|
||||
try:
|
||||
for i in range(len(actual_city_list)):
|
||||
if expected_city_list[i] not in actual_city_list[i]:
|
||||
wrong_list.append(i)
|
||||
print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
|
||||
except:
|
||||
return False
|
||||
return True if len(wrong_list) == 0 else False
|
||||
@@ -2,9 +2,9 @@ import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from itertools import product
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
import fitz # PyMuPDF
|
||||
import rapidfuzz.fuzz as fuzz
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
|
||||
@@ -61,6 +61,12 @@ def is_expected_url_pattern_match(result, rules) -> float:
|
||||
return 1.
|
||||
|
||||
|
||||
def is_expected_installed_extensions(installed_extensions, expected) -> float:
|
||||
print("installed_extensions: ")
|
||||
print(installed_extensions)
|
||||
expected_extensions = expected["expected"]
|
||||
return 1 if expected_extensions == installed_extensions else 0. # must equal, no additional extensions allowed
|
||||
|
||||
def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
|
||||
"""
|
||||
Checks if the expected tabs are open in Chrome.
|
||||
@@ -94,12 +100,24 @@ def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float:
|
||||
elif rule['type'] == "liked_authors_websites_urls":
|
||||
# Check if "liked authors" folder exists
|
||||
liked_authors_folder = next((bookmark for bookmark in bookmarks['bookmark_bar']['children'] if
|
||||
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
|
||||
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
|
||||
if liked_authors_folder:
|
||||
# Check if it contains the specified URLs
|
||||
liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if
|
||||
bookmark['type'] == 'url']
|
||||
return 1. if set(liked_authors_urls) == set(rule['urls']) else 0.
|
||||
|
||||
urls = rule['urls']
|
||||
|
||||
for idx, url in enumerate(urls):
|
||||
if isinstance(url, str):
|
||||
urls[idx] = [url]
|
||||
|
||||
combinations = product(*urls)
|
||||
|
||||
for combination in combinations:
|
||||
if set(combination) == set(liked_authors_urls):
|
||||
return 1.
|
||||
return 0.
|
||||
else:
|
||||
return 0.
|
||||
else:
|
||||
@@ -140,15 +158,16 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st
|
||||
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
|
||||
return score / len(pdf2_path)
|
||||
|
||||
|
||||
import fitz
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
from borb.pdf import Document
|
||||
from borb.pdf import PDF
|
||||
|
||||
from pathlib import Path
|
||||
import typing
|
||||
|
||||
|
||||
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
|
||||
def extract_images_from_pdf(pdf_path):
|
||||
pdf_document = fitz.open(pdf_path)
|
||||
@@ -163,14 +182,14 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
|
||||
images.append(img)
|
||||
|
||||
return images
|
||||
|
||||
|
||||
def fix_pdf(in_path: Path, out_path: Path) -> None:
|
||||
doc: typing.Optional[Document] = None
|
||||
with open(in_path, "rb") as fh:
|
||||
doc = PDF.loads(fh)
|
||||
with open(out_path, "wb") as fh:
|
||||
PDF.dumps(fh, doc)
|
||||
|
||||
|
||||
fix_pdf(Path(pdf1_path), Path(pdf1_path))
|
||||
fix_pdf(Path(pdf2_path), Path(pdf2_path))
|
||||
|
||||
@@ -183,7 +202,7 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
|
||||
for img1, img2 in zip(images1, images2):
|
||||
if img1.tobytes() != img2.tobytes():
|
||||
return 0.
|
||||
|
||||
|
||||
return 1.
|
||||
|
||||
|
||||
|
||||
28
desktop_env/evaluators/metrics/demo.py
Normal file
28
desktop_env/evaluators/metrics/demo.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import fitz # PyMuPDF
|
||||
|
||||
def extract_answers_from_pdf(pdf_file):
|
||||
# 打开PDF文件
|
||||
doc = fitz.open(pdf_file)
|
||||
answers = []
|
||||
|
||||
# 遍历每一页
|
||||
for page in doc:
|
||||
# 提取当前页的文本
|
||||
text = page.get_text()
|
||||
# 分割文本为行
|
||||
lines = text.split('\n')
|
||||
for line in lines:
|
||||
if line.strip(): # 排除空白行
|
||||
# 分割等号,提取答案
|
||||
parts = line.split('=')
|
||||
if len(parts) > 1:
|
||||
answer = parts[-1].strip() # 取等号后的部分为答案
|
||||
answers.append(answer)
|
||||
|
||||
return answers
|
||||
|
||||
# 假设你的文件名是'math_problems.pdf'
|
||||
pdf_file = '/Users/lxc/Desktop/calculus.pdf'
|
||||
answers = extract_answers_from_pdf(pdf_file)
|
||||
for i, answer in enumerate(answers, 1):
|
||||
print(f"题目{i}的答案是: {answer}")
|
||||
@@ -3,8 +3,10 @@ import os
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from io import BytesIO
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from PIL import Image
|
||||
from docx import Document
|
||||
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
|
||||
from docx.shared import RGBColor
|
||||
@@ -23,6 +25,9 @@ def find_default_font(config_file_path, rules):
|
||||
default_font = None
|
||||
expected_font = rules["font_name"]
|
||||
|
||||
if not config_file_path:
|
||||
return 0
|
||||
|
||||
try:
|
||||
tree = ET.parse(config_file_path)
|
||||
root = tree.getroot()
|
||||
@@ -42,6 +47,9 @@ def find_default_font(config_file_path, rules):
|
||||
|
||||
|
||||
def contains_page_break(docx_file):
|
||||
if not docx_file:
|
||||
return 0
|
||||
|
||||
doc = Document(docx_file)
|
||||
|
||||
namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
|
||||
@@ -62,6 +70,9 @@ def compare_docx_files(file1, file2, **options):
|
||||
ignore_order = options.get('ignore_order', False)
|
||||
content_only = options.get('content_only', False)
|
||||
|
||||
if not file1 or not file2:
|
||||
return 0
|
||||
|
||||
def get_paragraph_texts_odt(document):
|
||||
paragraphs = document.getElementsByType(P)
|
||||
paragraph_texts = []
|
||||
@@ -118,20 +129,30 @@ def compare_docx_files(file1, file2, **options):
|
||||
if text1 != text2:
|
||||
return 0
|
||||
else:
|
||||
print("ignore_blanks=false")
|
||||
if len(doc1_paragraphs) != len(doc2_paragraphs):
|
||||
print(doc1_paragraphs)
|
||||
print(doc2_paragraphs)
|
||||
print(len(doc1_paragraphs))
|
||||
print(len(doc2_paragraphs))
|
||||
return 0
|
||||
|
||||
print("in compare")
|
||||
# Compare each paragraph
|
||||
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
|
||||
if ignore_case:
|
||||
p1, p2 = p1.lower(), p2.lower()
|
||||
if p1 != p2:
|
||||
print(p1)
|
||||
print(p2)
|
||||
return 0
|
||||
|
||||
return 1
|
||||
|
||||
|
||||
def compare_init_lines(file1, file2):
|
||||
if not file1 or not file2:
|
||||
return 0
|
||||
|
||||
doc1 = Document(file1)
|
||||
doc2 = Document(file2)
|
||||
|
||||
@@ -149,6 +170,9 @@ def compare_init_lines(file1, file2):
|
||||
|
||||
|
||||
def compare_docx_tables(docx_file1, docx_file2):
|
||||
if not docx_file1 or not docx_file2:
|
||||
return 0
|
||||
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
|
||||
@@ -174,11 +198,10 @@ def compare_docx_tables(docx_file1, docx_file2):
|
||||
return 1
|
||||
|
||||
|
||||
from io import BytesIO
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def compare_docx_images(docx_file1, docx_file2):
|
||||
if not docx_file1 or not docx_file2:
|
||||
return 0
|
||||
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
|
||||
@@ -212,6 +235,9 @@ def compare_image_text(image_path, rule):
|
||||
|
||||
|
||||
def compare_line_spacing(docx_file1, docx_file2):
|
||||
if not docx_file1 or not docx_file2:
|
||||
return 0
|
||||
|
||||
if not compare_docx_files(docx_file1, docx_file2):
|
||||
return 0
|
||||
doc1 = Document(docx_file1)
|
||||
@@ -233,6 +259,9 @@ def compare_line_spacing(docx_file1, docx_file2):
|
||||
|
||||
|
||||
def compare_insert_equation(docx_file1, docx_file2):
|
||||
if not docx_file1 or not docx_file2:
|
||||
return 0
|
||||
|
||||
if not compare_docx_files(docx_file1, docx_file2):
|
||||
return 0
|
||||
|
||||
@@ -248,6 +277,9 @@ def compare_insert_equation(docx_file1, docx_file2):
|
||||
|
||||
|
||||
def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
|
||||
if not docx_file:
|
||||
return 0
|
||||
|
||||
doc = Document(docx_file)
|
||||
expected_font = rules["font_name"]
|
||||
|
||||
@@ -260,6 +292,9 @@ def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
|
||||
|
||||
|
||||
def compare_subscript_contains(docx_file1, docx_file2):
|
||||
if not docx_file1 or not docx_file2:
|
||||
return 0
|
||||
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
|
||||
@@ -272,6 +307,9 @@ def compare_subscript_contains(docx_file1, docx_file2):
|
||||
|
||||
|
||||
def has_page_numbers_in_footers(docx_file):
|
||||
if not docx_file:
|
||||
return 0
|
||||
|
||||
doc = Document(docx_file)
|
||||
|
||||
for section in doc.sections:
|
||||
@@ -286,6 +324,9 @@ def has_page_numbers_in_footers(docx_file):
|
||||
|
||||
|
||||
def is_first_line_centered(docx_file):
|
||||
if not docx_file:
|
||||
return 0
|
||||
|
||||
doc = Document(docx_file)
|
||||
first_paragraph = doc.paragraphs[0]
|
||||
|
||||
@@ -294,11 +335,16 @@ def is_first_line_centered(docx_file):
|
||||
|
||||
|
||||
def check_file_exists(directory, filename):
|
||||
if not directory or not filename:
|
||||
return 0
|
||||
file_path = os.path.join(directory, filename)
|
||||
return 1 if os.path.isfile(file_path) else 0
|
||||
|
||||
|
||||
def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
|
||||
if not docx_file1 or not docx_file2:
|
||||
return .0
|
||||
|
||||
doc1: Document = Document(docx_file1)
|
||||
doc2: Document = Document(docx_file2)
|
||||
para1 = [p for p in doc1.paragraphs if p.text.strip()]
|
||||
@@ -334,6 +380,9 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
|
||||
|
||||
|
||||
def compare_contains_image(docx_file1, docx_file2):
|
||||
if not docx_file1 or not docx_file2:
|
||||
return 0
|
||||
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
|
||||
@@ -346,6 +395,9 @@ def compare_contains_image(docx_file1, docx_file2):
|
||||
|
||||
|
||||
def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
|
||||
if not file_path1 or not file_path2:
|
||||
return 0
|
||||
|
||||
if not compare_docx_files(file_path1, file_path2):
|
||||
return 0
|
||||
document = Document(file_path1)
|
||||
@@ -380,6 +432,9 @@ def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
|
||||
|
||||
|
||||
def check_highlighted_words(file_path1, file_path2):
|
||||
if not file_path1 or not file_path2:
|
||||
return 0
|
||||
|
||||
if not compare_docx_files(file_path1, file_path2):
|
||||
return 0
|
||||
|
||||
@@ -402,6 +457,9 @@ def check_highlighted_words(file_path1, file_path2):
|
||||
|
||||
|
||||
def evaluate_strike_through_last_paragraph(file_path1, file_path2):
|
||||
if not file_path1 or not file_path2:
|
||||
return 0
|
||||
|
||||
if not compare_docx_files(file_path1, file_path2):
|
||||
return 0
|
||||
document = Document(file_path1)
|
||||
@@ -418,6 +476,9 @@ def evaluate_strike_through_last_paragraph(file_path1, file_path2):
|
||||
|
||||
|
||||
def evaluate_conversion(file_path):
|
||||
if not file_path:
|
||||
return 0
|
||||
|
||||
document = Document(file_path)
|
||||
|
||||
for table in document.tables:
|
||||
@@ -437,6 +498,9 @@ def evaluate_conversion(file_path):
|
||||
|
||||
|
||||
def evaluate_spacing(file_path):
|
||||
if not file_path:
|
||||
return 0
|
||||
|
||||
document = Document(file_path)
|
||||
|
||||
# Check line spacing for introduction, body, and conclusion
|
||||
@@ -450,6 +514,9 @@ def evaluate_spacing(file_path):
|
||||
|
||||
|
||||
def check_italic_font_size_14(path1, path2):
|
||||
if not path1 or not path2:
|
||||
return 0
|
||||
|
||||
if not compare_docx_files(path1, path2):
|
||||
return 0
|
||||
document = Document(path1)
|
||||
@@ -463,6 +530,9 @@ def check_italic_font_size_14(path1, path2):
|
||||
|
||||
|
||||
def evaluate_alignment(docx_path):
|
||||
if not docx_path:
|
||||
return 0
|
||||
|
||||
# Load the document
|
||||
doc = Document(docx_path)
|
||||
|
||||
@@ -492,6 +562,9 @@ def evaluate_alignment(docx_path):
|
||||
|
||||
|
||||
def get_unique_train_ids(initial_file): # fixed standard
|
||||
if not initial_file:
|
||||
return set(), 0
|
||||
|
||||
doc = Document(initial_file)
|
||||
train_ids = set()
|
||||
processed_lines = 0
|
||||
@@ -508,6 +581,9 @@ def get_unique_train_ids(initial_file): # fixed standard
|
||||
|
||||
|
||||
def check_no_duplicates(initial_file, processed_file):
|
||||
if not initial_file or not processed_file:
|
||||
return 0
|
||||
|
||||
# Open the document
|
||||
train_ids_ini, ini_lines = get_unique_train_ids(initial_file)
|
||||
doc_processed = Document(processed_file)
|
||||
@@ -535,6 +611,9 @@ def check_no_duplicates(initial_file, processed_file):
|
||||
|
||||
|
||||
def compare_docx_lines(file1, file2):
|
||||
if not file1 or not file2:
|
||||
return 0
|
||||
|
||||
# Read the text of the document, line by line
|
||||
doc1 = Document(file1)
|
||||
doc1_lines = [p.text.strip() for p in doc1.paragraphs if p.text.strip()]
|
||||
@@ -551,8 +630,47 @@ def compare_docx_lines(file1, file2):
|
||||
return 0
|
||||
|
||||
|
||||
def compare_docx_files_and_ignore_new_lines(file1, file2, **options):
|
||||
ignore_blanks = options.get('ignore_blanks', True)
|
||||
|
||||
if not file1 or not file2:
|
||||
return 0
|
||||
|
||||
# Determine file types and load documents
|
||||
if file1.endswith('.docx') and file2.endswith('.docx'):
|
||||
doc1 = Document(file1)
|
||||
doc2 = Document(file2)
|
||||
# First, delete all the blank in paragraphs
|
||||
doc1 = [p for p in doc1.paragraphs if p.text != '']
|
||||
doc2 = [p for p in doc2.paragraphs if p.text != '']
|
||||
doc1_paragraphs = [p.text for p in doc1]
|
||||
doc2_paragraphs = [p.text for p in doc2]
|
||||
else:
|
||||
# Unsupported file types or mismatch
|
||||
print("Unsupported file types or mismatch between file types.")
|
||||
return 0
|
||||
|
||||
# Process and compare documents
|
||||
if ignore_blanks:
|
||||
text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
|
||||
text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
|
||||
if text1 != text2:
|
||||
return 0
|
||||
else:
|
||||
if len(doc1_paragraphs) != len(doc2_paragraphs):
|
||||
return 0
|
||||
# Compare each paragraph
|
||||
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
|
||||
if p1 != p2:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
|
||||
# Docx file saved in the ubuntu cannot use this function to compare highlight, don't know why, deprecated
|
||||
def compare_highlighted_text(file1, file2):
|
||||
if not file1 or not file2:
|
||||
return 0
|
||||
|
||||
def extract_highlighted_text(file_path):
|
||||
highlighted_texts = []
|
||||
|
||||
@@ -590,6 +708,9 @@ def compare_highlighted_text(file1, file2):
|
||||
|
||||
|
||||
def compare_references(file1, file2, **options):
|
||||
if not file1 or not file2:
|
||||
return 0
|
||||
|
||||
reference_indicator = options.get('reference_indicator', 'References')
|
||||
reference_base_result = options.get('reference_base_result', 0.5)
|
||||
|
||||
@@ -634,48 +755,3 @@ def compare_references(file1, file2, **options):
|
||||
return (result - reference_base_result) / (1 - reference_base_result)
|
||||
else:
|
||||
return 0
|
||||
|
||||
|
||||
def compare_answer(file1, file2, **options):
|
||||
"""This is a specific function to compare the """
|
||||
# Determine file types and load documents
|
||||
if file1.endswith('.docx') and file2.endswith('.docx'):
|
||||
doc1 = Document(file1)
|
||||
doc2 = Document(file2)
|
||||
doc1_paragraphs = [p.text for p in doc1.paragraphs]
|
||||
doc2_paragraphs = [p.text for p in doc2.paragraphs]
|
||||
else:
|
||||
# Unsupported file types or mismatch
|
||||
print("Unsupported file types or mismatch between file types.")
|
||||
return 0
|
||||
|
||||
# Find the references section in the paragraphs, find the idx of the last reference_indicator in the paragraph list
|
||||
ref1_idx = doc1_paragraphs.index(reference_indicator) if reference_indicator in doc1_paragraphs else -1
|
||||
ref2_idx = doc2_paragraphs.index(reference_indicator) if reference_indicator in doc2_paragraphs else -1
|
||||
|
||||
if ref1_idx == -1 and ref2_idx == -1:
|
||||
return 1
|
||||
|
||||
if ref1_idx == -1 or ref2_idx == -1:
|
||||
return 0
|
||||
|
||||
# split the reference section into reference items, and remove the empty string items
|
||||
ref1 = [p for p in doc1_paragraphs[ref1_idx + 1:] if p.strip()]
|
||||
ref2 = [p for p in doc2_paragraphs[ref2_idx + 1:] if p.strip()]
|
||||
|
||||
# Compare the references
|
||||
|
||||
if len(ref1) != len(ref2):
|
||||
return 0
|
||||
|
||||
total_similarity = 0
|
||||
for r1, r2 in zip(ref1, ref2):
|
||||
# fuzzy match the references
|
||||
similarity = fuzz.ratio(r1, r2) / 100.0
|
||||
total_similarity += similarity
|
||||
|
||||
result = total_similarity / len(ref1)
|
||||
if result >= reference_base_result:
|
||||
return (result - reference_base_result) / (1 - reference_base_result)
|
||||
else:
|
||||
return 0
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import csv
|
||||
import datetime
|
||||
import difflib
|
||||
import functools
|
||||
import json
|
||||
import yaml
|
||||
import logging
|
||||
import operator
|
||||
import re
|
||||
import sqlite3
|
||||
@@ -10,17 +12,19 @@ from typing import Callable, Any, Union
|
||||
from typing import Dict, List, Pattern
|
||||
|
||||
import lxml.etree
|
||||
import pandas as pd
|
||||
import pdfplumber
|
||||
import yaml
|
||||
from docx import Document
|
||||
from lxml.cssselect import CSSSelector
|
||||
from lxml.etree import _Element
|
||||
from rapidfuzz import fuzz
|
||||
import difflib
|
||||
|
||||
from .utils import _match_record, _match_value_to_rule
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger("desktopenv.metric.general")
|
||||
|
||||
|
||||
def check_include_exclude(result: str, rules: Dict[str, List[str]]) -> float:
|
||||
if result is None:
|
||||
return 0.
|
||||
@@ -68,6 +72,7 @@ def is_in_list(result, rules) -> float:
|
||||
else:
|
||||
return 0.
|
||||
|
||||
|
||||
def diff_text_file(result: str, expect: str) -> float:
|
||||
if result is None:
|
||||
return 0.
|
||||
@@ -78,12 +83,32 @@ def diff_text_file(result: str, expect: str) -> float:
|
||||
expected_lines: List[str] = f.read().splitlines()
|
||||
return difflib.SequenceMatcher(a=result_lines, b=expected_lines).ratio()
|
||||
|
||||
|
||||
def fuzzy_match(result, rules) -> float:
|
||||
expect = rules["expected"]
|
||||
|
||||
return fuzz.ratio(result, expect) / 100.
|
||||
|
||||
|
||||
def fuzzy_place_math(result_file_path, rules) -> float:
    """Fuzzy-score every word of a .docx answer file against accepted answers.

    Args:
        result_file_path: path to the answer document (.docx).
        rules: dict with key "expected" holding a list of acceptable answers.

    Returns:
        The mean, over all whitespace-separated words extracted from the
        document, of the best fuzz.ratio score (0-100) against any accepted
        answer; 0.0 when the document contains no words (previously this
        raised ZeroDivisionError).
    """
    expect = rules["expected"]  # a list of possible answers
    # Read the docx and get all texts out, overlook blank lines,
    # remove blanks before and after each line.
    doc = Document(result_file_path)
    words_list = []
    for para in doc.paragraphs:
        words_list.extend(para.text.split())
    # Print out the list of extracted words
    print("Your Answers: ")
    print(words_list)
    if not words_list:
        # Empty document: avoid dividing by zero below.
        return 0.0
    fuzzy_score_list = []
    for word in words_list:
        # Best score of this word against any acceptable answer
        # (default=0 keeps the original behaviour for an empty answer list).
        best_score = max((fuzz.ratio(word, ans) for ans in expect), default=0)
        fuzzy_score_list.append(best_score)
    return sum(fuzzy_score_list) / len(fuzzy_score_list)
|
||||
|
||||
def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
|
||||
"""
|
||||
Args:
|
||||
@@ -191,10 +216,10 @@ def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float:
|
||||
return 0.
|
||||
|
||||
if "text" in r:
|
||||
match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \
|
||||
else (lambda a, b: fuzz.ratio(a, b) / 100.)
|
||||
, r["text"]
|
||||
)
|
||||
match_func: Callable[[str], Number] = functools.partial(operator.eq if r["exact"] \
|
||||
else (lambda a, b: fuzz.ratio(a, b) / 100.)
|
||||
, r["text"]
|
||||
)
|
||||
match_score: Number = 0
|
||||
for elm in elements:
|
||||
match_score = max(match_score, match_func(elm.text or None))
|
||||
@@ -267,20 +292,157 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
|
||||
return float(metric)
|
||||
|
||||
|
||||
def check_direct_json_object(result, rules) -> float:
    """
    One of the most commonly used functions to evaluate.
    Compare a result object against an expected JSON object.

    Rules:
        expected: dict of key -> expected value.
        expect_in_result (optional, default False): when True, each expected
            value only needs to be *contained in* (via ``in``) the
            corresponding result value, instead of equal to it.

    Returns 1.0 when every expected key matches, else 0.0.
    """
    if isinstance(result, str):
        # Normalise a JSON-ish string: trim it and swap single quotes for
        # double quotes before parsing.
        result = result.strip()
        result = result.replace("'", '"')
        result = json.loads(result)
    print("result: ")
    print(result)
    print("expected: ")
    print(rules["expected"])
    if result is None:
        return 0.
    # NOTE(review): a merge left two copies of the comparison loop in place;
    # the first (strict-equality) copy always returned before the
    # expect_in_result branch could run, making that option dead code.
    # Only the branching version is kept below.
    expect_in_result = rules.get("expect_in_result", False)
    expected_json = rules["expected"]
    for key in expected_json.keys():
        expected_value = expected_json.get(key)
        if expect_in_result:
            if expected_value not in result.get(key):
                return 0.
        elif expected_value != result.get(key):
            return 0.
    return 1.0
|
||||
|
||||
|
||||
def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
    """Check that every TEST_DATE entry of a speedtest CSV is recent.

    Args:
        speedtest_result_path: path to the speedtest results CSV.
        time_diff: maximum allowed difference, in minutes, between each
            entry's trailing HH:MM timestamp and the current wall-clock time.

    Returns 1 when every row is within the window, else 0 (including a
    missing file path or a CSV without a TEST_DATE* column).
    """
    if not speedtest_result_path:
        return 0

    # open the speedtest results file (csv) and locate the first column
    # whose header starts with 'TEST_DATE'.
    date_col = None
    with open(speedtest_result_path, 'r') as f:
        frame = pd.read_csv(f)
    for column in frame.columns:
        if column.startswith('TEST_DATE'):
            date_col = column
            break
    if date_col is None:
        # Previously fell through to frame[None] and raised KeyError.
        return 0
    now_date_time = datetime.datetime.now().strftime('%H:%M')
    now_parsed = datetime.datetime.strptime(now_date_time, '%H:%M')
    for date in frame[date_col]:
        # Each cell is assumed to end with an HH:MM timestamp; compare only
        # that part against the current time.
        entry = datetime.datetime.strptime(str(date)[-5:], '%H:%M')
        minutes_apart = abs((entry - now_parsed).total_seconds()) / 60
        if not minutes_apart < int(time_diff):
            return 0
    return 1
|
||||
|
||||
|
||||
def is_included_all_json_objects(gold_file_path, result_file_path):
    """Return 1 when every key/value pair of the gold JSON file also appears
    (with an equal value) in the result JSON file, else 0."""
    if not gold_file_path or not result_file_path:
        return 0

    print("gold_file_path: ")
    print(gold_file_path)
    print("result_file_path: ")
    print(result_file_path)
    # Load both JSON documents, then run a subset check: every gold pair
    # must exist unchanged in the result.
    with open(gold_file_path, 'r') as gold_fp:
        gold_json = json.load(gold_fp)
    with open(result_file_path, 'r') as result_fp:
        result_json = json.load(result_fp)
    is_subset = all(
        key in result_json and gold_json[key] == result_json[key]
        for key in gold_json
    )
    return 1 if is_subset else 0
|
||||
|
||||
|
||||
def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
    """Return 1 when every value of the gold JSON file occurs verbatim in the
    extracted text of the PDF; otherwise print the missing keys and return 0."""
    print("gold_text_path: ")
    print(gold_text_path)
    print("pdf_file_path: ")
    print(pdf_file_path)
    # The gold file is JSON; every value must appear in the PDF text.
    with open(gold_text_path, 'r') as fp:
        gold_json = json.load(fp)
    # Concatenate the extracted text of every page.
    with pdfplumber.open(pdf_file_path) as pdf:
        text = ''
        for page in pdf.pages:
            text += page.extract_text()
    missing_keys = [key for key in gold_json.keys() if gold_json[key] not in text]
    if missing_keys:
        print("false_list: ")
        print(missing_keys)
        return 0
    return 1
|
||||
|
||||
|
||||
def file_contains(file_path, config):
    """Return 1 when every string in config["expected"] occurs in the text
    file at *file_path*, else 0.

    NOTE(review): a missing/empty path yields 1 (vacuous pass), unlike the
    sibling checks that return 0 — confirm this is intentional.
    """
    # file_path ends with .txt
    if not file_path:
        return 1
    with open(file_path, 'r') as fp:
        contents = fp.read()
    all_found = all(needle in contents for needle in config["expected"])
    return 1 if all_found else 0
|
||||
|
||||
|
||||
def check_csv_line_number(file_path, line_number):
    """Return 1 when the CSV at *file_path* has exactly the expected number
    of rows (as parsed by csv.reader, header included), else 0.

    Args:
        file_path: path to the file; must end with '.csv'.
        line_number: dict with key "expected" (int or numeric string).
    """
    # check file_path suffix; also guard a missing path
    # (previously None crashed on .endswith()).
    if not file_path or not file_path.endswith('.csv'):
        return 0
    # check line number
    with open(file_path, 'r') as f:
        reader = csv.reader(f)
        line_count = sum(1 for row in reader)
    return 1 if line_count == int(line_number["expected"]) else 0
|
||||
|
||||
|
||||
def compare_terminal_and_txt(txt_file_path, terminal_output):
    """Return 1 when *terminal_output* equals the full content of the text
    file at *txt_file_path* (exact match, whitespace included), else 0."""
    with open(txt_file_path, 'r') as fp:
        expected = fp.read()
    return 1 if terminal_output == expected else 0
|
||||
|
||||
|
||||
def compare_python_pure_text(py_file_path, gold_file_path):
    """Return 1 when the two files have identical content once ALL
    whitespace is removed, else 0."""
    print("py_file_path: ")
    print(py_file_path)
    print("gold_file_path: ")
    print(gold_file_path)

    # gold_file_path = gold_file_path.replace('.txt', '.py')
    def _squash(text):
        # Drop every whitespace character so formatting differences are ignored.
        return ''.join(text.split())

    with open(py_file_path, 'r') as first:
        first_text = first.read()
    with open(gold_file_path, 'r') as second:
        second_text = second.read()
    return 1 if _squash(first_text) == _squash(second_text) else 0
|
||||
|
||||
@@ -5,7 +5,7 @@ from PIL import Image, ImageChops, ImageStat
|
||||
|
||||
|
||||
def compare_image_list(pred_img_path_list: Union[str, List[str]],
|
||||
gold_img_path_list: Union[str, List[str]]) -> float:
|
||||
gold_img_path_list: Union[str, List[str]]) -> float:
|
||||
""" Compare two image lists, only if all images are the same, return 1.0, otherwise return 0.0
|
||||
"""
|
||||
if type(pred_img_path_list) != list:
|
||||
@@ -177,6 +177,16 @@ def calculate_contrast(image):
|
||||
return np.std(pixels)
|
||||
|
||||
|
||||
def calculate_image_sharpness(image_path):
    """Sharpness metric: variance of the Laplacian of the grayscale image.

    Higher values indicate more high-frequency detail (a sharper image).
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    laplacian = cv2.Laplacian(gray, cv2.CV_64F)
    return np.var(laplacian)
|
||||
|
||||
|
||||
def structure_check_by_mse(img1, img2, threshold=0.03):
|
||||
"""Check if two images are approximately the same by MSE"""
|
||||
mse = np.mean(
|
||||
@@ -295,7 +305,8 @@ def check_triangle_position(tgt_path):
|
||||
|
||||
# We assume the triangle is a different color from the background
|
||||
# Find the unique colors
|
||||
unique_colors, counts = np.unique(img_array.reshape(-1, img_array.shape[2]), axis=0, return_counts=True)
|
||||
unique_colors, counts = np.unique(img_array.reshape(-1, img_array.shape[2]), axis=0,
|
||||
return_counts=True)
|
||||
unique_colors_sorted = unique_colors[np.argsort(counts)]
|
||||
|
||||
# Assuming the background is the most common color and the triangle is a different color
|
||||
@@ -337,6 +348,25 @@ def check_structure_sim(src_path, tgt_path):
|
||||
return structure_same
|
||||
|
||||
|
||||
def check_structure_sim_resized(src_path, tgt_path):
    """
    Check if the structure of the two images are similar after resizing.
    gimp:d16c99dc-2a1e-46f2-b350-d97c86c85c15
    """
    if src_path is None or tgt_path is None:
        return 0.

    source = Image.open(src_path)
    target = Image.open(tgt_path)
    # Bring the source to the target's dimensions before comparing structure.
    resized_source = source.resize(target.size)
    return structure_check_by_ssim(resized_source, target)
|
||||
|
||||
|
||||
def check_contrast_increase_and_structure_sim(src_path, tgt_path):
|
||||
"""
|
||||
Check if the src image has higher contrast than the tgt image and the structures are similar
|
||||
@@ -388,34 +418,28 @@ def check_config_status(actual_config_path, rule):
|
||||
return 0.
|
||||
|
||||
|
||||
def check_image_size(src_path, rule):
    """
    Check if the size of the src image is correct
    multi-apps:42f4d1c7-4521-4161-b646-0a8934e36081

    Rule keys "height" / "width" may each be None to skip that dimension.
    Returns 1. when every requested dimension matches, else 0.
    """
    # NOTE(review): the merge left the removed check_image_size_and_structure_sim
    # interleaved with this replacement; only the new size-only check is kept.
    if src_path is None:
        return 0.

    # Load the image
    img = Image.open(src_path)

    # Check the size
    if rule["height"] is not None:
        height_same = img.size[1] == rule["height"]
    else:
        height_same = True
    if rule["width"] is not None:
        width_same = img.size[0] == rule["width"]
    else:
        width_same = True

    if height_same and width_same:
        return 1.
    else:
        return 0.
|
||||
@@ -521,6 +545,31 @@ def check_green_background(src_path, tgt_path):
|
||||
return 1.
|
||||
|
||||
|
||||
def check_sharper(src_path, tgt_path):
    """
    Check if the source image is sharper than the target image.
    multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108
    """
    src_sharpness = calculate_image_sharpness(src_path)
    tgt_sharpness = calculate_image_sharpness(tgt_path)
    # Strictly greater counts as sharper; ties score 0.
    return 1.0 if src_sharpness > tgt_sharpness else 0.0
|
||||
|
||||
|
||||
def check_image_file_size(src_path, rule):
    """Return 1.0 when the file at *src_path* is strictly smaller than
    rule["max_size"] bytes, else 0.0 (also 0.0 for a missing path)."""
    if src_path is None:
        return 0.0

    # Strict comparison: a file exactly at the limit fails.
    size_bytes = os.path.getsize(src_path)
    return 1.0 if size_bytes < rule["max_size"] else 0.0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
actual_config_path = "../../../cache/sessionrc_test"
|
||||
rule = {
|
||||
@@ -550,3 +599,12 @@ if __name__ == "__main__":
|
||||
tgt_path = "../../../cache/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce/Triangle_In_The_Middle.png"
|
||||
print(check_triangle_position(tgt_path))
|
||||
|
||||
src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi_sharper.png"
|
||||
tgt_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi.png"
|
||||
print(check_sharper(src_path, tgt_path))
|
||||
|
||||
src_path = "../../../cache/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f/compressed.jpeg"
|
||||
rule = {
|
||||
"max_size": 500000
|
||||
}
|
||||
print(check_image_file_size(src_path, rule))
|
||||
@@ -1,10 +1,12 @@
|
||||
import builtins
|
||||
import datetime
|
||||
import functools
|
||||
import itertools
|
||||
import logging
|
||||
import operator
|
||||
import re
|
||||
import zipfile
|
||||
import pandas as pd
|
||||
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
|
||||
from typing import Dict, List, Set, Match, Tuple, Pattern
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
@@ -229,3 +229,54 @@ def check_python_file_by_test_suite(actual_files, test_file, **options) -> float
|
||||
|
||||
def check_python_file_by_gold_file(actual_files, gold_file: str, **options) -> float:
    # TODO: unimplemented stub — implicitly returns None, which callers
    # treating the score as a float/truthy value will read as 0/false.
    pass
|
||||
|
||||
|
||||
def check_html_background_image(src_path: str, rule: Dict = None) -> float:
    """
    Check if the background image is correctly set.
    multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108
    """
    from bs4 import BeautifulSoup
    with open(src_path, 'r') as fp:
        soup = BeautifulSoup(fp.read(), 'html.parser')
    needle = f'background-image: url(\'{rule["value"]}\')'
    # A match inside any <style> element counts as correctly set.
    for style_tag in soup.find_all('style'):
        if needle in style_tag.text:
            return 1.0
    return 0.0
|
||||
|
||||
|
||||
def compare_result_files(src_path, tgt_path):
    """
    Compare whether the content of two files are the same.
    multi-app:7f35355e-02a6-45b5-b140-f0be698bcf85

    Numeric contents match when within 1e-4 of each other, or when the
    target number appears verbatim inside the source text (the source may
    wrap the number in a language description). Non-numeric contents must
    be exactly equal after stripping.
    """
    with open(src_path, 'r') as f:
        src_content = f.read().strip()
    with open(tgt_path, 'r') as f:
        tgt_content = f.read().strip()
    try:
        # Compare the content as numbers
        tgt_content_num = float(tgt_content)
        if tgt_content in src_content:
            # If the content of tgt is in src, return 1.0 since output src might be
            # a superset(language description+number) of tgt
            return 1.0
        src_content_num = float(src_content)
        if abs(src_content_num - tgt_content_num) < 1e-4:
            return 1.0
        return 0.0
    except ValueError:
        # Only float() parsing is expected to fail here; the previous bare
        # `except:` swallowed every error, including KeyboardInterrupt.
        if src_content == tgt_content:
            return 1.0
        return 0.0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Ad-hoc manual smoke test for check_html_background_image; relies on a
    # pre-populated local cache directory, so it is not part of any test suite.
    src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/index.html"
    rule = {
        "type:": "value",
        "value": "anmi_sharper.png"
    }
    print(check_html_background_image(src_path, rule))
|
||||
|
||||
Reference in New Issue
Block a user