This commit is contained in:
tsuky_chen
2024-03-09 18:53:27 +08:00
48 changed files with 2106 additions and 155 deletions

View File

@@ -24,6 +24,7 @@ from .chrome import (
get_gotoRecreationPage_and_get_html_content,
get_url_dashPart,
get_active_url_from_accessTree,
get_find_installed_extension_name,
get_info_from_website
)
from .file import get_cloud_file, get_vm_file, get_cache_file, get_content_from_vm_file
@@ -31,7 +32,8 @@ from .general import get_vm_command_line, get_vm_terminal_output, get_vm_command
from .gimp import get_gimp_config_file
from .impress import get_audio_in_slide, get_background_image_in_slide
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper, get_list_directory
from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime
from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime, get_time_diff_range
from .replay import get_replay
from .vlc import get_vlc_playing_info, get_vlc_config, get_default_video_player
from .vscode import get_vscode_config
from .calc import get_conference_city_in_order

View File

@@ -0,0 +1,15 @@
import csv
# Read a CSV file and return every value in the third column, in row order.
def get_conference_city_in_order(env, config):
    """Read the CSV file named in config['csv_path'] and return the values of
    the third column in row order, excluding the header row.

    Args:
        env: environment handle (unused here, kept for the getter interface).
        config: dict with key 'csv_path' pointing at the CSV file.

    Returns:
        list[str]: third-column values, one per data row.
    """
    csv_path = config['csv_path']
    print(f"Reading csv file from {csv_path}")
    with open(csv_path, 'r') as handle:
        rows = csv.reader(handle)
        next(rows)  # drop the header row
        return [record[2] for record in rows]

View File

@@ -4,6 +4,7 @@ import os
import platform
import sqlite3
import time
from urllib.parse import unquote
from typing import Dict, Any, List
from urllib.parse import urlparse, parse_qs
@@ -1010,6 +1011,43 @@ def get_find_unpacked_extension_path(env, config: Dict[str, str]):
return "Google"
def get_find_installed_extension_name(env, config: Dict[str, str]):
    """Return the names of every extension installed in the VM's Chrome/Chromium.

    Resolves the platform-specific path of Chrome's "Preferences" JSON file by
    running a small Python snippet inside the VM, fetches that file through the
    controller, and collects each installed extension's manifest name.

    Args:
        env: environment handle; provides ``vm_platform`` and ``controller``.
        config: unused here — kept so this getter matches the common interface.

    Returns:
        list[str]: all installed extension names on success, or the string
        "Google" when the preferences file cannot be read or parsed (error
        sentinel — TODO confirm callers expect this rather than an empty list).

    Raises:
        Exception: when the VM platform is not Windows, Darwin, or Linux.
    """
    os_type = env.vm_platform
    if os_type == 'Windows':
        preference_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
'Google\\Chrome\\User Data\\Default\\Preferences'))""")['output'].strip()
    elif os_type == 'Darwin':
        preference_file_path = env.controller.execute_python_command(
            "import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Preferences'))")[
            'output'].strip()
    elif os_type == 'Linux':
        # snap-packaged Chromium (common on ARM) keeps its profile under ~/snap
        if "arm" in platform.machine():
            preference_file_path = env.controller.execute_python_command(
                "import os; print(os.path.join(os.getenv('HOME'), 'snap/chromium/common/chromium/Default/Preferences'))")[
                'output'].strip()
        else:
            preference_file_path = env.controller.execute_python_command(
                "import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Preferences'))")[
                'output'].strip()
    else:
        raise Exception('Unsupported operating system')
    try:
        content = env.controller.get_file(preference_file_path)
        data = json.loads(content)
        # The Preferences JSON lists every installed extension under
        # extensions.settings; return all manifest names and let the metric
        # match against the targeted extension.
        all_extensions_name = []
        all_extensions = data.get('extensions', {}).get('settings', {})
        for id in all_extensions.keys():
            name = all_extensions[id]["manifest"]["name"]
            all_extensions_name.append(name)
        return all_extensions_name
    except Exception as e:
        logger.error(f"Error: {e}")
        return "Google"
def get_data_delete_automacally(env, config: Dict[str, str]):
"""
This function is used to open th "auto-delete" mode of chromium
@@ -1037,8 +1075,8 @@ def get_data_delete_automacally(env, config: Dict[str, str]):
try:
content = env.controller.get_file(preference_file_path)
data = json.loads(content)
data_delete_state = data["profile"]["exit_type"]
return data_delete_state
data_delete_state = data["profile"].get("default_content_setting_values", None)
return "true" if data_delete_state is not None else "false"
except Exception as e:
logger.error(f"Error: {e}")
return "Google"
@@ -1077,6 +1115,7 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]):
"""
active_tab_url = get_active_url_from_accessTree(env, config)
if not isinstance(active_tab_url, str):
logger.error("active_tab_url is not a string")
return None
host = env.vm_ip
port = 9222 # fixme: this port is hard-coded, need to be changed from config file
@@ -1109,12 +1148,14 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]):
for context in browser.contexts:
for page in context.pages:
page.wait_for_load_state("networkidle")
if page.url == active_tab_url:
# the accTree and playwright can get encoding(percent-encoding) characters, we need to convert them to normal characters
if unquote(page.url) == unquote(active_tab_url):
target_page = page
print("tartget page url: ", target_page.url)
print("tartget page title: ", target_page.title())
print("\33[32mtartget page url: ", target_page.url, "\33[0m")
print("\33[32mtartget page title: ", target_page.title(), "\33[0m")
break
if target_page is None:
logger.error("Your tab is not the target tab.")
return {}
return_json = {}
if config["category"] == "class":

View File

@@ -1,6 +1,7 @@
import os
from typing import Dict, List, Set
from typing import Optional, Any, Union
from datetime import datetime
import requests
import pandas as pd
@@ -77,21 +78,37 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
gives (List[int]): optional. defaults to [0]. which files are directly
returned to the metric. if len==1, str is returned; else, list is
returned.
only support for single file now:
time_suffix(bool): optional. defaults to False. if True, append the current time in required format.
time_format(str): optional. defaults to "%Y_%m_%d". format of the time suffix.
"""
time_format = "%Y_%m_%d"
if not config.get("multi", False):
paths: List[str] = [config["path"]]
dests: List[str] = [config["dest"]]
print(config)
if "time_suffix" in config.keys() and config["time_suffix"]:
if "time_format" in config.keys():
time_format = config["time_format"]
# Insert time before . in file type suffix
paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths]
dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests]
else:
paths: List[str] = config["path"]
dests: List[str] = config["dest"]
print(paths)
print(dests)
cache_paths: List[str] = []
gives: Set[int] = set(config.get("gives", [0]))
for i, (p, d) in enumerate(zip(paths, dests)):
print("env cache_dir: ")
print(env.cache_dir)
_path = os.path.join(env.cache_dir, d)
print("_path: ")
print(_path)
file = env.controller.get_file(p)
if file is None:
#return None
@@ -104,7 +121,9 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
cache_paths.append(_path)
with open(_path, "wb") as f:
f.write(file)
# debug
print("cache_paths")
print(cache_paths)
return cache_paths[0] if len(cache_paths)==1 else cache_paths

View File

@@ -195,3 +195,10 @@ def get_accessibility_tree(env, *args) -> str:
accessibility_tree: str = env.controller.get_accessibility_tree()
logger.debug("AT@eval: %s", accessibility_tree)
return accessibility_tree
def get_time_diff_range(env, config) -> str:
    """Return the allowed time difference (in minutes) configured for a metric.

    Args:
        env: environment handle (unused).
        config: dict expected to contain 'diff_range_in_minutes'.

    Returns:
        The configured value, or None when the key is absent.
    """
    try:
        return config["diff_range_in_minutes"]
    # Fix: narrowed from a bare `except:`, which also swallowed
    # KeyboardInterrupt/SystemExit and hid unrelated bugs.
    except (KeyError, TypeError):
        logger.error("diff_range_in_minutes not found in config.")
        return None

View File

@@ -2,7 +2,8 @@ from .basic_os import (
check_gnome_favorite_apps,
is_utc_0,
check_text_enlarged,
check_moved_jpgs
check_moved_jpgs,
is_in_vm_clickboard
)
from .chrome import (
is_expected_tabs,
@@ -19,6 +20,7 @@ from .chrome import (
is_expected_active_tab,
is_expected_url_pattern_match,
is_added_to_steam_cart,
is_expected_installed_extensions,
compare_pdf_images
)
from .docs import (
@@ -47,6 +49,7 @@ from .docs import (
check_file_exists,
check_tabstops,
compare_contains_image,
compare_docx_files_and_ignore_new_lines,
compare_docx_images,
compare_image_text,
compare_references
@@ -62,6 +65,14 @@ from .general import (
fuzzy_match,
check_include_exclude,
check_direct_json_object,
compare_time_in_speedtest_results,
is_included_all_json_objects,
is_gold_text_included_in_pdf,
check_csv_line_number,
file_contains,
compare_terminal_and_txt,
fuzzy_place_math,
compare_python_pure_text,
diff_text_file,
literal_match
)
@@ -69,7 +80,7 @@ from .gimp import (
check_brightness_decrease_and_structure_sim,
check_contrast_increase_and_structure_sim,
check_saturation_increase_and_structure_sim,
check_image_size_and_structure_sim,
check_image_size,
check_image_mirror,
check_palette_and_structure_sim,
check_textbox_on_leftside,
@@ -82,7 +93,9 @@ from .gimp import (
increase_saturation,
decrease_brightness,
check_file_exists,
compare_triangle_positions
compare_triangle_positions,
check_sharper,
check_image_file_size
)
from .libreoffice import check_libre_locale
from .pdf import check_pdf_pages
@@ -126,13 +139,16 @@ from .vscode import (
compare_text_file,
compare_config,
compare_answer,
compare_result_files,
is_extension_installed,
check_json_settings,
check_json_keybindings,
check_python_file_by_test_suite,
check_python_file_by_gold_file,
check_html_background_image,
compare_zip_files
)
from .calc import compare_conference_city_in_order
from .others import compare_epub, check_mp3_meta
def infeasible():

View File

@@ -56,3 +56,15 @@ def check_moved_jpgs(directory_list, rule):
return 1
else:
return 0
def is_in_vm_clickboard(config, terminal_output):
    """Return 1 when every expected string from config['expected'] occurs in
    the terminal output, else 0. A scalar expectation is checked directly."""
    print("terminal_output: ")
    print(terminal_output)
    print("config: ")
    print(config)
    expected_results = config["expected"]
    if isinstance(expected_results, list):
        found_all = all(item in terminal_output for item in expected_results)
        return 1 if found_all else 0
    return 1 if expected_results in terminal_output else 0

View File

@@ -0,0 +1,27 @@
import openpyxl
def compare_conference_city_in_order(actual_city_list_path, expected_city):
    """Check that cells C2:C22 of the workbook match the expected city list in order.

    A match means each expected city name is a substring of the corresponding
    actual cell, index by index.

    Args:
        actual_city_list_path: path to the .xlsx workbook produced by the task.
        expected_city: dict with key 'expected' holding the ordered city list.

    Returns:
        bool: True when every position matches, False otherwise.
    """
    expected_city_list = expected_city["expected"]
    print(f"Reading csv file from {actual_city_list_path}")
    wb = openpyxl.load_workbook(actual_city_list_path)
    sheet = wb.active
    actual_city_list = []
    # NOTE(review): range is hard-coded to rows 2-22 — presumably the task
    # always produces 21 data rows; confirm before generalizing.
    for row in sheet["C2:C22"]:
        for cell in row:
            actual_city_list.append(cell.value)
    print("expected_city_list:")
    print(expected_city_list)
    # Fix: the label previously said "actual_city_list_path" while printing the list.
    print("actual_city_list:")
    print(actual_city_list)
    wrong_list = []
    try:
        for i in range(len(actual_city_list)):
            if expected_city_list[i] not in actual_city_list[i]:
                wrong_list.append(i)
                print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
    # Fix: narrowed from a bare `except:`.
    # IndexError: more actual rows than expected entries;
    # TypeError: an empty (None) cell — both count as a mismatch.
    except (IndexError, TypeError):
        return False
    return len(wrong_list) == 0

View File

@@ -2,9 +2,9 @@ import logging
import os
import re
import shutil
from itertools import product
from typing import Any, Dict, List, Union
import fitz # PyMuPDF
import rapidfuzz.fuzz as fuzz
from bs4 import BeautifulSoup, Tag
@@ -61,6 +61,12 @@ def is_expected_url_pattern_match(result, rules) -> float:
return 1.
def is_expected_installed_extensions(installed_extensions, expected) -> float:
    """Return 1. iff the installed extension names exactly equal the expected
    list — no additional extensions are allowed.

    Args:
        installed_extensions: list of extension names found in the VM.
        expected: dict with key 'expected' holding the required name list.
    """
    print("installed_extensions: ")
    print(installed_extensions)
    expected_extensions = expected["expected"]
    # Fix: success branch returned int 1 while the failure branch returned
    # float 0.; return float on both branches to honour the -> float contract.
    return 1. if expected_extensions == installed_extensions else 0.
def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
"""
Checks if the expected tabs are open in Chrome.
@@ -94,12 +100,24 @@ def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float:
elif rule['type'] == "liked_authors_websites_urls":
# Check if "liked authors" folder exists
liked_authors_folder = next((bookmark for bookmark in bookmarks['bookmark_bar']['children'] if
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
if liked_authors_folder:
# Check if it contains the specified URLs
liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if
bookmark['type'] == 'url']
return 1. if set(liked_authors_urls) == set(rule['urls']) else 0.
urls = rule['urls']
for idx, url in enumerate(urls):
if isinstance(url, str):
urls[idx] = [url]
combinations = product(*urls)
for combination in combinations:
if set(combination) == set(liked_authors_urls):
return 1.
return 0.
else:
return 0.
else:
@@ -140,15 +158,16 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
return score / len(pdf2_path)
import fitz
from PIL import Image
from io import BytesIO
from borb.pdf import Document
from borb.pdf import PDF
from pathlib import Path
import typing
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
def extract_images_from_pdf(pdf_path):
pdf_document = fitz.open(pdf_path)
@@ -163,14 +182,14 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
images.append(img)
return images
def fix_pdf(in_path: Path, out_path: Path) -> None:
doc: typing.Optional[Document] = None
with open(in_path, "rb") as fh:
doc = PDF.loads(fh)
with open(out_path, "wb") as fh:
PDF.dumps(fh, doc)
fix_pdf(Path(pdf1_path), Path(pdf1_path))
fix_pdf(Path(pdf2_path), Path(pdf2_path))
@@ -183,7 +202,7 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
for img1, img2 in zip(images1, images2):
if img1.tobytes() != img2.tobytes():
return 0.
return 1.

View File

@@ -0,0 +1,28 @@
import fitz # PyMuPDF
def extract_answers_from_pdf(pdf_file):
    """Extract an "answer" from every non-blank line of a PDF.

    A line containing '=' contributes the stripped text after its last '='.

    Args:
        pdf_file: path to the PDF document.

    Returns:
        list[str]: answers in document order.
    """
    doc = fitz.open(pdf_file)
    answers = []
    for page in doc:
        page_text = page.get_text()
        for line in page_text.split('\n'):
            if not line.strip():
                continue  # skip blank lines
            pieces = line.split('=')
            if len(pieces) > 1:
                # the text after the last '=' is taken as the answer
                answers.append(pieces[-1].strip())
    return answers
# Fix: this demo ran at import time against a hard-coded developer path,
# breaking any module that imports this file. Guard it as a script entry.
if __name__ == "__main__":
    pdf_file = '/Users/lxc/Desktop/calculus.pdf'
    answers = extract_answers_from_pdf(pdf_file)
    for i, answer in enumerate(answers, 1):
        print(f"题目{i}的答案是: {answer}")

View File

@@ -3,8 +3,10 @@ import os
import re
import xml.etree.ElementTree as ET
import zipfile
from io import BytesIO
from typing import List, Dict, Any
from PIL import Image
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
from docx.shared import RGBColor
@@ -23,6 +25,9 @@ def find_default_font(config_file_path, rules):
default_font = None
expected_font = rules["font_name"]
if not config_file_path:
return 0
try:
tree = ET.parse(config_file_path)
root = tree.getroot()
@@ -42,6 +47,9 @@ def find_default_font(config_file_path, rules):
def contains_page_break(docx_file):
if not docx_file:
return 0
doc = Document(docx_file)
namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
@@ -62,6 +70,9 @@ def compare_docx_files(file1, file2, **options):
ignore_order = options.get('ignore_order', False)
content_only = options.get('content_only', False)
if not file1 or not file2:
return 0
def get_paragraph_texts_odt(document):
paragraphs = document.getElementsByType(P)
paragraph_texts = []
@@ -118,20 +129,30 @@ def compare_docx_files(file1, file2, **options):
if text1 != text2:
return 0
else:
print("ignore_blanks=false")
if len(doc1_paragraphs) != len(doc2_paragraphs):
print(doc1_paragraphs)
print(doc2_paragraphs)
print(len(doc1_paragraphs))
print(len(doc2_paragraphs))
return 0
print("in compare")
# Compare each paragraph
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
if ignore_case:
p1, p2 = p1.lower(), p2.lower()
if p1 != p2:
print(p1)
print(p2)
return 0
return 1
def compare_init_lines(file1, file2):
if not file1 or not file2:
return 0
doc1 = Document(file1)
doc2 = Document(file2)
@@ -149,6 +170,9 @@ def compare_init_lines(file1, file2):
def compare_docx_tables(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -174,11 +198,10 @@ def compare_docx_tables(docx_file1, docx_file2):
return 1
from io import BytesIO
from PIL import Image
def compare_docx_images(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -212,6 +235,9 @@ def compare_image_text(image_path, rule):
def compare_line_spacing(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
if not compare_docx_files(docx_file1, docx_file2):
return 0
doc1 = Document(docx_file1)
@@ -233,6 +259,9 @@ def compare_line_spacing(docx_file1, docx_file2):
def compare_insert_equation(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
if not compare_docx_files(docx_file1, docx_file2):
return 0
@@ -248,6 +277,9 @@ def compare_insert_equation(docx_file1, docx_file2):
def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
if not docx_file:
return 0
doc = Document(docx_file)
expected_font = rules["font_name"]
@@ -260,6 +292,9 @@ def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
def compare_subscript_contains(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -272,6 +307,9 @@ def compare_subscript_contains(docx_file1, docx_file2):
def has_page_numbers_in_footers(docx_file):
if not docx_file:
return 0
doc = Document(docx_file)
for section in doc.sections:
@@ -286,6 +324,9 @@ def has_page_numbers_in_footers(docx_file):
def is_first_line_centered(docx_file):
if not docx_file:
return 0
doc = Document(docx_file)
first_paragraph = doc.paragraphs[0]
@@ -294,11 +335,16 @@ def is_first_line_centered(docx_file):
def check_file_exists(directory, filename):
if not directory or not filename:
return 0
file_path = os.path.join(directory, filename)
return 1 if os.path.isfile(file_path) else 0
def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
if not docx_file1 or not docx_file2:
return .0
doc1: Document = Document(docx_file1)
doc2: Document = Document(docx_file2)
para1 = [p for p in doc1.paragraphs if p.text.strip()]
@@ -334,6 +380,9 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
def compare_contains_image(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -346,6 +395,9 @@ def compare_contains_image(docx_file1, docx_file2):
def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
if not file_path1 or not file_path2:
return 0
if not compare_docx_files(file_path1, file_path2):
return 0
document = Document(file_path1)
@@ -380,6 +432,9 @@ def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
def check_highlighted_words(file_path1, file_path2):
if not file_path1 or not file_path2:
return 0
if not compare_docx_files(file_path1, file_path2):
return 0
@@ -402,6 +457,9 @@ def check_highlighted_words(file_path1, file_path2):
def evaluate_strike_through_last_paragraph(file_path1, file_path2):
if not file_path1 or not file_path2:
return 0
if not compare_docx_files(file_path1, file_path2):
return 0
document = Document(file_path1)
@@ -418,6 +476,9 @@ def evaluate_strike_through_last_paragraph(file_path1, file_path2):
def evaluate_conversion(file_path):
if not file_path:
return 0
document = Document(file_path)
for table in document.tables:
@@ -437,6 +498,9 @@ def evaluate_conversion(file_path):
def evaluate_spacing(file_path):
if not file_path:
return 0
document = Document(file_path)
# Check line spacing for introduction, body, and conclusion
@@ -450,6 +514,9 @@ def evaluate_spacing(file_path):
def check_italic_font_size_14(path1, path2):
if not path1 or not path2:
return 0
if not compare_docx_files(path1, path2):
return 0
document = Document(path1)
@@ -463,6 +530,9 @@ def check_italic_font_size_14(path1, path2):
def evaluate_alignment(docx_path):
if not docx_path:
return 0
# Load the document
doc = Document(docx_path)
@@ -492,6 +562,9 @@ def evaluate_alignment(docx_path):
def get_unique_train_ids(initial_file): # fixed standard
if not initial_file:
return set(), 0
doc = Document(initial_file)
train_ids = set()
processed_lines = 0
@@ -508,6 +581,9 @@ def get_unique_train_ids(initial_file): # fixed standard
def check_no_duplicates(initial_file, processed_file):
if not initial_file or not processed_file:
return 0
# Open the document
train_ids_ini, ini_lines = get_unique_train_ids(initial_file)
doc_processed = Document(processed_file)
@@ -535,6 +611,9 @@ def check_no_duplicates(initial_file, processed_file):
def compare_docx_lines(file1, file2):
if not file1 or not file2:
return 0
# Read the text of the document, line by line
doc1 = Document(file1)
doc1_lines = [p.text.strip() for p in doc1.paragraphs if p.text.strip()]
@@ -551,8 +630,47 @@ def compare_docx_lines(file1, file2):
return 0
def compare_docx_files_and_ignore_new_lines(file1, file2, **options):
    """Compare two .docx files paragraph by paragraph, dropping empty paragraphs.

    Options:
        ignore_blanks (bool, default True): collapse all whitespace runs before
            comparing the joined text instead of comparing paragraph lists.

    Returns:
        int: 1 when the documents match, 0 otherwise (also 0 for missing paths
        or non-.docx inputs).
    """
    ignore_blanks = options.get('ignore_blanks', True)
    if not file1 or not file2:
        return 0
    if not (file1.endswith('.docx') and file2.endswith('.docx')):
        print("Unsupported file types or mismatch between file types.")
        return 0
    # collect only non-empty paragraphs from each document
    paras1 = [p.text for p in Document(file1).paragraphs if p.text != '']
    paras2 = [p.text for p in Document(file2).paragraphs if p.text != '']
    if ignore_blanks:
        def collapse(paragraphs):
            return re.sub(r'\s+', ' ', '\n'.join(paragraphs)).strip()
        return 1 if collapse(paras1) == collapse(paras2) else 0
    if len(paras1) != len(paras2):
        return 0
    for left, right in zip(paras1, paras2):
        if left != right:
            return 0
    return 1
# Docx file saved in the ubuntu cannot use this function to compare highlight, don't know why, deprecated
def compare_highlighted_text(file1, file2):
if not file1 or not file2:
return 0
def extract_highlighted_text(file_path):
highlighted_texts = []
@@ -590,6 +708,9 @@ def compare_highlighted_text(file1, file2):
def compare_references(file1, file2, **options):
if not file1 or not file2:
return 0
reference_indicator = options.get('reference_indicator', 'References')
reference_base_result = options.get('reference_base_result', 0.5)
@@ -634,48 +755,3 @@ def compare_references(file1, file2, **options):
return (result - reference_base_result) / (1 - reference_base_result)
else:
return 0
def compare_answer(file1, file2, **options):
    """Compare the reference sections of two .docx documents with fuzzy matching.

    Finds the paragraph equal to the reference indicator in each document and
    fuzzily compares the non-empty paragraphs that follow it, pairwise.

    Options:
        reference_indicator (str, default 'References'): section heading text.
        reference_base_result (float, default 0.5): minimum average similarity
            mapped to score 0; similarities above it are rescaled to (0, 1].

    Returns:
        float|int: rescaled similarity score, 1 when neither document has the
        section, 0 on any structural mismatch.
    """
    if not file1 or not file2:
        return 0
    # Fix: these two values were used below but never read from `options`,
    # so the original body raised NameError on every call.
    reference_indicator = options.get('reference_indicator', 'References')
    reference_base_result = options.get('reference_base_result', 0.5)
    if file1.endswith('.docx') and file2.endswith('.docx'):
        doc1 = Document(file1)
        doc2 = Document(file2)
        doc1_paragraphs = [p.text for p in doc1.paragraphs]
        doc2_paragraphs = [p.text for p in doc2.paragraphs]
    else:
        # Unsupported file types or mismatch
        print("Unsupported file types or mismatch between file types.")
        return 0
    # Index of the last reference indicator paragraph (-1 when absent)
    ref1_idx = doc1_paragraphs.index(reference_indicator) if reference_indicator in doc1_paragraphs else -1
    ref2_idx = doc2_paragraphs.index(reference_indicator) if reference_indicator in doc2_paragraphs else -1
    if ref1_idx == -1 and ref2_idx == -1:
        return 1
    if ref1_idx == -1 or ref2_idx == -1:
        return 0
    # Split each reference section into non-empty items
    ref1 = [p for p in doc1_paragraphs[ref1_idx + 1:] if p.strip()]
    ref2 = [p for p in doc2_paragraphs[ref2_idx + 1:] if p.strip()]
    if len(ref1) != len(ref2):
        return 0
    if not ref1:
        # both sections empty -> trivially matching; also avoids dividing by zero
        return 1
    total_similarity = 0
    for r1, r2 in zip(ref1, ref2):
        # fuzzy match the references
        similarity = fuzz.ratio(r1, r2) / 100.0
        total_similarity += similarity
    result = total_similarity / len(ref1)
    if result >= reference_base_result:
        return (result - reference_base_result) / (1 - reference_base_result)
    else:
        return 0

View File

@@ -1,7 +1,9 @@
import csv
import datetime
import difflib
import functools
import json
import yaml
import logging
import operator
import re
import sqlite3
@@ -10,17 +12,19 @@ from typing import Callable, Any, Union
from typing import Dict, List, Pattern
import lxml.etree
import pandas as pd
import pdfplumber
import yaml
from docx import Document
from lxml.cssselect import CSSSelector
from lxml.etree import _Element
from rapidfuzz import fuzz
import difflib
from .utils import _match_record, _match_value_to_rule
import logging
logger = logging.getLogger("desktopenv.metric.general")
def check_include_exclude(result: str, rules: Dict[str, List[str]]) -> float:
if result is None:
return 0.
@@ -68,6 +72,7 @@ def is_in_list(result, rules) -> float:
else:
return 0.
def diff_text_file(result: str, expect: str) -> float:
if result is None:
return 0.
@@ -78,12 +83,32 @@ def diff_text_file(result: str, expect: str) -> float:
expected_lines: List[str] = f.read().splitlines()
return difflib.SequenceMatcher(a=result_lines, b=expected_lines).ratio()
def fuzzy_match(result, rules) -> float:
    """Fuzzy string similarity between `result` and rules['expected'], in [0, 1]."""
    expected = rules["expected"]
    return fuzz.ratio(result, expected) / 100.
def fuzzy_place_math(result_file_path, rules) -> float:
    """Score a .docx answer sheet against a list of acceptable answers.

    Every whitespace-separated word in the document is fuzzily matched against
    each candidate in rules['expected']; the word's score is its best match.

    Args:
        result_file_path: path to the .docx file to grade.
        rules: dict with key 'expected' — a list of acceptable answer strings.

    Returns:
        float: mean of the per-word best scores (0-100 scale, as fuzz.ratio).
    """
    expect = rules["expected"]  # a list of possible answers
    doc = Document(result_file_path)
    words_list = []
    for para in doc.paragraphs:
        words_list.extend(para.text.split())
    print("Your Answers: ")
    print(words_list)
    # Fix: an empty document previously raised ZeroDivisionError.
    if not words_list:
        return 0.
    fuzzy_score_list = []
    for word in words_list:
        # best similarity of this word against any acceptable answer
        max_score = max(fuzz.ratio(word, ans) for ans in expect)
        fuzzy_score_list.append(max_score)
    return sum(fuzzy_score_list) / len(fuzzy_score_list)
def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
"""
Args:
@@ -191,10 +216,10 @@ def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float:
return 0.
if "text" in r:
match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \
else (lambda a, b: fuzz.ratio(a, b) / 100.)
, r["text"]
)
match_func: Callable[[str], Number] = functools.partial(operator.eq if r["exact"] \
else (lambda a, b: fuzz.ratio(a, b) / 100.)
, r["text"]
)
match_score: Number = 0
for elm in elements:
match_score = max(match_score, match_func(elm.text or None))
@@ -267,20 +292,157 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
return float(metric)
def check_direct_json_object(result, rules) -> float:
    """Compare a (possibly stringified) JSON object against an expected object.

    One of the most commonly used evaluation functions. With
    rules['expect_in_result'] unset/False, every expected key must map to an
    equal value in `result`; when True, each expected value must be *contained
    in* (substring/membership of) the corresponding result value.

    Args:
        result: dict, or a string holding JSON (single quotes tolerated).
        rules: dict with 'expected' (the reference object) and optional
            'expect_in_result' flag.

    Returns:
        float: 1.0 on match, 0. otherwise.
    """
    if isinstance(result, str):
        # remove blanks before and after result
        result = result.strip()
        # tolerate single-quoted pseudo-JSON by swapping quotes
        # NOTE(review): this breaks values that legitimately contain quotes.
        result = result.replace("'", '"')
        result = json.loads(result)
    print("result: ")
    print(result)
    print("expected: ")
    print(rules["expected"])
    if result is None:
        return 0.
    # Fix: the block contained two comparison loops; the first (strict
    # equality) always returned before the expect_in_result variant could
    # run, making the containment mode dead code. Single loop, mode-switched.
    expect_in_result = rules.get("expect_in_result", False)
    expected_json = rules["expected"]
    for key in expected_json.keys():
        expected_value = expected_json.get(key)
        if not expect_in_result:
            if expected_value != result.get(key):
                return 0.
        else:
            if expected_value not in result.get(key):
                return 0.
    return 1.0
def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
    """Check that every row of a speedtest CSV was recorded within `time_diff`
    minutes of the current time.

    The date column is the first one whose header starts with 'TEST_DATE';
    its values are assumed to end with an HH:MM timestamp (last 5 chars) —
    TODO confirm against the speedtest export format.

    Args:
        speedtest_result_path: path to the exported CSV file.
        time_diff: maximum allowed difference in minutes (str or int).

    Returns:
        int: 1 when all rows are recent enough, 0 otherwise.
    """
    if not speedtest_result_path:
        return 0
    date_col = None
    with open(speedtest_result_path, 'r') as f:
        reader = pd.read_csv(f)
    for column in reader.columns:
        if column.startswith('TEST_DATE'):
            date_col = column
            break
    # Fix: a missing TEST_DATE column previously raised KeyError(None).
    if date_col is None:
        return 0
    now_date_time = datetime.datetime.now().strftime('%H:%M')
    for date in reader[date_col]:
        date_time = date[-5:]
        # every timestamp must be within time_diff minutes of "now"
        if not abs((datetime.datetime.strptime(date_time, '%H:%M')
                    - datetime.datetime.strptime(now_date_time, '%H:%M')).total_seconds()) / 60 < int(time_diff):
            return 0
    return 1
def is_included_all_json_objects(gold_file_path, result_file_path):
    """Return 1 when every key/value pair of the gold JSON file also appears
    (with an equal value) in the result JSON file, else 0."""
    if not gold_file_path or not result_file_path:
        return 0
    print("gold_file_path: ")
    print(gold_file_path)
    print("result_file_path: ")
    print(result_file_path)
    with open(gold_file_path, 'r') as gold_fp:
        gold = json.load(gold_fp)
    with open(result_file_path, 'r') as result_fp:
        result = json.load(result_fp)
    matches = all(key in result and result[key] == value for key, value in gold.items())
    return 1 if matches else 0
def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
    """Return 1 when every value of the gold JSON file occurs somewhere in the
    PDF's extracted text, else 0 (printing the keys that were missing)."""
    print("gold_text_path: ")
    print(gold_text_path)
    print("pdf_file_path: ")
    print(pdf_file_path)
    with open(gold_text_path, 'r') as handle:
        gold_json = json.load(handle)
    # concatenate the text of every page
    with pdfplumber.open(pdf_file_path) as pdf:
        text = ''
        for page in pdf.pages:
            text += page.extract_text()
    missing_keys = [key for key in gold_json.keys() if gold_json[key] not in text]
    if missing_keys:
        print("false_list: ")
        print(missing_keys)
        return 0
    return 1
def file_contains(file_path, config):
    """Return 1 when every string in config['expected'] occurs in the text file.

    NOTE(review): an empty/missing path returns 1 (success) — presumably the
    file is optional for those tasks; confirm with callers.
    """
    if not file_path:
        return 1
    with open(file_path, 'r') as handle:
        body = handle.read()
    return 1 if all(fragment in body for fragment in config["expected"]) else 0
def check_csv_line_number(file_path, line_number):
    """Return 1 when the CSV file (including its header) has exactly
    line_number['expected'] rows; 0 for a non-.csv path or a count mismatch."""
    if not file_path.endswith('.csv'):
        return 0
    with open(file_path, 'r') as handle:
        total_rows = sum(1 for _ in csv.reader(handle))
    expected_rows = int(line_number["expected"])
    return 1 if total_rows == expected_rows else 0
def compare_terminal_and_txt(txt_file_path, terminal_output):
    """Return 1 when the terminal output is byte-identical to the text file's
    content, else 0."""
    with open(txt_file_path, 'r') as handle:
        expected_text = handle.read()
    return 1 if expected_text == terminal_output else 0
def compare_python_pure_text(py_file_path, gold_file_path):
    """Return 1 when the two source files are identical after removing ALL
    whitespace, else 0."""
    print("py_file_path: ")
    print(py_file_path)
    print("gold_file_path: ")
    print(gold_file_path)
    def _squash(text):
        # drop every whitespace character so only the tokens' bytes remain
        return ''.join(text.split())
    with open(py_file_path, 'r') as left:
        left_text = left.read()
    with open(gold_file_path, 'r') as right:
        right_text = right.read()
    return 1 if _squash(left_text) == _squash(right_text) else 0

View File

@@ -5,7 +5,7 @@ from PIL import Image, ImageChops, ImageStat
def compare_image_list(pred_img_path_list: Union[str, List[str]],
gold_img_path_list: Union[str, List[str]]) -> float:
gold_img_path_list: Union[str, List[str]]) -> float:
""" Compare two image lists, only if all images are the same, return 1.0, otherwise return 0.0
"""
if type(pred_img_path_list) != list:
@@ -177,6 +177,16 @@ def calculate_contrast(image):
return np.std(pixels)
def calculate_image_sharpness(image_path):
    """Estimate image sharpness as the variance of the Laplacian of the
    grayscale image (higher variance means more edges / sharper)."""
    grayscale = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    laplacian_response = cv2.Laplacian(grayscale, cv2.CV_64F)
    return np.var(laplacian_response)
def structure_check_by_mse(img1, img2, threshold=0.03):
"""Check if two images are approximately the same by MSE"""
mse = np.mean(
@@ -295,7 +305,8 @@ def check_triangle_position(tgt_path):
# We assume the triangle is a different color from the background
# Find the unique colors
unique_colors, counts = np.unique(img_array.reshape(-1, img_array.shape[2]), axis=0, return_counts=True)
unique_colors, counts = np.unique(img_array.reshape(-1, img_array.shape[2]), axis=0,
return_counts=True)
unique_colors_sorted = unique_colors[np.argsort(counts)]
# Assuming the background is the most common color and the triangle is a different color
@@ -337,6 +348,25 @@ def check_structure_sim(src_path, tgt_path):
return structure_same
def check_structure_sim_resized(src_path, tgt_path):
    """
    Check whether two images share a similar structure once the source image
    is resized to the target's dimensions.
    gimp:d16c99dc-2a1e-46f2-b350-d97c86c85c15
    """
    if src_path is None or tgt_path is None:
        return 0.
    target = Image.open(tgt_path)
    # Bring the source to the target's size so SSIM compares like with like.
    source = Image.open(src_path).resize(target.size)
    return structure_check_by_ssim(source, target)
def check_contrast_increase_and_structure_sim(src_path, tgt_path):
"""
Check if the src image has higher contrast than the tgt image and the structures are similar
@@ -388,34 +418,28 @@ def check_config_status(actual_config_path, rule):
return 0.
def check_image_size_and_structure_sim(src_path, tgt_path, height=512, width=None):
def check_image_size(src_path, rule):
"""
Check if the size of the src image is correct and the structure of the two images are similar.
gimp:d16c99dc-2a1e-46f2-b350-d97c86c85c15
Check if the size of the src image is correct
multi-apps:42f4d1c7-4521-4161-b646-0a8934e36081
"""
if src_path is None or tgt_path is None:
if src_path is None:
return 0.
# Load images
source_image = Image.open(src_path)
target_image = Image.open(tgt_path)
# Load the image
img = Image.open(src_path)
# Check size
if width is not None:
width_same = source_image.size[0] == width
else:
width_same = True
if height is not None:
height_same = source_image.size[1] == height
# Check the size
if rule["height"] is not None:
height_same = img.size[1] == rule["height"]
else:
height_same = True
if rule["width"] is not None:
width_same = img.size[0] == rule["width"]
else:
width_same = True
# Check structure
resized_target_image = target_image.resize(source_image.size)
structure_same = structure_check_by_ssim(source_image, resized_target_image)
if width_same and height_same and structure_same:
if height_same and width_same:
return 1.
else:
return 0.
@@ -521,6 +545,31 @@ def check_green_background(src_path, tgt_path):
return 1.
def check_sharper(src_path, tgt_path):
    """
    Check if the source image is sharper than the target image.
    multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108
    """
    # Strictly greater Laplacian-variance score counts as "sharper";
    # an equal score does not.
    src_score = calculate_image_sharpness(src_path)
    tgt_score = calculate_image_sharpness(tgt_path)
    if src_score > tgt_score:
        return 1.0
    return 0.0
def check_image_file_size(src_path, rule):
    """
    Check that the file at src_path is smaller than rule["max_size"] bytes.
    Returns 1.0 when it is, 0.0 otherwise (including when src_path is None).
    """
    if src_path is None:
        return 0.0
    # Strictly-less-than comparison against the configured byte limit.
    return 1.0 if os.path.getsize(src_path) < rule["max_size"] else 0.0
if __name__ == "__main__":
actual_config_path = "../../../cache/sessionrc_test"
rule = {
@@ -550,3 +599,12 @@ if __name__ == "__main__":
tgt_path = "../../../cache/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce/Triangle_In_The_Middle.png"
print(check_triangle_position(tgt_path))
src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi_sharper.png"
tgt_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi.png"
print(check_sharper(src_path, tgt_path))
src_path = "../../../cache/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f/compressed.jpeg"
rule = {
"max_size": 500000
}
print(check_image_file_size(src_path, rule))

View File

@@ -1,10 +1,12 @@
import builtins
import datetime
import functools
import itertools
import logging
import operator
import re
import zipfile
import pandas as pd
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
from typing import Dict, List, Set, Match, Tuple, Pattern
from urllib.parse import urlparse, urlunparse

View File

@@ -229,3 +229,54 @@ def check_python_file_by_test_suite(actual_files, test_file, **options) -> float
def check_python_file_by_gold_file(actual_files, gold_file: str, **options) -> float:
    """Compare produced Python files against a gold file.

    NOTE(review): stub — currently unimplemented; it falls through ``pass``
    and implicitly returns None despite the declared ``float`` return type.
    Callers must not rely on a float until this is filled in.
    """
    pass
def check_html_background_image(src_path: str, rule: Dict = None) -> float:
    """
    Check if the background image is correctly set.
    multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108
    """
    from bs4 import BeautifulSoup
    with open(src_path, 'r') as html_file:
        soup = BeautifulSoup(html_file.read(), 'html.parser')
    # Same literal the original f-string produced: background-image: url('<value>')
    needle = "background-image: url('" + rule["value"] + "')"
    # A match in any <style> block counts as correctly configured.
    if any(needle in style.text for style in soup.find_all('style')):
        return 1.0
    return 0.0
def compare_result_files(src_path, tgt_path):
    """
    Compare whether the content of two files are the same.
    multi-app:7f35355e-02a6-45b5-b140-f0be698bcf85

    The comparison is numeric when the contents parse as floats — with a
    small tolerance, and allowing the source to be a descriptive superset
    (language description + number) of the target — otherwise it falls back
    to an exact string comparison.

    Returns:
        1.0 when the contents match, 0.0 otherwise.
    """
    with open(src_path, 'r') as f:
        src_content = f.read().strip()
    with open(tgt_path, 'r') as f:
        tgt_content = f.read().strip()
    try:
        # Compare the content as numbers
        tgt_content_num = float(tgt_content)
        if tgt_content in src_content:
            # If the content of tgt is in src, return 1.0 since output src might be
            # a superset(language description+number) of tgt
            return 1.0
        src_content_num = float(src_content)
        if abs(src_content_num - tgt_content_num) < 1e-4:
            return 1.0
        return 0.0
    except ValueError:
        # float() raised: at least one side is not numeric.
        # Narrowed from a bare `except:` so real bugs (e.g. typos raising
        # NameError) are no longer silently swallowed.
        if src_content == tgt_content:
            return 1.0
        return 0.0
if __name__ == "__main__":
    # Ad-hoc manual check: verify the cached HTML sets the expected
    # background image.
    src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/index.html"
    rule = {
        # Fixed typo: the key was written as "type:" (stray trailing colon).
        "type": "value",
        "value": "anmi_sharper.png"
    }
    print(check_html_background_image(src_path, rule))