Xiaochuan's multi-app examples
This commit is contained in:
@@ -2,7 +2,8 @@ from .basic_os import (
|
||||
check_gnome_favorite_apps,
|
||||
is_utc_0,
|
||||
check_text_enlarged,
|
||||
check_moved_jpgs
|
||||
check_moved_jpgs,
|
||||
is_in_vm_clickboard
|
||||
)
|
||||
from .chrome import (
|
||||
is_expected_tabs,
|
||||
@@ -18,7 +19,8 @@ from .chrome import (
|
||||
is_expected_search_query,
|
||||
is_expected_active_tab,
|
||||
is_expected_url_pattern_match,
|
||||
is_added_to_steam_cart
|
||||
is_added_to_steam_cart,
|
||||
is_expected_installed_extensions
|
||||
)
|
||||
from .docs import (
|
||||
compare_font_names,
|
||||
@@ -45,7 +47,8 @@ from .docs import (
|
||||
is_first_line_centered,
|
||||
check_file_exists,
|
||||
check_tabstops,
|
||||
compare_contains_image
|
||||
compare_contains_image,
|
||||
compare_docx_files_and_ignore_new_lines
|
||||
)
|
||||
from .general import (
|
||||
check_csv,
|
||||
@@ -57,7 +60,15 @@ from .general import (
|
||||
is_in_list,
|
||||
fuzzy_match,
|
||||
check_include_exclude,
|
||||
check_direct_json_object
|
||||
check_direct_json_object,
|
||||
compare_time_in_speedtest_results,
|
||||
is_included_all_json_objects,
|
||||
is_gold_text_included_in_pdf,
|
||||
check_csv_line_number,
|
||||
file_contains,
|
||||
compare_terminal_and_txt,
|
||||
fuzzy_place_math,
|
||||
compare_python_pure_text
|
||||
)
|
||||
from .gimp import (
|
||||
check_brightness_decrease_and_structure_sim,
|
||||
@@ -124,7 +135,7 @@ from .vscode import (
|
||||
check_json_settings,
|
||||
check_json_keybindings
|
||||
)
|
||||
|
||||
from .calc import compare_conference_city_in_order
|
||||
|
||||
def infeasible():
    """No-op placeholder metric; presumably marks a task as infeasible — confirm with callers."""
    pass
|
||||
|
||||
@@ -56,3 +56,15 @@ def check_moved_jpgs(directory_list, rule):
|
||||
return 1
|
||||
else:
|
||||
return 0
|
||||
|
||||
def is_in_vm_clickboard(config, terminal_output):
    """Return 1 when every expected snippet appears in *terminal_output*, else 0.

    ``config["expected"]`` may be a single string or a list of strings; each
    entry must occur as a substring of the captured terminal output.
    """
    print("terminal_output: ")
    print(terminal_output)
    print("config: ")
    print(config)
    expected = config["expected"]
    # Normalize the single-string case to a one-element list, then require
    # every snippet to be present in the output.
    snippets = expected if isinstance(expected, list) else [expected]
    found_all = all(snippet in terminal_output for snippet in snippets)
    return 1 if found_all else 0
|
||||
24
desktop_env/evaluators/metrics/calc.py
Normal file
24
desktop_env/evaluators/metrics/calc.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import openpyxl
|
||||
|
||||
def compare_conference_city_in_order(actual_city_list_path, expected_city):
    """Check that the cities in column C of an xlsx sheet match the expected list, in order.

    Args:
        actual_city_list_path: path to an .xlsx workbook; cities are read
            from the fixed range C2:C22 of the active sheet.
        expected_city: dict whose "expected" key holds the ordered city list.

    Returns:
        True when each read cell contains the corresponding expected city
        (substring match), False otherwise.
    """
    expected_city_list = expected_city["expected"]
    # NOTE(review): the file is an Excel workbook, not a csv, despite the log text.
    print(f"Reading csv file from {actual_city_list_path}")
    wb = openpyxl.load_workbook(actual_city_list_path)
    sheet = wb.active
    # Hard-coded range C2:C22 — assumes at most 21 conference rows; TODO confirm.
    actual_city_list = [cell.value for row in sheet["C2:C22"] for cell in row]
    print("expected_city_list:")
    print(expected_city_list)
    print("actual_city_list_path:")
    print(actual_city_list)
    wrong_list = []
    # Compare pairwise and stop at the shorter list: the original indexed the
    # expected list by the actual list's length and raised IndexError whenever
    # fewer than 21 cities were expected.
    for i, (expected, actual) in enumerate(zip(expected_city_list, actual_city_list)):
        # Empty cells come back as None; the original crashed on `x in None`.
        if actual is None or expected not in actual:
            wrong_list.append(i)
            print(f"Expected city {expected}; Actual city {actual}")
    return not wrong_list
|
||||
@@ -61,6 +61,12 @@ def is_expected_url_pattern_match(result, rules) -> float:
|
||||
return 1.
|
||||
|
||||
|
||||
def is_expected_installed_extensions(installed_extensions, expected) -> float:
    """Score whether the installed extensions equal the expected list exactly.

    Args:
        installed_extensions: extension identifiers found in the editor.
        expected: dict whose "expected" key holds the required extension list.

    Returns:
        1.0 on an exact `==` match, 0.0 otherwise — extra extensions fail.
    """
    print("installed_extensions: ")
    print(installed_extensions)
    expected_extensions = expected["expected"]
    # Must equal, no additional extensions allowed.  Return `1.` (not int `1`)
    # so both branches honor the declared ``float`` annotation.
    return 1. if expected_extensions == installed_extensions else 0.
|
||||
|
||||
def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
|
||||
"""
|
||||
Checks if the expected tabs are open in Chrome.
|
||||
|
||||
28
desktop_env/evaluators/metrics/demo.py
Normal file
28
desktop_env/evaluators/metrics/demo.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import fitz # PyMuPDF
|
||||
|
||||
def extract_answers_from_pdf(pdf_file):
    """Extract the text after the last ``=`` on every non-blank line of a PDF.

    Args:
        pdf_file: path of the PDF to scan.

    Returns:
        List of answer strings, one per line that contains an ``=`` sign.
    """
    doc = fitz.open(pdf_file)
    answers = []
    try:
        # Walk every page and pull its plain text.
        for page in doc:
            text = page.get_text()
            for line in text.split('\n'):
                if line.strip():  # skip blank lines
                    # Split on '=' and keep the part after the last '=' as the answer.
                    parts = line.split('=')
                    if len(parts) > 1:
                        answers.append(parts[-1].strip())
    finally:
        # Always release the document handle — the original never closed it.
        doc.close()
    return answers
|
||||
|
||||
# Demo driver: extract answers from a sample PDF and print them.
# Guarded so importing this module no longer runs the demo as a side effect.
if __name__ == "__main__":
    # Hard-coded sample path from the author's machine — adjust as needed.
    pdf_file = '/Users/lxc/Desktop/calculus.pdf'
    answers = extract_answers_from_pdf(pdf_file)
    for i, answer in enumerate(answers, 1):
        print(f"题目{i}的答案是: {answer}")
|
||||
@@ -96,12 +96,19 @@ def compare_docx_files(file1, file2, **options):
|
||||
if text1 != text2:
|
||||
return 0
|
||||
else:
|
||||
print("ignore_blanks=false")
|
||||
if len(doc1_paragraphs) != len(doc2_paragraphs):
|
||||
print(doc1_paragraphs)
|
||||
print(doc2_paragraphs)
|
||||
print(len(doc1_paragraphs))
|
||||
print(len(doc2_paragraphs))
|
||||
return 0
|
||||
|
||||
print("in compare")
|
||||
# Compare each paragraph
|
||||
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
|
||||
if p1 != p2:
|
||||
print(p1)
|
||||
print(p2)
|
||||
return 0
|
||||
|
||||
return 1
|
||||
@@ -490,6 +497,39 @@ def compare_docx_lines(file1, file2):
|
||||
return 0
|
||||
|
||||
|
||||
def compare_docx_files_and_ignore_new_lines(file1, file2, **options):
    """Compare two .docx files paragraph-by-paragraph, skipping empty paragraphs.

    With ``ignore_blanks`` (default True) all whitespace runs are collapsed to
    single spaces before comparison; otherwise paragraph texts are compared
    pairwise after a length check.

    Returns 1 when the documents match, 0 otherwise (also 0 for non-docx input).
    """
    ignore_blanks = options.get('ignore_blanks', True)

    # Guard clause: both inputs must be .docx files.
    if not (file1.endswith('.docx') and file2.endswith('.docx')):
        print("Unsupported file types or mismatch between file types.")
        return 0

    # Drop empty paragraphs up front, keeping only their text.
    paragraphs1 = [p.text for p in Document(file1).paragraphs if p.text != '']
    paragraphs2 = [p.text for p in Document(file2).paragraphs if p.text != '']

    if ignore_blanks:
        # Collapse every whitespace run to one space before comparing.
        normalized1 = re.sub(r'\s+', ' ', '\n'.join(paragraphs1)).strip()
        normalized2 = re.sub(r'\s+', ' ', '\n'.join(paragraphs2)).strip()
        return 0 if normalized1 != normalized2 else 1

    if len(paragraphs1) != len(paragraphs2):
        return 0
    # Compare each paragraph text pairwise.
    for left, right in zip(paragraphs1, paragraphs2):
        if left != right:
            return 0
    return 1
|
||||
|
||||
|
||||
# Docx file saved in the ubuntu cannot use this function to compare highlight, don't know why, deprecated
|
||||
def compare_highlighted_text(file1, file2):
|
||||
def extract_highlighted_text(file_path):
|
||||
|
||||
@@ -3,15 +3,18 @@ import functools
|
||||
import json
|
||||
import operator
|
||||
import re
|
||||
import pdfplumber
|
||||
import sqlite3
|
||||
from numbers import Number
|
||||
from typing import Callable, Any, Union
|
||||
from typing import Dict, List, Pattern
|
||||
|
||||
import datetime
|
||||
import pandas as pd
|
||||
import lxml.etree
|
||||
from lxml.cssselect import CSSSelector
|
||||
from lxml.etree import _Element
|
||||
from rapidfuzz import fuzz
|
||||
from docx import Document
|
||||
|
||||
from .utils import _match_record, _match_value_to_rule
|
||||
|
||||
@@ -46,13 +49,27 @@ def is_in_list(result, rules) -> float:
|
||||
return 0.
|
||||
|
||||
|
||||
|
||||
def fuzzy_match(result, rules) -> float:
    """Fuzzy-compare *result* against ``rules["expected"]``.

    Returns the rapidfuzz similarity ratio rescaled from 0-100 to 0.0-1.0.
    """
    target = rules["expected"]
    similarity = fuzz.ratio(result, target)
    return similarity / 100.
|
||||
|
||||
|
||||
def fuzzy_place_math(result_file_path, rules) -> float:
    """Check every word in a .docx answer file against the accepted answers.

    Args:
        result_file_path: path of the .docx file to inspect.
        rules: dict whose "expected" key holds a list of accepted answer
            substrings.

    Returns:
        1. when each extracted word contains at least one accepted answer
        substring, else 0. (floats, matching the declared annotation — the
        original returned plain ints).
    """
    expect = rules["expected"]  # a list of possible answers
    # Read the docx and split every paragraph into whitespace-separated words,
    # implicitly skipping blank lines.
    doc = Document(result_file_path)
    words_list = []
    for para in doc.paragraphs:
        words_list.extend(para.text.split())
    # Print the extracted word list (debug aid).
    print(words_list)
    for word in words_list:
        # Each word must contain at least one accepted answer substring.
        if not any(ans in word for ans in expect):
            print("Wrong place:", word)
            return 0.
    return 1.
|
||||
|
||||
def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
|
||||
"""
|
||||
Args:
|
||||
@@ -227,15 +244,138 @@ def check_direct_json_object(result, rules)->float:
|
||||
One of the most commonly used function to evalute.
|
||||
Compare two json objects directly.
|
||||
"""
|
||||
if isinstance(result, str):
|
||||
# remove blanks before and after result
|
||||
result = result.strip()
|
||||
# replace all ' with "
|
||||
result = result.replace("'", '"')
|
||||
# load json object
|
||||
result = json.loads(result)
|
||||
print("result: ")
|
||||
print(result)
|
||||
print("expected: ")
|
||||
print(rules["expected"])
|
||||
if result is None:
|
||||
return 0.
|
||||
expected_json = rules["expected"]
|
||||
for key in expected_json.keys():
|
||||
expected_value = expected_json.get(key)
|
||||
if expected_value != result.get(key):
|
||||
return 0.
|
||||
return 1.0
|
||||
expect_in_result = rules.get("expect_in_result", False)
|
||||
if not expect_in_result:
|
||||
expected_json = rules["expected"]
|
||||
for key in expected_json.keys():
|
||||
expected_value = expected_json.get(key)
|
||||
if expected_value != result.get(key):
|
||||
return 0.
|
||||
return 1.0
|
||||
else:
|
||||
expected_json = rules["expected"]
|
||||
for key in expected_json.keys():
|
||||
expected_value = expected_json.get(key)
|
||||
if expected_value not in result.get(key):
|
||||
return 0.
|
||||
return 1.0
|
||||
|
||||
def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
    """Check every test time in a speedtest csv is within *time_diff* minutes of now.

    Args:
        speedtest_result_path: path to a csv with a column whose name starts
            with 'TEST_DATE' and whose values end in an ``HH:MM`` time.
        time_diff: maximum allowed distance from the current wall-clock time,
            in minutes (anything int()-convertible).

    Returns:
        True when all rows fall inside the window; False otherwise, including
        when no 'TEST_DATE' column exists (the original raised KeyError then).
    """
    date_col = None
    with open(speedtest_result_path, 'r') as f:
        frame = pd.read_csv(f)
    for column in frame.columns:
        if column.startswith('TEST_DATE'):
            date_col = column
            break
    # Guard: missing column used to crash with frame[None].
    if date_col is None:
        return False
    now_date_time = datetime.datetime.now().strftime('%H:%M')
    now_parsed = datetime.datetime.strptime(now_date_time, '%H:%M')
    limit = int(time_diff)
    for date in frame[date_col]:
        # The last five characters hold the HH:MM timestamp.
        recorded = datetime.datetime.strptime(date[-5:], '%H:%M')
        raw_minutes = abs((recorded - now_parsed).total_seconds()) / 60
        # Wrap around midnight: 23:59 vs 00:01 is 2 minutes apart, not 1438.
        minutes_apart = min(raw_minutes, 1440 - raw_minutes)
        if not minutes_apart < limit:
            return False
    return True
|
||||
|
||||
|
||||
def is_included_all_json_objects(gold_file_path, result_file_path):
    """Return True when every key/value pair of the gold json file also exists in the result json file."""
    print("gold_file_path: ")
    print(gold_file_path)
    print("result_file_path: ")
    print(result_file_path)
    # Load both json documents from disk.
    with open(gold_file_path, 'r') as f:
        gold_json = json.load(f)
    with open(result_file_path, 'r') as fr:
        result_json = json.load(fr)
    # Every gold entry must be present in the result with an equal value.
    return all(
        key in result_json and gold_json[key] == result_json[key]
        for key in gold_json.keys()
    )
|
||||
|
||||
|
||||
def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
    """Check that every value of the gold json file occurs in the PDF's extracted text.

    Args:
        pdf_file_path: path of the PDF to search.
        gold_text_path: path of a json file; all of its values must appear as
            substrings of the PDF text.

    Returns:
        True when all gold values are found; False otherwise (missing keys
        are printed for debugging).
    """
    print("gold_text_path: ")
    print(gold_text_path)
    print("pdf_file_path: ")
    print(pdf_file_path)
    with open(gold_text_path, 'r') as f:
        gold_json = json.load(f)
    with pdfplumber.open(pdf_file_path) as pdf:
        text = ''
        for page in pdf.pages:
            # extract_text() returns None for pages without a text layer;
            # coerce to '' so the concatenation cannot raise TypeError.
            text += page.extract_text() or ''
    false_list = [key for key in gold_json.keys() if gold_json[key] not in text]
    if len(false_list) > 0:
        print("false_list: ")
        print(false_list)
        return False
    else:
        return True
|
||||
|
||||
|
||||
def file_contains(file_path, config):
    """Return True when every snippet in ``config["expected"]`` occurs in the file's text.

    A falsy *file_path* yields False.  NOTE(review): the original comment
    claims the path ends with .txt, but no suffix check is performed — confirm.
    """
    if not file_path:
        return False
    with open(file_path, 'r') as f:
        contents = f.read()
    # Every expected snippet must be a substring of the file contents.
    return all(snippet in contents for snippet in config["expected"])
|
||||
|
||||
def check_csv_line_number(file_path, line_number):
    """Return True when the csv at *file_path* has exactly ``line_number["expected"]`` rows.

    Paths without a .csv suffix fail immediately.  NOTE(review): relies on the
    ``csv`` module being imported at file level — not visible in this excerpt;
    confirm it exists.
    """
    # Reject anything that is not a csv file by suffix.
    if not file_path.endswith('.csv'):
        return False
    # Count rows as parsed by the csv reader (handles quoted newlines).
    with open(file_path, 'r') as f:
        row_total = sum(1 for _ in csv.reader(f))
    return row_total == int(line_number["expected"])
|
||||
|
||||
|
||||
def compare_terminal_and_txt(txt_file_path, terminal_output):
    """Return True when *terminal_output* equals the txt file's contents exactly."""
    # Read the reference text from disk.
    with open(txt_file_path, 'r') as f:
        expected_text = f.read()
    # Strict equality: any whitespace or newline difference fails the check.
    return terminal_output == expected_text
|
||||
|
||||
|
||||
def compare_python_pure_text(py_file_path, gold_file_path):
    """Return True when the two files are identical after removing ALL whitespace.

    Both paths are printed for debugging.  The comparison ignores every space,
    tab and newline, so formatting-only differences do not matter.
    """
    print("py_file_path: ")
    print(py_file_path)
    print("gold_file_path: ")
    print(gold_file_path)

    def squash(text):
        # Strip every whitespace character, leaving only the visible tokens.
        return ''.join(text.split())

    with open(py_file_path, 'r') as source_file:
        source_text = source_file.read()
    with open(gold_file_path, 'r') as gold_file:
        gold_text = gold_file.read()
    # Compare the whitespace-free contents.
    return squash(source_text) == squash(gold_text)
|
||||
@@ -1,10 +1,12 @@
|
||||
import builtins
|
||||
import datetime
|
||||
import functools
|
||||
import itertools
|
||||
import logging
|
||||
import operator
|
||||
import re
|
||||
import zipfile
|
||||
import pandas as pd
|
||||
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
|
||||
from typing import Dict, List, Set, Match, Tuple, Pattern
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
Reference in New Issue
Block a user