Merge branch 'main' of github.com:xlang-ai/DesktopEnv

This commit is contained in:
Jason Lee
2024-03-10 14:50:17 +08:00
27 changed files with 173 additions and 100 deletions

View File

@@ -82,34 +82,28 @@ def get_info_from_website(env, config: Dict[Any, Any]) -> Any:
page.wait_for_load_state('load')
action = info_dict.get('action', 'inner_text')
if action == "inner_text":
ele = page.locator(info_dict['selector'])
expect(ele).to_be_visible()
ele = page.wait_for_selector(info_dict['selector'], state='attached', timeout=10000)
infos.append(ele.inner_text())
elif action == "attribute":
ele = page.locator(info_dict['selector'])
expect(ele).to_be_visible()
ele = page.wait_for_selector(info_dict['selector'], state='attached', timeout=10000)
infos.append(ele.get_attribute(info_dict['attribute']))
elif action == 'click_and_inner_text':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
link = page.locator(sel)
expect(link).to_be_visible()
link = page.wait_for_selector(sel, state='attached', timeout=10000)
link.click()
page.wait_for_load_state('load')
else:
ele = page.locator(sel)
expect(ele).to_be_visible()
ele = page.wait_for_selector(sel, state='attached', timeout=10000)
infos.append(ele.inner_text())
elif action == 'click_and_attribute':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
link = page.locator(sel)
expect(link).to_be_visible()
link = page.wait_for_selector(sel, state='attached', timeout=10000)
link.click()
page.wait_for_load_state('load')
else:
ele = page.locator(sel)
expect(ele).to_be_visible()
ele = page.wait_for_selector(sel, state='attached')
infos.append(ele.get_attribute(info_dict['attribute']))
else:
raise NotImplementedError(f'The action {action} is not supported yet.')

View File

@@ -96,16 +96,13 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
paths: List[str] = config["path"]
dests: List[str] = config["dest"]
cache_paths: List[str] = []
gives: Set[int] = set(config.get("gives", [0]))
for i, (p, d) in enumerate(zip(paths, dests)):
print("env cache_dir: ")
print(env.cache_dir)
_path = os.path.join(env.cache_dir, d)
print("_path: ")
print(_path)
file = env.controller.get_file(p)
if file is None:
#return None

View File

@@ -1,8 +1,13 @@
import logging
from typing import List
import openpyxl
def compare_conference_city_in_order( actual_city_list_path, expected_city):
logger = logging.getLogger("desktopenv.metrics.calc")
def compare_conference_city_in_order(actual_city_list_path, expected_city):
expected_city_list = expected_city["expected"]
print(f"Reading csv file from {actual_city_list_path}")
wb = openpyxl.load_workbook(actual_city_list_path)
sheet = wb.active
actual_city_list = []
@@ -12,16 +17,25 @@ def compare_conference_city_in_order( actual_city_list_path, expected_city):
# expected_city is the city that we want to compare with the actual city list
# must in order index
# debug
print("expected_city_list:")
print(expected_city_list)
print("actual_city_list_path:")
print(actual_city_list)
wrong_list = []
try:
for i in range(len(actual_city_list)):
if expected_city_list[i] not in actual_city_list[i]:
wrong_list.append(i)
print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
if isinstance(expected_city_list[i], str):
if expected_city_list[i] not in actual_city_list[i]:
logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
return 0.
elif isinstance(expected_city_list[i], List):
if not any(possible_str in actual_city_list[i] for possible_str in expected_city_list[i]):
logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
return 0.
else:
raise TypeError("Expected city should be a string or a list of strings")
except:
return False
return True if len(wrong_list) == 0 else False
return 0.
return 1.

View File

@@ -65,7 +65,16 @@ def is_expected_installed_extensions(installed_extensions, expected) -> float:
print("installed_extensions: ")
print(installed_extensions)
expected_extensions = expected["expected"]
return 1 if expected_extensions == installed_extensions else 0. # must equal, no additional extensions allowed
# whether the expected extensions are installed
set_expected_extensions = set(expected_extensions)
set_installed_extensions = set(installed_extensions)
if set_expected_extensions.issubset(set_installed_extensions):
return 1.
else:
return 0.
def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
"""

View File

@@ -223,17 +223,13 @@ def compare_docx_images(docx_file1, docx_file2):
return 1
import pytesseract
import easyocr
def compare_image_text(image_path, rule):
if not image_path:
return 0
img = Image.open(image_path)
img_text = pytesseract.image_to_string(img)
reader = easyocr.Reader(['en'])
result = reader.readtext(image_path)
extracted_text = ' '.join([entry[1] for entry in result])
if rule['type'] == 'text':
return 1 if rule['text'] in img_text else 0
return 1 if rule['text'] in extracted_text else 0
else:
raise ValueError("Unsupported rule type")

View File

@@ -104,6 +104,27 @@ def compare_text_file(actual: str, expected: str, **options) -> float:
return 0.0
import zipfile
from difflib import SequenceMatcher
import PyPDF2
def compare_pdf_content(content1, content2, text_similarity_threshold):
    """Report whether two PDF documents carry sufficiently similar text.

    Args:
        content1: Raw bytes of the first PDF document.
        content2: Raw bytes of the second PDF document.
        text_similarity_threshold: Minimum ``SequenceMatcher`` ratio
            (between 0.0 and 1.0) at which the two texts count as matching.

    Returns:
        True when the similarity ratio of the extracted texts is at least
        ``text_similarity_threshold``, otherwise False.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    import io

    def extract_text_from_pdf(content):
        """Return the concatenated text of every page in the PDF bytes."""
        # Parse straight from memory. Writing to a fixed "temp.pdf" in the
        # working directory would clobber any existing file of that name,
        # leave a stray file behind, and break under concurrent calls.
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(content))
        # `or ''` keeps the join safe should a page yield no text.
        return ''.join(page.extract_text() or '' for page in pdf_reader.pages)

    text1 = extract_text_from_pdf(content1)
    text2 = extract_text_from_pdf(content2)

    similarity_ratio = SequenceMatcher(None, text1, text2).ratio()

    return similarity_ratio >= text_similarity_threshold
def compare_zip_files(actual: str, expected: str, **options) -> float:
"""
@@ -128,7 +149,12 @@ def compare_zip_files(actual: str, expected: str, **options) -> float:
content1 = zip_file1.read(file_name)
content2 = zip_file2.read(file_name)
if content1 != content2:
if file_name.lower().endswith('.pdf'):
if compare_pdf_content(content1, content2, 0.95):
continue
else:
return 0.0
elif content1 != content2:
return 0.0
return 1.0