Merge branch 'main' into zdy

David Chang
2024-03-15 12:12:40 +08:00
189 changed files with 4679 additions and 9135 deletions


@@ -1,5 +1,8 @@
# DesktopEnv: An Environment towards Human-like Computer Task Mastery
# OSWorld: Open-Ended Tasks in Real Computer Environments
<p align="center">
<img src="desktop_env/assets/icon.jpg" alt="Logo" width="80px">
<br>
<b>SLOGAN</b>
</p>
@@ -8,7 +11,7 @@
<a href="">Paper</a>
</p>
![Overview](media/overview.png)
![Overview]()
## Updates
- 2024-03-01: We released our [paper](), [environment code](), [dataset](), and [project page](). Check it out!

BIN desktop_env/assets/icon.jpg (new binary file, 8.7 KiB; contents not shown)


@@ -53,8 +53,8 @@ class DesktopEnv(gym.Env):
def __init__(
self,
path_to_vm: str,
snapshot_name: str = "init_state",
action_space: str = "computer_13",
task_config: Dict[str, Any] = None,
tmp_dir: str = "tmp",
cache_dir: str = "cache",
screen_size: Tuple[int] = (1920, 1080),
@@ -64,15 +64,6 @@ class DesktopEnv(gym.Env):
Args:
path_to_vm (str): path to .vmx file
action_space (str): "computer_13" | "pyautogui"
task_config (Dict[str, Any]): the integrated task configuration,
including
* base snapshot
* task id (uuid)
* instruction
* setup config
* evaluator config
tmp_dir (str): temporary directory for trajectory artifacts such as
the extracted screenshots
cache_dir (str): cache directory for task-related artifacts such as
@@ -81,23 +72,20 @@ class DesktopEnv(gym.Env):
# Initialize environment variables
self.path_to_vm = os.path.abspath(os.path.expandvars(os.path.expanduser(path_to_vm)))
self.snapshot_name = snapshot_name
self.tmp_dir_base: str = tmp_dir
self.cache_dir_base: str = cache_dir
self.vm_screen_size = screen_size
self.vm_screen_size = screen_size # todo: add the logic to get the screen size from the VM
self.headless = headless
os.makedirs(self.tmp_dir_base, exist_ok=True)
# task-aware state
# todo: handle the logic of the snapshot directory
self._set_task_info(task_config)
# Initialize emulator and controller
logger.info("Initializing...")
self._start_emulator()
self.vm_ip = self._get_vm_ip()
self.controller = PythonController(vm_ip=self.vm_ip)
self.setup_controller = SetupController(vm_ip=self.vm_ip, cache_dir=self.cache_dir)
self.setup_controller = SetupController(vm_ip=self.vm_ip, cache_dir=self.cache_dir_base)
# Meta info of the VM, move to the reset() function
self.vm_platform: str = "" # self.controller.get_vm_platform()
@@ -147,7 +135,7 @@ class DesktopEnv(gym.Env):
raise Exception("Failed to get VM IP address!")
def _save_state(self):
_execute_command(["vmrun", "-T", "ws" "snapshot", self.path_to_vm, self.snapshot_path])
_execute_command(["vmrun", "-T", "ws" "snapshot", self.path_to_vm, self.snapshot_name])
def _get_screenshot(self):
# random_uuid = str(uuid.uuid4())
@@ -167,7 +155,6 @@ class DesktopEnv(gym.Env):
return screenshot_image_path
def _set_task_info(self, task_config: Dict[str, Any]):
self.snapshot_path = task_config["snapshot"]
self.task_id: str = task_config["id"]
self.cache_dir: str = os.path.join(self.cache_dir_base, self.task_id)
os.makedirs(self.cache_dir, exist_ok=True)
@@ -239,8 +226,8 @@ class DesktopEnv(gym.Env):
)
os.makedirs(os.path.join(self.tmp_dir, "screenshots"))
logger.info("Reverting to snapshot to {}...".format(self.snapshot_path))
_execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path])
logger.info("Reverting to snapshot to {}...".format(self.snapshot_name))
_execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_name])
time.sleep(5)
print(self.vm_screen_size)


@@ -24,14 +24,16 @@ from .chrome import (
get_gotoRecreationPage_and_get_html_content,
get_url_dashPart,
get_active_url_from_accessTree,
get_find_installed_extension_name,
get_info_from_website
)
from .file import get_cloud_file, get_vm_file, get_cache_file, get_content_from_vm_file
from .general import get_vm_command_line, get_vm_terminal_output
from .general import get_vm_command_line, get_vm_terminal_output, get_vm_command_error
from .gimp import get_gimp_config_file
from .impress import get_audio_in_slide
from .impress import get_audio_in_slide, get_background_image_in_slide
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper, get_list_directory
from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime
from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime, get_time_diff_range
from .replay import get_replay
from .vlc import get_vlc_playing_info, get_vlc_config, get_default_video_player
from .vscode import get_vscode_config
from .calc import get_conference_city_in_order


@@ -0,0 +1,15 @@
import csv
# Read a CSV file and return all entries of the third column, in row order.
def get_conference_city_in_order(env, config):
# read the csv file
csv_path = config['csv_path']
print(f"Reading csv file from {csv_path}")
with open(csv_path, 'r') as f:
reader = csv.reader(f)
# skip the header row
next(reader)
# get the third column in the order of rows
conference_city_list = [row[2] for row in reader]
return conference_city_list
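A minimal usage sketch for this getter (the file name and rows are hypothetical; the env argument is unused, so None suffices):

```python
import csv

# Build a throwaway CSV whose third column holds the cities.
with open("conferences.csv", "w", newline="") as f:
    csv.writer(f).writerows([
        ["name", "date", "city"],              # header row, skipped by the getter
        ["ICLR", "2024-05-07", "Vienna"],
        ["NeurIPS", "2024-12-10", "Vancouver"],
    ])

print(get_conference_city_in_order(None, {"csv_path": "conferences.csv"}))
# -> ['Vienna', 'Vancouver']
```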


@@ -4,6 +4,7 @@ import os
import platform
import sqlite3
import time
from urllib.parse import unquote
from typing import Dict, Any, List
from urllib.parse import urlparse, parse_qs
@@ -81,34 +82,28 @@ def get_info_from_website(env, config: Dict[Any, Any]) -> Any:
page.wait_for_load_state('load')
action = info_dict.get('action', 'inner_text')
if action == "inner_text":
ele = page.locator(info_dict['selector'])
expect(ele).to_be_visible()
ele = page.wait_for_selector(info_dict['selector'], state='attached', timeout=10000)
infos.append(ele.inner_text())
elif action == "attribute":
ele = page.locator(info_dict['selector'])
expect(ele).to_be_visible()
ele = page.wait_for_selector(info_dict['selector'], state='attached', timeout=10000)
infos.append(ele.get_attribute(info_dict['attribute']))
elif action == 'click_and_inner_text':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
link = page.locator(sel)
expect(link).to_be_visible()
link = page.wait_for_selector(sel, state='attached', timeout=10000)
link.click()
page.wait_for_load_state('load')
else:
ele = page.locator(sel)
expect(ele).to_be_visible()
ele = page.wait_for_selector(sel, state='attached', timeout=10000)
infos.append(ele.inner_text())
elif action == 'click_and_attribute':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
link = page.locator(sel)
expect(link).to_be_visible()
link = page.wait_for_selector(sel, state='attached', timeout=10000)
link.click()
page.wait_for_load_state('load')
else:
ele = page.locator(sel)
expect(ele).to_be_visible()
ele = page.wait_for_selector(sel, state='attached')
infos.append(ele.get_attribute(info_dict['attribute']))
else:
raise NotImplementedError(f'The action {action} is not supported yet.')
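For reference, a hedged sketch of one entry this dispatch consumes; only the 'action', 'selector', and 'attribute' keys appear in the visible code, and the selector values are illustrative:

```python
info_dict = {
    # every selector except the last is clicked in order;
    # the last one's inner text is collected
    "action": "click_and_inner_text",
    "selector": ["a.year-2024", "div.summary"],
}
```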
@@ -589,6 +584,10 @@ def get_active_url_from_accessTree(env, config):
if len(elements) == 0:
print("no elements found")
return None
elif elements[-1].text is None:
print("no text found")
return None
active_tab_url = config["goto_prefix"] + elements[0].text if "goto_prefix" in config.keys() else "https://" + \
elements[0].text
print("active tab url now: {}".format(active_tab_url))
@@ -1006,6 +1005,43 @@ def get_find_unpacked_extension_path(env, config: Dict[str, str]):
return "Google"
def get_find_installed_extension_name(env, config: Dict[str, str]):
os_type = env.vm_platform
if os_type == 'Windows':
preference_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
'Google\\Chrome\\User Data\\Default\\Preferences'))""")['output'].strip()
elif os_type == 'Darwin':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Preferences'))")[
'output'].strip()
elif os_type == 'Linux':
if "arm" in platform.machine():
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), 'snap/chromium/common/chromium/Default/Preferences'))")[
'output'].strip()
else:
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Preferences'))")[
'output'].strip()
else:
raise Exception('Unsupported operating system')
try:
content = env.controller.get_file(preference_file_path)
data = json.loads(content)
# The Preferences file stores the settings of every installed extension; return all manifest names and let the metric look for one that matches the target extension
all_extensions_name = []
all_extensions = data.get('extensions', {}).get('settings', {})
for id in all_extensions.keys():
name = all_extensions[id]["manifest"]["name"]
all_extensions_name.append(name)
return all_extensions_name
except Exception as e:
logger.error(f"Error: {e}")
return "Google"
def get_data_delete_automacally(env, config: Dict[str, str]):
"""
This function checks whether the "auto-delete" mode of Chromium is enabled
@@ -1033,8 +1069,8 @@ def get_data_delete_automacally(env, config: Dict[str, str]):
try:
content = env.controller.get_file(preference_file_path)
data = json.loads(content)
data_delete_state = data["profile"]["exit_type"]
return data_delete_state
data_delete_state = data["profile"].get("default_content_setting_values", None)
return "true" if data_delete_state is not None else "false"
except Exception as e:
logger.error(f"Error: {e}")
return "Google"
@@ -1073,6 +1109,7 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]):
"""
active_tab_url = get_active_url_from_accessTree(env, config)
if not isinstance(active_tab_url, str):
logger.error("active_tab_url is not a string")
return None
host = env.vm_ip
port = 9222 # fixme: this port is hard-coded; it should come from the config file
@@ -1105,12 +1142,14 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]):
for context in browser.contexts:
for page in context.pages:
page.wait_for_load_state("networkidle")
if page.url == active_tab_url:
# the accessibility tree and Playwright may return percent-encoded characters, so decode both URLs before comparing
if unquote(page.url) == unquote(active_tab_url):
target_page = page
print("tartget page url: ", target_page.url)
print("tartget page title: ", target_page.title())
print("\33[32mtartget page url: ", target_page.url, "\33[0m")
print("\33[32mtartget page title: ", target_page.title(), "\33[0m")
break
if target_page is None:
logger.error("Your tab is not the target tab.")
return {}
return_json = {}
if config["category"] == "class":


@@ -1,6 +1,7 @@
import os
from typing import Dict, List, Set
from typing import Optional, Any, Union
from datetime import datetime
import requests
import pandas as pd
@@ -77,21 +78,31 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
gives (List[int]): optional. defaults to [0]. which files are directly
returned to the metric. if len==1, str is returned; else, list is
returned.
The following options are only supported for a single file for now:
time_suffix(bool): optional. defaults to False. if True, append the current time in the required format.
time_format(str): optional. defaults to "%Y_%m_%d". format of the time suffix.
"""
time_format = "%Y_%m_%d"
if not config.get("multi", False):
paths: List[str] = [config["path"]]
dests: List[str] = [config["dest"]]
if "time_suffix" in config.keys() and config["time_suffix"]:
if "time_format" in config.keys():
time_format = config["time_format"]
# Insert the time suffix before the first dot of the file name (assumes a single-dot extension)
paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths]
dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests]
else:
paths: List[str] = config["path"]
dests: List[str] = config["dest"]
cache_paths: List[str] = []
gives: Set[int] = set(config.get("gives", [0]))
for i, (p, d) in enumerate(zip(paths, dests)):
_path = os.path.join(env.cache_dir, d)
file = env.controller.get_file(p)
if file is None:
#return None
@@ -104,7 +115,6 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
cache_paths.append(_path)
with open(_path, "wb") as f:
f.write(file)
return cache_paths[0] if len(cache_paths)==1 else cache_paths
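A hedged example of the single-file config this getter accepts (paths are hypothetical); note the suffix insertion splits at the first dot, so it assumes a single-dot file name:

```python
config = {
    "path": "/home/user/report.xlsx",   # file inside the VM
    "dest": "report.xlsx",              # name under env.cache_dir on the host
    "time_suffix": True,
    "time_format": "_%Y_%m_%d",
}
# "report.xlsx" is rewritten to e.g. "report_2024_03_15.xlsx" before the
# fetch; with "multi": True, "path" and "dest" become lists instead.
```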


@@ -21,6 +21,22 @@ def get_vm_command_line(env, config: Dict[str, str]):
logger.error("Failed to get vm command line. Status code: %d", response.status_code)
return None
def get_vm_command_error(env, config: Dict[str, str]):
vm_ip = env.vm_ip
port = 5000
command = config["command"]
shell = config.get("shell", False)
response = requests.post(f"http://{vm_ip}:{port}/execute", json={"command": command, "shell": shell})
print(response.json())
if response.status_code == 200:
return response.json()["error"]
else:
logger.error("Failed to get vm command line error. Status code: %d", response.status_code)
return None
def get_vm_terminal_output(env, config: Dict[str, str]):
return env.controller.get_terminal_output()
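A hedged usage sketch for get_vm_command_error: it posts to the helper server on port 5000 inside the VM and returns the response's "error" field; whether "command" is an argv list or a shell string depends on that server, so both the command and the flag here are illustrative:

```python
config = {
    "command": ["python3", "missing_script.py"],  # argv form, assuming shell=False
    "shell": False,
}
stderr_text = get_vm_command_error(env, config)   # env is provided by the harness
```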


@@ -7,6 +7,67 @@ from typing import Dict
from desktop_env.evaluators.getters.file import get_vm_file
def get_background_image_in_slide(env, config: Dict[str, str]):
ppt_file_path, slide_index, dest = config["ppt_file_path"], int(config["slide_index"]), config["dest"]
image_id, image_file_path = None, None
ppt_file_localhost_path = get_vm_file(env, {"path": ppt_file_path, "dest": os.path.split(ppt_file_path)[-1]})
with zipfile.ZipFile(ppt_file_localhost_path, 'r') as myzip:
slide1_xml_file = 'ppt/slides/slide{}.xml'.format(slide_index + 1)
# First, check whether a background image is used in the slide
if slide1_xml_file not in myzip.namelist(): return None
with myzip.open(slide1_xml_file) as f:
# Parse the XML tree from the relationships file
tree = ET.parse(f)
root = tree.getroot()
bg_tag = "{http://schemas.openxmlformats.org/presentationml/2006/main}bgPr"
image_tag = "{http://schemas.openxmlformats.org/drawingml/2006/main}blip"
attr_tag = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
for child in root.iter(bg_tag):
try:
for element in child.iter(image_tag):
image_id = element.attrib[attr_tag]
break
except: pass
if image_id is not None: break
else: return None
# next, extract the background image from the slide
slide1_rels_file = 'ppt/slides/_rels/slide{}.xml.rels'.format(slide_index + 1)
if slide1_rels_file in myzip.namelist():
with myzip.open(slide1_rels_file) as f:
# Parse the XML tree from the relationships file
tree = ET.parse(f)
root = tree.getroot()
# Define the namespace used in the relationships file
namespaces = {'r': 'http://schemas.openxmlformats.org/package/2006/relationships'}
# Look for all relationship elements that have a type attribute for image
for rel in root.findall('r:Relationship', namespaces):
# Check if the relationship is for an image file
if 'image' in rel.attrib['Type'] and rel.attrib['Id'] == image_id:
target = rel.attrib['Target']
if target.startswith('..'):
# Resolve the relative path to get the correct path within the zip file
image_file_path = os.path.normpath(os.path.join('ppt/slides', target))
# Replace backslashes with forward slashes for ZIP compatibility
image_file_path = image_file_path.replace('\\', '/')
tmpdirname = os.path.dirname(ppt_file_localhost_path)
myzip.extract(image_file_path, tmpdirname)
image_file_path = os.path.join(tmpdirname, image_file_path)
return image_file_path
else: # absolute path
assert target.startswith("file://"), target
image_file_path = target[7:]
break
if image_file_path is None:
return None
else:
# Get the image file from the VM and return its path on the host
return get_vm_file(env, {"path": image_file_path, "dest": dest})
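A minimal config sketch for this getter (paths are hypothetical); slide_index is zero-based, since the XML entry is named slide{slide_index + 1}.xml:

```python
config = {
    "ppt_file_path": "/home/user/deck.pptx",   # presentation inside the VM
    "slide_index": 0,                          # first slide -> ppt/slides/slide1.xml
    "dest": "slide1_background.png",           # cache name on the host
}
host_image_path = get_background_image_in_slide(env, config)
```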
def get_audio_in_slide(env, config: Dict[str, str]):
ppt_file_path, slide_index, dest = config["ppt_file_path"], int(config["slide_index"]), config["dest"]


@@ -195,3 +195,10 @@ def get_accessibility_tree(env, *args) -> str:
accessibility_tree: str = env.controller.get_accessibility_tree()
logger.debug("AT@eval: %s", accessibility_tree)
return accessibility_tree
def get_time_diff_range(env, config) -> str:
try:
return config["diff_range_in_minutes"]
except:
logger.error("diff_range_in_minutes not found in config.")
return None


@@ -2,7 +2,8 @@ from .basic_os import (
check_gnome_favorite_apps,
is_utc_0,
check_text_enlarged,
check_moved_jpgs
check_moved_jpgs,
is_in_vm_clickboard
)
from .chrome import (
is_expected_tabs,
@@ -19,6 +20,7 @@ from .chrome import (
is_expected_active_tab,
is_expected_url_pattern_match,
is_added_to_steam_cart,
is_expected_installed_extensions,
compare_pdf_images
)
from .docs import (
@@ -47,8 +49,10 @@ from .docs import (
check_file_exists,
check_tabstops,
compare_contains_image,
compare_docx_files_and_ignore_new_lines,
compare_docx_images,
compare_image_text
compare_image_text,
compare_references
)
from .general import (
check_csv,
@@ -61,6 +65,14 @@ from .general import (
fuzzy_match,
check_include_exclude,
check_direct_json_object,
compare_time_in_speedtest_results,
is_included_all_json_objects,
is_gold_text_included_in_pdf,
check_line_number,
file_contains,
compare_terminal_and_txt,
fuzzy_place_math,
compare_python_pure_text,
diff_text_file,
literal_match
)
@@ -68,7 +80,7 @@ from .gimp import (
check_brightness_decrease_and_structure_sim,
check_contrast_increase_and_structure_sim,
check_saturation_increase_and_structure_sim,
check_image_size_and_structure_sim,
check_image_size,
check_image_mirror,
check_palette_and_structure_sim,
check_textbox_on_leftside,
@@ -81,7 +93,9 @@ from .gimp import (
increase_saturation,
decrease_brightness,
check_file_exists,
compare_triangle_positions
compare_triangle_positions,
check_sharper,
check_image_file_size
)
from .libreoffice import check_libre_locale
from .pdf import check_pdf_pages
@@ -100,7 +114,8 @@ from .slides import (
)
from .table import (
compare_table,
compare_csv
compare_csv,
compare_conference_city_in_order
)
from .thunderbird import (
check_thunderbird_prefs,
@@ -125,11 +140,13 @@ from .vscode import (
compare_text_file,
compare_config,
compare_answer,
compare_result_files,
is_extension_installed,
check_json_settings,
check_json_keybindings,
check_python_file_by_test_suite,
check_python_file_by_gold_file,
check_html_background_image,
compare_zip_files
)
from .others import compare_epub, check_mp3_meta


@@ -1,6 +1,3 @@
import subprocess
def check_gnome_favorite_apps(apps_str: str, rule):
# parse the string like "['thunderbird.desktop', 'vim.desktop', 'google-chrome.desktop']"
# to a list of strings
@@ -56,3 +53,16 @@ def check_moved_jpgs(directory_list, rule):
return 1
else:
return 0
def is_in_vm_clickboard(config, terminal_output):
print("terminal_output: ")
print(terminal_output)
print("config: ")
print(config)
expected_results = config["expected"]
# check if terminal_output has expected results
if not isinstance(expected_results, list):
return 1 if expected_results in terminal_output else 0
else:
return 1 if all(result in terminal_output for result in expected_results) else 0
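A small sketch of the two config shapes this metric accepts; the strings are illustrative:

```python
# Single expected string: substring check against the terminal output.
is_in_vm_clickboard({"expected": "hello"}, "hello world")            # -> 1
# List form: every element must appear in the terminal output.
is_in_vm_clickboard({"expected": ["foo", "bar"]}, "foo ... baz")     # -> 0
```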


@@ -2,9 +2,9 @@ import logging
import os
import re
import shutil
from itertools import product
from typing import Any, Dict, List, Union
import fitz # PyMuPDF
import rapidfuzz.fuzz as fuzz
from bs4 import BeautifulSoup, Tag
@@ -61,6 +61,21 @@ def is_expected_url_pattern_match(result, rules) -> float:
return 1.
def is_expected_installed_extensions(installed_extensions, expected) -> float:
print("installed_extensions: ")
print(installed_extensions)
expected_extensions = expected["expected"]
# whether the expected extensions are installed
set_expected_extensions = set(expected_extensions)
set_installed_extensions = set(installed_extensions)
if set_expected_extensions.issubset(set_installed_extensions):
return 1.
else:
return 0.
def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
"""
Checks if the expected tabs are open in Chrome.
@@ -94,12 +109,24 @@ def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float:
elif rule['type'] == "liked_authors_websites_urls":
# Check if "liked authors" folder exists
liked_authors_folder = next((bookmark for bookmark in bookmarks['bookmark_bar']['children'] if
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
if liked_authors_folder:
# Check if it contains the specified URLs
liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if
bookmark['type'] == 'url']
return 1. if set(liked_authors_urls) == set(rule['urls']) else 0.
urls = rule['urls']
for idx, url in enumerate(urls):
if isinstance(url, str):
urls[idx] = [url]
combinations = product(*urls)
for combination in combinations:
if set(combination) == set(liked_authors_urls):
return 1.
return 0.
else:
return 0.
else:
@@ -140,37 +167,54 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
return score / len(pdf2_path)
import fitz
from PIL import Image
from io import BytesIO
from borb.pdf import Document
from borb.pdf import PDF
from pathlib import Path
import typing
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
if not pdf1_path or not pdf2_path:
return 0.
def extract_images_from_pdf(pdf_path):
pdf_document = fitz.open(pdf_path)
images = []
for page_number in range(pdf_document.page_count):
page = pdf_document[page_number]
image_list = page.get_images(full=True)
pixmap = page.get_pixmap()
for img_index, img_info in enumerate(image_list):
base_image = pdf_document.extract_image(img_index)
image_bytes = base_image["image"]
img = Image.frombytes("RGB", [pixmap.width, pixmap.height], pixmap.samples)
images.append(BytesIO(image_bytes))
images.append(img)
return images
def fix_pdf(in_path: Path, out_path: Path) -> None:
doc: typing.Optional[Document] = None
with open(in_path, "rb") as fh:
doc = PDF.loads(fh)
with open(out_path, "wb") as fh:
PDF.dumps(fh, doc)
fix_pdf(Path(pdf1_path), Path(pdf1_path))
fix_pdf(Path(pdf2_path), Path(pdf2_path))
images1 = extract_images_from_pdf(pdf1_path)
images2 = extract_images_from_pdf(pdf2_path)
if len(images1) != len(images2):
return 0.
for i, (img1, img2) in enumerate(zip(images1, images2), 1):
if Image.open(img1).tobytes() != Image.open(img2).tobytes():
for img1, img2 in zip(images1, images2):
if img1.tobytes() != img2.tobytes():
return 0.
return 1.
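The borb load/dump round-trip in fix_pdf rewrites each PDF in place, presumably to repair or normalize files that PyMuPDF would otherwise choke on; after that, every page is rasterized and compared byte-for-byte. A hedged usage sketch with hypothetical paths:

```python
score = compare_pdf_images("pred.pdf", "gold.pdf")
# 1.0 only when both PDFs yield the same number of rendered images and each
# pair matches exactly; any count or pixel mismatch returns 0.0.
```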
@@ -178,7 +222,10 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
"""
Compare two archives. Note that the files in the archives should be of the same type.
"""
if not pred_path: return 0.
file_path = kwargs.pop('file_path', '')
if not pred_path:
return 0.
pred_folder = os.path.splitext(pred_path)[0] + '_pred'
gold_folder = os.path.splitext(gold_path)[0] + '_gold'
@@ -186,13 +233,16 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
shutil.rmtree(pred_folder, ignore_errors=True)
os.makedirs(pred_folder)
shutil.unpack_archive(pred_path, pred_folder)
if not os.path.exists(gold_folder): # use cache if exists
os.makedirs(gold_folder)
shutil.unpack_archive(gold_path, gold_folder)
pred_files = sorted(os.listdir(pred_folder))
gold_files = sorted(os.listdir(gold_folder))
if pred_files != gold_files: return 0.
pred_files = sorted(os.listdir(os.path.join(pred_folder, file_path)))
gold_files = sorted(os.listdir(os.path.join(gold_folder, file_path)))
if pred_files != gold_files:
return 0.
def get_compare_function():
file_type = kwargs.pop('file_type', 'text')
@@ -228,8 +278,8 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
score = 0
compare_function = get_compare_function()
for f1, f2 in zip(pred_files, gold_files):
fp1 = os.path.join(pred_folder, f1)
fp2 = os.path.join(gold_folder, f2)
fp1 = os.path.join(pred_folder, file_path, f1)
fp2 = os.path.join(gold_folder, file_path, f2)
score += compare_function(fp1, fp2, **kwargs)
return score / len(pred_files)


@@ -3,17 +3,19 @@ import os
import re
import xml.etree.ElementTree as ET
import zipfile
from io import BytesIO
from typing import List, Dict, Any
from PIL import Image
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
from docx.shared import RGBColor
from odf.opendocument import load
from odf.text import P
from odf.text import Span
from rapidfuzz import fuzz
from skimage.color import deltaE_ciede2000
from skimage.color import rgb2lab
from rapidfuzz import fuzz
logger = logging.getLogger("desktopenv.metric.docs")
@@ -23,6 +25,9 @@ def find_default_font(config_file_path, rules):
default_font = None
expected_font = rules["font_name"]
if not config_file_path:
return 0
try:
tree = ET.parse(config_file_path)
root = tree.getroot()
@@ -42,7 +47,14 @@ def find_default_font(config_file_path, rules):
def contains_page_break(docx_file):
doc = Document(docx_file)
if not docx_file:
return 0
try:
doc = Document(docx_file)
except Exception as e:
logger.error(f"Error: {e}")
return 0
namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
@@ -62,6 +74,9 @@ def compare_docx_files(file1, file2, **options):
ignore_order = options.get('ignore_order', False)
content_only = options.get('content_only', False)
if not file1 or not file2:
return 0
def get_paragraph_texts_odt(document):
paragraphs = document.getElementsByType(P)
paragraph_texts = []
@@ -80,16 +95,24 @@ def compare_docx_files(file1, file2, **options):
# Determine file types and load documents
if file1.endswith('.docx') and file2.endswith('.docx'):
doc1 = Document(file1)
doc2 = Document(file2)
try:
doc1 = Document(file1)
doc2 = Document(file2)
except Exception as e:
logger.error(f"Error: {e}")
return 0
doc1_paragraphs = [p.text for p in doc1.paragraphs]
doc2_paragraphs = [p.text for p in doc2.paragraphs]
if ignore_order:
doc1_paragraphs = sorted(doc1_paragraphs)
doc2_paragraphs = sorted(doc2_paragraphs)
elif file1.endswith('.odt') and file2.endswith('.odt'):
doc1 = load(file1)
doc2 = load(file2)
try:
doc1 = load(file1)
doc2 = load(file2)
except Exception as e:
logger.error(f"Error: {e}")
return 0
doc1_paragraphs = get_paragraph_texts_odt(doc1)
doc2_paragraphs = get_paragraph_texts_odt(doc2)
if ignore_order:
@@ -118,22 +141,36 @@ def compare_docx_files(file1, file2, **options):
if text1 != text2:
return 0
else:
print("ignore_blanks=false")
if len(doc1_paragraphs) != len(doc2_paragraphs):
print(doc1_paragraphs)
print(doc2_paragraphs)
print(len(doc1_paragraphs))
print(len(doc2_paragraphs))
return 0
print("in compare")
# Compare each paragraph
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
if ignore_case:
p1, p2 = p1.lower(), p2.lower()
if p1 != p2:
print(p1)
print(p2)
return 0
return 1
def compare_init_lines(file1, file2):
doc1 = Document(file1)
doc2 = Document(file2)
if not file1 or not file2:
return 0
try:
doc1 = Document(file1)
doc2 = Document(file2)
except Exception as e:
logger.error(f"Error: {e}")
return 0
doc1_paragraphs = [p.text for p in doc1.paragraphs]
doc2_paragraphs = [p.text for p in doc2.paragraphs]
@@ -149,8 +186,15 @@ def compare_init_lines(file1, file2):
def compare_docx_tables(docx_file1, docx_file2):
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
if not docx_file1 or not docx_file2:
return 0
try:
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
except Exception as e:
logger.error(f"Error: {e}")
return 0
# get list of tables in docx
tables1 = doc1.tables
@@ -173,12 +217,17 @@ def compare_docx_tables(docx_file1, docx_file2):
return 1
from io import BytesIO
from PIL import Image
def compare_docx_images(docx_file1, docx_file2):
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
if not docx_file1 or not docx_file2:
return 0
try:
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
except Exception as e:
logger.error(f"Error: {e}")
return 0
def extract_images(doc):
images = []
@@ -187,7 +236,7 @@ def compare_docx_images(docx_file1, docx_file2):
img_data = rel.target_part.blob
images.append(BytesIO(img_data))
return images
images1 = extract_images(doc1)
images2 = extract_images(doc2)
if len(images1) != len(images2):
@@ -197,21 +246,31 @@ def compare_docx_images(docx_file1, docx_file2):
return 0
return 1
import pytesseract
import easyocr
def compare_image_text(image_path, rule):
img = Image.open(image_path)
img_text = pytesseract.image_to_string(img)
reader = easyocr.Reader(['en'])
result = reader.readtext(image_path)
extracted_text = ' '.join([entry[1] for entry in result])
if rule['type'] == 'text':
return 1 if rule['text'] in img_text else 0
return 1 if rule['text'] in extracted_text else 0
else:
raise ValueError("Unsupported rule type")
def compare_line_spacing(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
if not compare_docx_files(docx_file1, docx_file2):
return 0
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
try:
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
except Exception as e:
logger.error(f"Error: {e}")
return 0
if len(doc1.paragraphs) != len(doc2.paragraphs):
return 0
@@ -229,11 +288,18 @@ def compare_line_spacing(docx_file1, docx_file2):
def compare_insert_equation(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
if not compare_docx_files(docx_file1, docx_file2):
return 0
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
try:
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
except Exception as e:
logger.error(f"Error: {e}")
return 0
# Compare each paragraph if it contains equation
for para1, para2 in zip(doc1.paragraphs, doc2.paragraphs):
@@ -244,7 +310,15 @@ def compare_insert_equation(docx_file1, docx_file2):
def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
doc = Document(docx_file)
if not docx_file:
return 0
try:
doc = Document(docx_file)
except Exception as e:
logger.error(f"Error: {e}")
return 0
expected_font = rules["font_name"]
for paragraph in doc.paragraphs:
@@ -256,8 +330,15 @@ def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
def compare_subscript_contains(docx_file1, docx_file2):
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
if not docx_file1 or not docx_file2:
return 0
try:
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
except Exception as e:
logger.error(f"Error: {e}")
return 0
for para1, para2 in zip(doc1.paragraphs, doc2.paragraphs):
for run1, run2 in zip(para1.runs, para2.runs):
@@ -268,7 +349,14 @@ def compare_subscript_contains(docx_file1, docx_file2):
def has_page_numbers_in_footers(docx_file):
doc = Document(docx_file)
if not docx_file:
return 0
try:
doc = Document(docx_file)
except Exception as e:
logger.error(f"Error: {e}")
return 0
for section in doc.sections:
footer = section.footer
@@ -282,7 +370,15 @@ def has_page_numbers_in_footers(docx_file):
def is_first_line_centered(docx_file):
doc = Document(docx_file)
if not docx_file:
return 0
try:
doc = Document(docx_file)
except Exception as e:
logger.error(f"Error: {e}")
return 0
first_paragraph = doc.paragraphs[0]
# check if the first line is center justified
@@ -290,13 +386,23 @@ def is_first_line_centered(docx_file):
def check_file_exists(directory, filename):
if not directory or not filename:
return 0
file_path = os.path.join(directory, filename)
return 1 if os.path.isfile(file_path) else 0
def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
doc1: Document = Document(docx_file1)
doc2: Document = Document(docx_file2)
if not docx_file1 or not docx_file2:
return .0
try:
doc1: Document = Document(docx_file1)
doc2: Document = Document(docx_file2)
except Exception as e:
logger.error(f"Error: {e}")
return .0
para1 = [p for p in doc1.paragraphs if p.text.strip()]
para2 = [p for p in doc2.paragraphs if p.text.strip()]
if len(para1) != len(para2): return .0
@@ -313,7 +419,7 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
section = doc2.sections[0]
paragraph_width = section.page_width - section.left_margin - section.right_margin
ignore_tabs = lambda x: x.alignment == WD_TAB_ALIGNMENT.CLEAR or (
x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0)
x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0)
minus = .0
for p1, p2 in zip(para1, para2):
# filter CLEAR tabstop and default left-0 tabstop
@@ -330,8 +436,15 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
def compare_contains_image(docx_file1, docx_file2):
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
if not docx_file1 or not docx_file2:
return 0
try:
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
except Exception as e:
logger.error(f"Error: {e}")
return 0
for para1, para2 in zip(doc1.paragraphs, doc2.paragraphs):
for run1, run2 in zip(para1.runs, para2.runs):
@@ -342,9 +455,18 @@ def compare_contains_image(docx_file1, docx_file2):
def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
if not file_path1 or not file_path2:
return 0
if not compare_docx_files(file_path1, file_path2):
return 0
document = Document(file_path1)
try:
document = Document(file_path1)
except Exception as e:
logger.error(f"Error: {e}")
return 0
threshold = kwargs.get('threshold', 3.5)
def _calculate_color_difference(rgb1, rgb2):
@@ -376,6 +498,9 @@ def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
def check_highlighted_words(file_path1, file_path2):
if not file_path1 or not file_path2:
return 0
if not compare_docx_files(file_path1, file_path2):
return 0
@@ -398,9 +523,17 @@ def check_highlighted_words(file_path1, file_path2):
def evaluate_strike_through_last_paragraph(file_path1, file_path2):
if not file_path1 or not file_path2:
return 0
if not compare_docx_files(file_path1, file_path2):
return 0
document = Document(file_path1)
try:
document = Document(file_path1)
except Exception as e:
logger.error(f"Error: {e}")
return 0
# Get the last paragraph
last_paragraph = document.paragraphs[-1]
@@ -414,7 +547,14 @@ def evaluate_strike_through_last_paragraph(file_path1, file_path2):
def evaluate_conversion(file_path):
document = Document(file_path)
if not file_path:
return 0
try:
document = Document(file_path)
except Exception as e:
logger.error(f"Error: {e}")
return 0
for table in document.tables:
for row in table.rows:
@@ -433,7 +573,14 @@ def evaluate_conversion(file_path):
def evaluate_spacing(file_path):
document = Document(file_path)
if not file_path:
return 0
try:
document = Document(file_path)
except Exception as e:
logger.error(f"Error: {e}")
return 0
# Check line spacing for introduction, body, and conclusion
introduction_spacing = document.paragraphs[0].paragraph_format.line_spacing
@@ -446,9 +593,18 @@ def evaluate_spacing(file_path):
def check_italic_font_size_14(path1, path2):
if not path1 or not path2:
return 0
if not compare_docx_files(path1, path2):
return 0
document = Document(path1)
try:
document = Document(path1)
except Exception as e:
logger.error(f"Error: {e}")
return 0
for paragraph in document.paragraphs:
for run in paragraph.runs:
if run.italic:
@@ -459,8 +615,15 @@ def check_italic_font_size_14(path1, path2):
def evaluate_alignment(docx_path):
if not docx_path:
return 0
# Load the document
doc = Document(docx_path)
try:
doc = Document(docx_path)
except Exception as e:
logger.error(f"Error: {e}")
return 0
# Iterate through each paragraph in the document
for para in doc.paragraphs:
@@ -488,7 +651,15 @@ def evaluate_alignment(docx_path):
def get_unique_train_ids(initial_file): # fixed standard
doc = Document(initial_file)
if not initial_file:
return set(), 0
try:
doc = Document(initial_file)
except Exception as e:
logger.error(f"Error: {e}")
return set(), 0
train_ids = set()
processed_lines = 0
@@ -504,9 +675,18 @@ def get_unique_train_ids(initial_file): # fixed standard
def check_no_duplicates(initial_file, processed_file):
if not initial_file or not processed_file:
return 0
# Open the document
train_ids_ini, ini_lines = get_unique_train_ids(initial_file)
doc_processed = Document(processed_file)
try:
doc_processed = Document(processed_file)
except Exception as e:
logger.error(f"Error: {e}")
return 0
train_ids_pro = set()
processed_lines = 0 # Counter for valid lines processed
@@ -531,11 +711,18 @@ def check_no_duplicates(initial_file, processed_file):
def compare_docx_lines(file1, file2):
# Read the text of the document, line by line
doc1 = Document(file1)
doc1_lines = [p.text.strip() for p in doc1.paragraphs if p.text.strip()]
if not file1 or not file2:
return 0
doc2 = Document(file2)
# Read the text of the document, line by line
try:
doc1 = Document(file1)
doc2 = Document(file2)
except Exception as e:
logger.error(f"Error: {e}")
return 0
doc1_lines = [p.text.strip() for p in doc1.paragraphs if p.text.strip()]
doc2_lines = [p.text.strip() for p in doc2.paragraphs if p.text.strip()]
# print(doc1_lines)
# print(doc2_lines)
@@ -547,8 +734,52 @@ def compare_docx_lines(file1, file2):
return 0
def compare_docx_files_and_ignore_new_lines(file1, file2, **options):
ignore_blanks = options.get('ignore_blanks', True)
if not file1 or not file2:
return 0
# Determine file types and load documents
if file1.endswith('.docx') and file2.endswith('.docx'):
try:
doc1 = Document(file1)
doc2 = Document(file2)
except Exception as e:
logger.error(f"Error: {e}")
return 0
# First, drop all blank paragraphs
doc1 = [p for p in doc1.paragraphs if p.text != '']
doc2 = [p for p in doc2.paragraphs if p.text != '']
doc1_paragraphs = [p.text for p in doc1]
doc2_paragraphs = [p.text for p in doc2]
else:
# Unsupported file types or mismatch
print("Unsupported file types or mismatch between file types.")
return 0
# Process and compare documents
if ignore_blanks:
text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
if text1 != text2:
return 0
else:
if len(doc1_paragraphs) != len(doc2_paragraphs):
return 0
# Compare each paragraph
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
if p1 != p2:
return 0
return 1
# Deprecated: highlights in .docx files saved on Ubuntu cannot be compared with this function (root cause unknown)
def compare_highlighted_text(file1, file2):
if not file1 or not file2:
return 0
def extract_highlighted_text(file_path):
highlighted_texts = []
@@ -583,3 +814,58 @@ def compare_highlighted_text(file1, file2):
return 1
else:
return 0
def compare_references(file1, file2, **options):
if not file1 or not file2:
return 0
reference_indicator = options.get('reference_indicator', 'References')
reference_base_result = options.get('reference_base_result', 0.5)
# Determine file types and load documents
if file1.endswith('.docx') and file2.endswith('.docx'):
try:
doc1 = Document(file1)
doc2 = Document(file2)
except Exception as e:
logger.error(f"Error: {e}")
return 0
doc1_paragraphs = [p.text for p in doc1.paragraphs]
doc2_paragraphs = [p.text for p in doc2.paragraphs]
else:
# Unsupported file types or mismatch
print("Unsupported file types or mismatch between file types.")
return 0
# Find the references section: locate reference_indicator in the paragraph list (index() returns the first occurrence)
ref1_idx = doc1_paragraphs.index(reference_indicator) if reference_indicator in doc1_paragraphs else -1
ref2_idx = doc2_paragraphs.index(reference_indicator) if reference_indicator in doc2_paragraphs else -1
if ref1_idx == -1 and ref2_idx == -1:
return 1
if ref1_idx == -1 or ref2_idx == -1:
return 0
# split the reference section into reference items, and remove the empty string items
ref1 = [p for p in doc1_paragraphs[ref1_idx + 1:] if p.strip()]
ref2 = [p for p in doc2_paragraphs[ref2_idx + 1:] if p.strip()]
# Compare the references
if len(ref1) != len(ref2):
return 0
total_similarity = 0
for r1, r2 in zip(ref1, ref2):
# fuzzy match the references
similarity = fuzz.ratio(r1, r2) / 100.0
total_similarity += similarity
result = total_similarity / len(ref1)
if result >= reference_base_result:
return (result - reference_base_result) / (1 - reference_base_result)
else:
return 0
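A worked sketch of the final rescaling: with the default reference_base_result of 0.5, a mean fuzzy similarity of 0.9 maps to (0.9 - 0.5) / (1 - 0.5) = 0.8, and anything at or below 0.5 scores 0. The reference strings are hypothetical:

```python
from rapidfuzz import fuzz

r1 = "Smith, J. (2020). Deep Learning Basics."
r2 = "Smith, J. (2020). Deep Learning Basics"   # trailing period dropped
similarity = fuzz.ratio(r1, r2) / 100.0          # close to 1.0
base = 0.5
score = (similarity - base) / (1 - base) if similarity >= base else 0
```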


@@ -1,8 +1,11 @@
import csv
import datetime
import difflib
import functools
import json
import yaml
import logging
import operator
import os
import re
import sqlite3
from numbers import Number
@@ -10,17 +13,18 @@ from typing import Callable, Any, Union
from typing import Dict, List, Pattern
import lxml.etree
import pdfplumber
import yaml
from docx import Document
from lxml.cssselect import CSSSelector
from lxml.etree import _Element
from rapidfuzz import fuzz
import difflib
from .utils import _match_record, _match_value_to_rule
import logging
logger = logging.getLogger("desktopenv.metric.general")
def check_include_exclude(result: str, rules: Dict[str, List[str]]) -> float:
if result is None:
return 0.
@@ -68,6 +72,7 @@ def is_in_list(result, rules) -> float:
else:
return 0.
def diff_text_file(result: str, expect: str) -> float:
if result is None:
return 0.
@@ -78,12 +83,34 @@ def diff_text_file(result: str, expect: str) -> float:
expected_lines: List[str] = f.read().splitlines()
return difflib.SequenceMatcher(a=result_lines, b=expected_lines).ratio()
def fuzzy_match(result, rules) -> float:
expect = rules["expected"]
return fuzz.ratio(result, expect) / 100.
def fuzzy_place_math(result_file_path, rules) -> float:
if result_file_path is None:
return 0.
expect = rules["expected"] # a list of possible answers
# Read the result .docx and split all paragraph text into whitespace-separated words
doc = Document(result_file_path)
words_list = []
for para in doc.paragraphs:
words_list.extend(para.text.split())
fuzzy_score_list = []
for word in words_list:
max_score = 0
for ans in expect:
score = fuzz.ratio(word, ans) / 100
max_score = max(max_score, score)
fuzzy_score_list.append(max_score)
if len(fuzzy_score_list) != 3:
return 0.
return sum(fuzzy_score_list) / 3
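A hedged sketch of what this checker expects: the result .docx must contain exactly three whitespace-separated words, each fuzz-matched against the answer list (file name and answers are hypothetical):

```python
rules = {"expected": ["Paris", "Berlin", "Madrid"]}
# A document whose full text is "Paris Berlin Madrid" yields three per-word
# best scores of 1.0, so the metric returns 3.0 / 3 = 1.0; any other word
# count returns 0.
score = fuzzy_place_math("answers.docx", rules)
```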
def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
"""
Args:
@@ -191,10 +218,10 @@ def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float:
return 0.
if "text" in r:
match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \
else (lambda a, b: fuzz.ratio(a, b) / 100.)
, r["text"]
)
match_func: Callable[[str], Number] = functools.partial(operator.eq if r["exact"] \
else (lambda a, b: fuzz.ratio(a, b) / 100.)
, r["text"]
)
match_score: Number = 0
for elm in elements:
match_score = max(match_score, match_func(elm.text or None))
@@ -267,20 +294,193 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
return float(metric)
def check_direct_json_object(result, rules)->float:
def check_direct_json_object(result, rules) -> float:
"""
One of the most commonly used evaluation functions.
Compares two JSON objects directly.
"""
print("result: ")
print(result)
print("expected: ")
print(rules["expected"])
if isinstance(result, str):
# remove blanks before and after result
result = result.strip()
# replace all ' with "
result = result.replace("'", '"')
# load json object
result = json.loads(result)
if result is None:
return 0.
expected_json = rules["expected"]
for key in expected_json.keys():
expected_value = expected_json.get(key)
if expected_value != result.get(key):
return 0.
return 1.0
try:
expect_in_result = rules.get("expect_in_result", False)
if not expect_in_result:
expected_json = rules["expected"]
for key in expected_json.keys():
expected_value = expected_json.get(key)
if expected_value != result.get(key):
return 0.
return 1.0
else:
expected_json = rules["expected"]
for key in expected_json.keys():
if isinstance(expected_json.get(key), List):
flag = 0
expected_value_list = expected_json.get(key)
for each_expected_value in expected_value_list:
if each_expected_value in result.get(key):
flag = 1
break
if flag == 0:
return 0.
elif isinstance(expected_json.get(key), str):
if expected_json.get(key) not in result.get(key):
return 0.
else:
logger.debug("check_direct_json_object: expected value type not supported")
return 0.
return 1.0
except:
logger.debug("check_direct_json_object: result is not a valid json object")
return 0.
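Two hedged rule shapes this comparison supports; the keys and values are illustrative:

```python
# Exact per-key equality (the default path):
rules_exact = {"expected": {"status": "done", "count": 3}}

# Containment matching with expect_in_result:
rules_loose = {
    "expect_in_result": True,
    "expected": {
        "city": ["Vienna", "Wien"],   # at least one must appear in result["city"]
        "note": "accepted",           # must be a substring of result["note"]
    },
}
```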
def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
if not speedtest_result_path:
return 0
# open the speedtest results file(csv)
date_col = None
try:
with open(speedtest_result_path, 'r') as f:
for i, line in enumerate(f):
if i == 1:
date = line.split(',')[1]
break
now_date_time = datetime.datetime.now().strftime('%H:%M')
date_time = date[-5:]
# Pass iff the recorded time is within time_diff minutes of the current time
recorded = datetime.datetime.strptime(date_time, '%H:%M')
now = datetime.datetime.strptime(now_date_time, '%H:%M')
if abs((recorded - now).total_seconds()) / 60 >= int(time_diff):
return 0
return 1
except:
logger.debug("compare_time_in_speedtest_results: file not found or not readable")
return 0
def is_included_all_json_objects(gold_file_path, result_file_path):
if not gold_file_path or not result_file_path:
return 0
print("gold_file_path: ")
print(gold_file_path)
print("result_file_path: ")
print(result_file_path)
# Given two JSON files, check that every key-value pair in gold_file_path also appears in result_file_path
with open(gold_file_path, 'r') as f:
gold_json = json.load(f)
with open(result_file_path, 'r') as fr:
result_json = json.load(fr)
for key in gold_json.keys():
if key not in result_json.keys() or gold_json[key] != result_json[key]:
return 0
return 1
def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
if not gold_text_path or not pdf_file_path:
return 0
print("gold_text_path: ")
print(gold_text_path)
print("pdf_file_path: ")
print(pdf_file_path)
# The gold file is JSON; check that every value in it appears in the PDF's extracted text
with open(gold_text_path, 'r') as f:
gold_json = json.load(f)
with pdfplumber.open(pdf_file_path) as pdf:
text = ''
for page in pdf.pages:
text += page.extract_text()
false_list = []
for key in gold_json.keys():
if gold_json[key] not in text:
false_list.append(key)
if len(false_list) > 0:
print("false_list: ")
print(false_list)
return 0
else:
return 1
def file_contains(file_path, config):
# file_path ends with .txt
if not file_path:
return 0.
try:
with open(file_path, 'r') as f:
file_text = f.read()
for text in config["expected"]:
if text not in file_text:
logger.debug(f"file_contains: {text} not found in {file_path}")
return 0.
except:
logger.debug("file_contains: file not found or not readable")
return 0.
return 1.
def check_line_number(file_path, line_number):
# check if file_path exists
if file_path is None or not os.path.isfile(file_path):
return 0.
timeRegex = r"([01]\d|2[0-3]):[0-5]\d:[0-5]\d"
# Count the lines matching timeRegex and check that the count equals line_number["expected"]
try:
with open(file_path, 'r') as f:
line_count = 0
for line in f:
if re.search(timeRegex, line):
line_count += 1
# Return 1 if line_count equals line_number["expected"], else 0
return 1 if line_count == int(line_number["expected"]) else 0
except:
logger.debug("check_line_number: file not found or not readable")
return 0.
def compare_terminal_and_txt(txt_file_path, terminal_output):
if not txt_file_path or not terminal_output:
return 0
# read txt file content
with open(txt_file_path, 'r') as f:
txt_file_content = f.read()
# compare terminal output with txt file content
return 1 if terminal_output == txt_file_content else 0
def compare_python_pure_text(py_file_path, gold_file_path):
if not py_file_path or not gold_file_path:
return 0
# Note: renaming the gold file from .txt to .py is currently disabled (see the commented-out line below)
print("py_file_path: ")
print(py_file_path)
print("gold_file_path: ")
print(gold_file_path)
# gold_file_path = gold_file_path.replace('.txt', '.py')
def remove_whitespace(text):
return ''.join(text.split())
with open(py_file_path, 'r') as file1:
content1 = file1.read()
with open(gold_file_path, 'r') as file2:
content2 = file2.read()
content1_no_whitespace = remove_whitespace(content1)
content2_no_whitespace = remove_whitespace(content2)
if content1_no_whitespace == content2_no_whitespace:
return 1
else:
return 0


@@ -5,7 +5,7 @@ from PIL import Image, ImageChops, ImageStat
def compare_image_list(pred_img_path_list: Union[str, List[str]],
gold_img_path_list: Union[str, List[str]]) -> float:
gold_img_path_list: Union[str, List[str]]) -> float:
""" Compare two image lists, only if all images are the same, return 1.0, otherwise return 0.0
"""
if type(pred_img_path_list) != list:
@@ -177,6 +177,16 @@ def calculate_contrast(image):
return np.std(pixels)
def calculate_image_sharpness(image_path):
# Load the image in grayscale
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# Apply the Laplacian operator
laplacian = cv2.Laplacian(image, cv2.CV_64F)
# Calculate the variance
variance = np.var(laplacian)
return variance
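Laplacian variance grows with edge content, so a sharper rendition of the same scene scores higher; a hedged sketch with hypothetical files:

```python
before = calculate_image_sharpness("photo_blurred.png")
after = calculate_image_sharpness("photo_sharpened.png")
assert after > before   # the premise behind check_sharper later in this diff
```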
def structure_check_by_mse(img1, img2, threshold=0.03):
"""Check if two images are approximately the same by MSE"""
mse = np.mean(
@@ -189,7 +199,7 @@ def structure_check_by_mse(img1, img2, threshold=0.03):
def structure_check_by_ssim(img1, img2, threshold=0.9):
"""Check if two images are approximately the same by SSIM"""
similarity = ssim(np.array(img1), np.array(img2), multichannel=True)
similarity = ssim(np.array(img1), np.array(img2), multichannel=True, channel_axis=-1)
print("SSIM: ", similarity)
return similarity >= threshold
@@ -295,7 +305,8 @@ def check_triangle_position(tgt_path):
# We assume the triangle is a different color from the background
# Find the unique colors
unique_colors, counts = np.unique(img_array.reshape(-1, img_array.shape[2]), axis=0, return_counts=True)
unique_colors, counts = np.unique(img_array.reshape(-1, img_array.shape[2]), axis=0,
return_counts=True)
unique_colors_sorted = unique_colors[np.argsort(counts)]
# Assuming the background is the most common color and the triangle is a different color
@@ -337,6 +348,25 @@ def check_structure_sim(src_path, tgt_path):
return structure_same
def check_structure_sim_resized(src_path, tgt_path):
"""
Check if the structures of the two images are similar after resizing.
gimp:d16c99dc-2a1e-46f2-b350-d97c86c85c15
"""
if src_path is None or tgt_path is None:
return 0.
img_src = Image.open(src_path)
img_tgt = Image.open(tgt_path)
# Resize the images to the same size
img_src = img_src.resize(img_tgt.size)
# Check if the structure is similar
structure_same = structure_check_by_ssim(img_src, img_tgt)
return structure_same
def check_contrast_increase_and_structure_sim(src_path, tgt_path):
"""
Check if the src image has higher contrast than the tgt image and the structures are similar
@@ -388,34 +418,28 @@ def check_config_status(actual_config_path, rule):
return 0.
def check_image_size_and_structure_sim(src_path, tgt_path, height=512, width=None):
def check_image_size(src_path, rule):
"""
Check if the size of the src image is correct and the structure of the two images are similar.
gimp:d16c99dc-2a1e-46f2-b350-d97c86c85c15
Check if the size of the src image is correct
multi-apps:42f4d1c7-4521-4161-b646-0a8934e36081
"""
if src_path is None or tgt_path is None:
if src_path is None:
return 0.
# Load images
source_image = Image.open(src_path)
target_image = Image.open(tgt_path)
# Load the image
img = Image.open(src_path)
# Check size
if width is not None:
width_same = source_image.size[0] == width
else:
width_same = True
if height is not None:
height_same = source_image.size[1] == height
# Check the size
if rule["height"] is not None:
height_same = img.size[1] == rule["height"]
else:
height_same = True
if rule["width"] is not None:
width_same = img.size[0] == rule["width"]
else:
width_same = True
# Check structure
resized_target_image = target_image.resize(source_image.size)
structure_same = structure_check_by_ssim(source_image, resized_target_image)
if width_same and height_same and structure_same:
if height_same and width_same:
return 1.
else:
return 0.
@@ -521,32 +545,26 @@ def check_green_background(src_path, tgt_path):
return 1.
if __name__ == "__main__":
actual_config_path = "../../../cache/sessionrc_test"
rule = {
"key": "hide-docks",
"value": "no"
}
print(check_config_status(actual_config_path, rule))
def check_sharper(src_path, tgt_path):
"""
Check if the source image is sharper than the target image.
multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108
"""
sharpness_src = calculate_image_sharpness(src_path)
sharpness_tgt = calculate_image_sharpness(tgt_path)
return 1.0 if sharpness_src > sharpness_tgt else 0.0
actual_config_path = "../../../cache/action-history_test"
rule = {
"key": ["history-item", "\"filters-vignette\""],
"value": "1"
}
print(check_config_status(actual_config_path, rule))
actual_config_path = "../../../cache/gimprc_test"
rule = {
"key": "undo-levels",
"value": "100"
}
print(check_config_status(actual_config_path, rule))
src_path = "../../../cache/734d6579-c07d-47a8-9ae2-13339795476b/green_background_with_object.png"
tgt_path = "../../../cache/734d6579-c07d-47a8-9ae2-13339795476b/white_background_with_object.png"
print(check_green_background(src_path, tgt_path))
tgt_path = "../../../cache/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce/Triangle_In_The_Middle.png"
print(check_triangle_position(tgt_path))
def check_image_file_size(src_path, rule):
"""
Check if the file size of the src image is within rule["max_size"] bytes (e.g. 500 KB)
"""
if src_path is None:
return 0.0
# Check the size
file_size = os.path.getsize(src_path)
if file_size < rule["max_size"]:
return 1.0
else:
return 0.0


@@ -26,13 +26,3 @@ def check_libre_locale(config_file: str, rules: Dict[str, List[str]]) -> float:
for ptn in rules["locale_set"]
)
)
if __name__ == "__main__":
path1 = "../../任务数据/LibreOffice Calc/registrymodifications.ru.xcu"
print(check_libre_locale(path1, {"locale_set": ["ru-*", "de-*", "fr-*"
, "pt-*", "es-*", "it-*"
]
}
)
)


@@ -1,20 +1,20 @@
import zipfile
import os.path
import logging
import os
import os.path
import zipfile
from typing import List, Dict
from typing import Union, TypeVar
import lxml.html
from lxml.html import HtmlElement
from typing import List, Dict
from typing import Union, TypeVar
from mutagen.easyid3 import EasyID3
from .general import diff_text_file
from .utils import _match_value_to_rule
import logging
logger = logging.getLogger("desktopenv.metric.others")
def process_epub(filename: str) -> List[str]:
file_list: List[str] = []
@@ -23,7 +23,7 @@ def process_epub(filename: str) -> List[str]:
try:
with zipfile.ZipFile(filename, "r") as z_f:
with z_f.open("toc.ncx") as in_f\
with z_f.open("toc.ncx") as in_f \
, open(os.path.join(base_dir, "toc.ncx"), "w") as out_f:
contents: str = in_f.read().decode()
contents = contents.splitlines()
@@ -31,7 +31,7 @@ def process_epub(filename: str) -> List[str]:
if "navPoint" not in l:
out_f.write(l + "\n")
file_list.append(os.path.join(base_dir, "toc.ncx"))
with z_f.open("content.opf") as in_f\
with z_f.open("content.opf") as in_f \
, open(os.path.join(base_dir, "content.opf"), "w") as out_f:
contents: str = in_f.read().decode()
contents = contents.splitlines()
@@ -41,14 +41,14 @@ def process_epub(filename: str) -> List[str]:
file_list.append(os.path.join(base_dir, "content.opf"))
for f_n in z_f.namelist():
if f_n.endswith(".html"):
with z_f.open(f_n) as in_f\
with z_f.open(f_n) as in_f \
, open(os.path.join(base_dir, f_n), "w") as out_f:
html: HtmlElement = lxml.html.fromstring(
''.join( filter( lambda ch: ch!="\n" and ch!="\r"
, in_f.read().decode()
)
).encode()
)
''.join(filter(lambda ch: ch != "\n" and ch != "\r"
, in_f.read().decode()
)
).encode()
)
out_f.write(lxml.html.tostring(html, pretty_print=True, encoding="unicode"))
file_list.append(os.path.join(base_dir, f_n))
logger.debug("%s: %s", filename, file_list)
@@ -56,6 +56,7 @@ def process_epub(filename: str) -> List[str]:
except zipfile.BadZipFile:
return []
def compare_epub(result: str, expected: str) -> float:
if result is None:
return 0.
@@ -69,8 +70,10 @@ def compare_epub(result: str, expected: str) -> float:
metric *= current_metric
return metric
V = TypeVar("Value")
def check_mp3_meta(result: str, meta: Dict[str, Dict[str, Union[str, V]]]) -> bool:
# checks using _match_value_to_rule
if result is None:
@@ -85,44 +88,3 @@ def check_mp3_meta(result: str, meta: Dict[str, Dict[str, Union[str, V]]]) -> bo
logger.debug("%s.%s: %s", result, k, value)
metric = metric and _match_value_to_rule(value, r)
return float(metric)
if __name__ == "__main__":
import datetime
import sys
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))
file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
metric = check_mp3_meta( "snapshots/test/cache/3f05f3b9-29ba-4b6b-95aa-2204697ffc06/Cheng Xiang - Missing You - gt.mp3"
, { "title": { "method": "eq"
, "ref": "Missing You"
}
, "artist": { "method": "eq"
, "ref": "Cheng Xiang"
}
}
)
print(metric)
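
For reference, the `method`/`ref` dictionaries passed above are resolved by `_match_value_to_rule` from `.utils`. A minimal sketch of the matching semantics, covering only the method used in this test (a hypothetical stand-in, not the real helper, which supports more methods):

    from typing import Any, Dict

    def match_value_to_rule(value: Any, rule: Dict[str, Any]) -> bool:
        # Hypothetical reimplementation of metrics.utils._match_value_to_rule
        # for illustration only.
        if rule["method"] == "eq":
            return value == rule["ref"]
        if rule["method"] == "in":
            return rule["ref"] in value
        raise NotImplementedError(rule["method"])

    assert match_value_to_rule("Missing You", {"method": "eq", "ref": "Missing You"})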

View File

@@ -2,6 +2,7 @@ import operator
from typing import Any
from typing import Dict
import fitz # PyMuPDF
from pypdf import PdfReader
@@ -11,3 +12,20 @@ def check_pdf_pages(pdf_file: str, rules: Dict[str, Any]) -> float:
reader = PdfReader(pdf_file)
nb_pages: int = len(reader.pages)
return float(getattr(operator, rules["relation"])(nb_pages, rules["ref_value"]))
def extract_answers_from_pdf(pdf_file):
doc = fitz.open(pdf_file)
answers = []
for page in doc:
text = page.get_text()
lines = text.split('\n')
for line in lines:
if line.strip():
parts = line.split('=')
if len(parts) > 1:
answer = parts[-1].strip()
answers.append(answer)
return answers
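
The per-line logic above reduces to keeping whatever follows the last '=' on each non-blank line. A self-contained sketch on made-up page text:

    sample_text = "1. 12 + 7 = 19\n2. 3 * 4 = 12\n\n"
    answers = []
    for line in sample_text.split('\n'):
        if line.strip():
            parts = line.split('=')
            if len(parts) > 1:
                answers.append(parts[-1].strip())
    assert answers == ["19", "12"]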

View File

@@ -165,23 +165,24 @@ def compare_pptx_files(file1_path, file2_path, **options):
# compare the content of each slide
for slide1, slide2 in zip(prs1.slides, prs2.slides):
slide_idx += 1
def get_slide_background_color(slide):
background = slide.background
if background.fill.background():
return background.fill.fore_color.rgb
else:
return None
if get_slide_background_color(slide1) != get_slide_background_color(slide2) and examine_background_color:
return 0
def get_slide_notes(slide):
notes_slide = slide.notes_slide
if notes_slide:
return notes_slide.notes_text_frame.text
else:
return None
if get_slide_notes(slide1).strip() != get_slide_notes(slide2).strip() and examine_note:
return 0
# check if the shapes are the same
@@ -192,14 +193,14 @@ def compare_pptx_files(file1_path, file2_path, **options):
return 0
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
return 0
if examine_table_bottom_position:
if slide_idx == 3 and shape1.shape_type == 19 and shape2.shape_type == 19:
if shape1.top <= shape2.top or shape1.top < 3600000:
return 0
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
return 0
if examine_right_position:
if slide_idx == 2 and not hasattr(shape1, "text") and not hasattr(shape2, "text"):
if shape1.left <= shape2.left or shape1.left < 4320000:
@@ -207,28 +208,31 @@ def compare_pptx_files(file1_path, file2_path, **options):
if examine_top_position:
if slide_idx == 2 and shape1.shape_type == 13 and shape2.shape_type == 13:
if shape1.top >= shape2.top or shape1.top > 1980000:
return 0
if shape1.top >= shape2.top or shape1.top > 1980000:
return 0
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
return 0
if examine_shape_for_shift_size:
if shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
if not (hasattr(shape1, "text") and hasattr(shape2, "text") and shape1.text == shape2.text and shape1.text == "Elaborate on what you want to discuss."):
if not (hasattr(shape1, "text") and hasattr(shape2,
"text") and shape1.text == shape2.text and shape1.text == "Elaborate on what you want to discuss."):
return 0
if (shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height) and examine_shape:
if (
shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height) and examine_shape:
return 0
if examine_image_size:
if shape1.shape_type == 13 and shape2.shape_type == 13:
if shape1.width != shape2.width or shape1.height != shape2.height:
return 0
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
return 0
if examine_modify_height:
if not hasattr(shape1, "text") and not hasattr(shape2, "text") or shape1.shape_type == 5 and shape2.shape_type == 5:
if not hasattr(shape1, "text") and not hasattr(shape2,
"text") or shape1.shape_type == 5 and shape2.shape_type == 5:
if shape1.height != shape2.height:
return 0
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
@@ -236,13 +240,13 @@ def compare_pptx_files(file1_path, file2_path, **options):
if hasattr(shape1, "text") and hasattr(shape2, "text"):
if shape1.text.strip() != shape2.text.strip() and examine_text:
return 0
# check if the paragraphs are the same
return 0
# check if the paragraphs are the same
for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs):
if para1.alignment != para2.alignment and examine_alignment:
return 0
# check if the runs are the same
if para1.text != para2.text and examine_text:
return 0
@@ -253,7 +257,7 @@ def compare_pptx_files(file1_path, file2_path, **options):
for run1, run2 in zip(para1.runs, para2.runs):
# check if the font properties are the same
if run1.font.name != run2.font.name and examine_font_name:
if run1.font.name != run2.font.name and examine_font_name:
return 0
if run1.font.size != run2.font.size and examine_font_size:
@@ -305,10 +309,9 @@ def compare_pptx_files(file1_path, file2_path, **options):
return bullets
if examine_bullets and _extract_bullets(run1.part.blob.decode('utf-8')) != _extract_bullets(run2.part.blob.decode('utf-8')):
if examine_bullets and _extract_bullets(run1.part.blob.decode('utf-8')) != _extract_bullets(
run2.part.blob.decode('utf-8')):
return 0
# fixme: Actually there are more properties to be compared, we can add them later via parsing the xml data
@@ -524,15 +527,3 @@ def check_auto_saving_time(pptx_file, rules):
logger.error(f"Error parsing XML: {e}")
except FileNotFoundError:
logger.error(f"File not found: {pptx_file}")
if __name__ == '__main__':
# print(compare_pptx_files(
# r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\550ce7e7-747b-495f-b122-acdc4d0b8e54\New_Club_Spring_2018_Training_Gold.pptx",
# r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\550ce7e7-747b-495f-b122-acdc4d0b8e54\New_Club_Spring_2018_Training_Gold.pptx"))
# print(evaluate_presentation_fill_to_rgb_distance(r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\3b27600c-3668-4abd-8f84-7bcdebbccbdb\lec17-gui-events.pptx", {"rgb": (0, 0, 255)}))
# print(check_auto_saving_time(r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\2cd43775-7085-45d8-89fa-9e35c0a915cf\registrymodifications.xcu", {"minutes": 3}))
print(compare_pptx_files(
r"D:\NJU\HKUNLP\Desktop-Env\DesktopEnv\cache\08aced46-45a2-48d7-993b-ed3fb5b32302\22_6_Gold.pptx",
r"D:\NJU\HKUNLP\Desktop-Env\DesktopEnv\cache\08aced46-45a2-48d7-993b-ed3fb5b32302\22_6.pptx",
examine_shape=False))

View File

@@ -11,16 +11,16 @@ import openpyxl
import pandas as pd
from openpyxl import Workbook
from openpyxl.cell.cell import Cell
from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl.worksheet.worksheet import Worksheet
from .utils import _match_value_to_rule, _read_cell_style, read_cell_value
from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles\
, load_filters, load_pivot_tables
from rapidfuzz import fuzz
from desktop_env.evaluators.metrics.utils import _match_value_to_rule, _read_cell_style, read_cell_value
from desktop_env.evaluators.metrics.utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles \
, load_filters, load_pivot_tables
# from openpyxl.utils import coordinate_to_tuple
logger = logging.getLogger("desktopenv.metric.table")
@@ -165,7 +165,7 @@ def compare_table(result: str, expected: str = None, **options) -> float:
logger.debug("Sheet1: \n%s", str(sheet1))
logger.debug("Sheet2: \n%s", str(sheet2))
try:
logger.debug("Sheet1 =v= Sheet2: \n%s", str(sheet1==sheet2))
logger.debug("Sheet1 =v= Sheet2: \n%s", str(sheet1 == sheet2))
except:
logger.debug("Sheet1 =/v= Sheet2")
logger.debug("Assertion: %s =v= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
@@ -231,14 +231,14 @@ def compare_table(result: str, expected: str = None, **options) -> float:
value1 = value1.lower()
value2 = value2.lower()
if rl["type"]=="includes":
if rl["type"] == "includes":
metric: bool = value2 in value1
elif rl["type"]=="included_by":
elif rl["type"] == "included_by":
metric: bool = value1 in value2
elif rl["type"]=="fuzzy_match":
elif rl["type"] == "fuzzy_match":
metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.)
elif rl["type"]=="exact_match":
metric: bool = value1==value2
elif rl["type"] == "exact_match":
metric: bool = value1 == value2
total_metric = total_metric and metric
metric: bool = total_metric
@@ -409,7 +409,7 @@ def compare_table(result: str, expected: str = None, **options) -> float:
filters1: Dict[str, Any] = load_filters(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
filters2: Dict[str, Any] = load_filters(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
metric: bool = filters1==filters2
metric: bool = filters1 == filters2
logger.debug("Assertion: %s[filter] == %s[filter] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Compare Filters #
@@ -421,7 +421,7 @@ def compare_table(result: str, expected: str = None, **options) -> float:
pivots1: Dict[str, Any] = load_pivot_tables(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
pivots2: Dict[str, Any] = load_pivot_tables(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
metric: bool = pivots1==pivots2
metric: bool = pivots1 == pivots2
logger.debug("Assertion: %s[pivot]==%s[pivot] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Compare Pivot Tables #
@@ -482,81 +482,36 @@ def compare_csv(result: str, expected: str, **options) -> float:
return float(metric)
def compare_conference_city_in_order(actual_city_list_path, expected_city):
    expected_city_list = expected_city["expected"]
    wb = openpyxl.load_workbook(actual_city_list_path)
    sheet = wb.active
    actual_city_list = []
    for row in sheet["C2:C22"]:
        for cell in row:
            actual_city_list.append(cell.value)
    # expected_city holds the reference cities to compare against the actual
    # city list, matched row by row in order.
    try:
        for i in range(len(actual_city_list)):
            if isinstance(expected_city_list[i], str):
                if expected_city_list[i] not in actual_city_list[i]:
                    logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
                    print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
                    return 0.
            elif isinstance(expected_city_list[i], List):
                if not any(possible_str in actual_city_list[i] for possible_str in expected_city_list[i]):
                    logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
                    print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
                    return 0.
            else:
                raise TypeError("Expected city should be a string or a list of strings")
    except:
        return 0.
    return 1.


if __name__ == '__main__':
    import datetime
    import sys

    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")

    file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
    debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
    stdout_handler = logging.StreamHandler(sys.stdout)
    sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))

    file_handler.setLevel(logging.INFO)
    debug_handler.setLevel(logging.DEBUG)
    stdout_handler.setLevel(logging.INFO)
    sdebug_handler.setLevel(logging.DEBUG)

    formatter = logging.Formatter(
        fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
    file_handler.setFormatter(formatter)
    debug_handler.setFormatter(formatter)
    stdout_handler.setFormatter(formatter)
    sdebug_handler.setFormatter(formatter)

    stdout_handler.addFilter(logging.Filter("desktopenv"))
    sdebug_handler.addFilter(logging.Filter("desktopenv"))

    logger.addHandler(file_handler)
    logger.addHandler(debug_handler)
    logger.addHandler(stdout_handler)
    logger.addHandler(sdebug_handler)

    path1 = "snapshots/test/cache/4e6fcf72-daf3-439f-a232-c434ce416af6/Employee_Age_By_Birthday.xlsx"
    path2 = "snapshots/test/cache/4e6fcf72-daf3-439f-a232-c434ce416af6/Employee_Age_By_Birthday_gold.xlsx"
    rules = [ { "type": "sheet_data"
              , "sheet_idx0": 0
              , "sheet_idx1": "EI0"
              }
            ]
    print(compare_table(path1, path2
                       , rules=rules
                       )
         )
    print(compare_table(path2, path2
                       , rules=rules
                       )
         )

    # Row Properties
    # path1 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA.xlsx"
    # path2 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA_gold.xlsx"
    # workbook: Workbook = openpyxl.load_workbook(filename=path1)
    # worksheet: Worksheet = workbook.active
    # for r_no, dms in worksheet.column_dimensions.items():
    #     print(r_no, type(r_no), type(dms), dms.hidden)

    # Conditional Formats
    # import formulas
    # path1 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days.xlsx"
    # path2 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold.xlsx"
    # path3 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold_test.xlsx"
    # workbook: Workbook = openpyxl.load_workbook(filename=path2)
    # worksheet: Worksheet = workbook.active
    # print(worksheet.conditional_formatting)
    # for itm in worksheet.conditional_formatting:
    #     print(itm.cells)
    #     for r in itm.rules:
    #         print( r.type, r.formula, r.dxf.font.color.rgb
    #              , r.dxf.fill.fgColor.rgb, r.dxf.fill.bgColor.rgb
    #              )
    #         condition = formulas.Parser().ast("=" + r.formula[0])[1].compile()
    #         ##print(r.type, r.operator, r.dxfId, r.dxf)
    #     for r in itm.cells:
    #         for c in r.cells:
    #             value = worksheet.cell(row=c[0], column=c[1]).value
    #             print(value, condition(str(value)))
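
The `expected_city` argument of `compare_conference_city_in_order` above is a dict whose "expected" list pairs one entry per cell of C2:C22: a plain string must be contained in the cell, while a list means any of its members may match. A hedged example with made-up city names:

    # Hypothetical reference payload for compare_conference_city_in_order.
    expected_city = {
        "expected": [
            "Vienna",                  # C2 must contain "Vienna"
            ["Singapore", "Online"],   # C3 may contain either string
            "Honolulu"                 # C4 must contain "Honolulu"
        ]
    }
    # score = compare_conference_city_in_order("conferences.xlsx", expected_city)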

View File

@@ -1,17 +1,19 @@
import json
import logging
import re
from typing import List, Pattern, Dict, Match
from typing import Union, Any, TypeVar, Callable
import re
import json
from .utils import _match_record
from .utils import _match_value_to_rule as _match_pref
import logging
logger = logging.getLogger("desktopenv.metric.thunderbird")
V = TypeVar("Value")
_pref_pattern: Pattern[str] = re.compile(r'^user_pref\("(?P<key>(?:[^"]|\\")+)\", (?P<val>.+)\);$');
def check_thunderbird_prefs(result: str, rule: Dict[str, Dict[str, Dict[str, Any]]]):
"""
Args:
@@ -51,10 +53,10 @@ def check_thunderbird_prefs(result: str, rule: Dict[str, Dict[str, Dict[str, Any
continue
key: str = match_.group("key")
#value: str = match_.group("val")
#if value in {"true", "false"}:
#value = value.title()
#value: V = eval(value)
# value: str = match_.group("val")
# if value in {"true", "false"}:
# value = value.title()
# value: V = eval(value)
value = json.loads(match_.group("val"))
if key in expect_rules:
logger.debug("K: %s, V: %s", key, repr(value))
@@ -64,9 +66,13 @@ def check_thunderbird_prefs(result: str, rule: Dict[str, Dict[str, Dict[str, Any
return float(all(expect_metrics.values()) and unexpect_metric)
_value_processor: Callable[[str], str] = lambda val: val.replace("\\\"", "\"").replace("\\\\", "\\")
#_condition_pattern: Pattern[str] = re.compile(r'(?P<type>AND|OR) \((?P<key>[\w ]+),(?P<rel>[\w ' + '\'' + r']+),(?:"(?P<val2>(?:[^"]|\")+)"|(?P<val1>[^)]+))\)')
_condition_pattern: Pattern[str] = re.compile(r'\b(?:AND|OR) \((?:[\w ]+),(?:[\w ' + '\'' + r']+),(?:"(?:(?:[^"]|\")+)"|(?:[^)]+))\)|\bALL\b')
# _condition_pattern: Pattern[str] = re.compile(r'(?P<type>AND|OR) \((?P<key>[\w ]+),(?P<rel>[\w ' + '\'' + r']+),(?:"(?P<val2>(?:[^"]|\")+)"|(?P<val1>[^)]+))\)')
_condition_pattern: Pattern[str] = re.compile(
r'\b(?:AND|OR) \((?:[\w ]+),(?:[\w ' + '\'' + r']+),(?:"(?:(?:[^"]|\")+)"|(?:[^)]+))\)|\bALL\b')
def check_thunderbird_filter(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
"""
Args:
@@ -112,8 +118,8 @@ def check_thunderbird_filter(result: str, rules: Dict[str, List[Dict[str, str]]]
condition_str: str = _value_processor(l[11:-2])
logger.debug("FILTER CONDITION: %s", condition_str)
conditions: List[str] =\
_condition_pattern.findall(condition_str)
conditions: List[str] = \
_condition_pattern.findall(condition_str)
logger.debug("FILTER CONDITIONS: %s", repr(conditions))
filter_["condition"] = conditions
@@ -138,6 +144,7 @@ def check_thunderbird_folder(result: Union[str, List[str]], reference: Union[str
remove_deleted (bool): ignore deleted messages which has status code 0008 or 0009. default: True
remove_duplicate (bool): remove duplicate messages. default: True
"""
def normalize_msg(msg, options):
ignore_status = options.get('ignore_status', False)
ignore_keys = options.get('ignore_keys', False)
@@ -167,66 +174,3 @@ def check_thunderbird_folder(result: Union[str, List[str]], reference: Union[str
mail2 = read_thunderbird_folder_file(gold)
if mail1 != mail2: return .0
return 1.0
if __name__ == "__main__":
#import lxml.etree
#from lxml.cssselect import CSSSelector
#from lxml.etree import _Element
#xml = "../../任务数据/Thunderbird/vertical-card-view.xml"
#xml = "../../任务数据/Thunderbird/vertical-table-view.xml"
#at: _Element = lxml.etree.parse(xml)
#elements: List[_Element] = CSSSelector('application[name=Thunderbird] page-tab-list')(at) # page tab tags
#elements: List[_Element] = CSSSelector('application[name=Thunderbird] panel>scroll-pane>internal-frame>panel[name$="anonym-x2024@outlook.com"]')(at) # email tag page
#elements: List[_Element] = CSSSelector('application[name=Thunderbird] panel>scroll-pane>internal-frame>panel[name$="anonym-x2024@outlook.com"]>section:nth-child(3)')(at) # email tag page
#elements: List[_Element] = CSSSelector('application[name=Thunderbird] panel>scroll-pane>internal-frame>panel[name$="anonym-x2024@outlook.com"]>section[attr|id=threadPane]>section[attr|id="threadTree"]>table[attr|class="tree-table"]>section[attr|class~="tree-table-header"]>table-row>column-header[name=Subject]>push-button', namespaces={"attr": "uri:deskat:attributes.at-spi.gnome.org"})(at) # table view, column header
#elements: List[_Element] = CSSSelector('application[name=Thunderbird] panel>scroll-pane>internal-frame>panel[name$="anonym-x2024@outlook.com"]>section[attr|id=threadPane]>section[attr|id="threadTree"]>table[attr|class="tree-table"]>tree>tree-item>section[name="Subject"]>section>section', namespaces={"attr": "uri:deskat:attributes.at-spi.gnome.org"})(at) # table view, column header
#print(len(elements))
#for elm in elements:
#print(lxml.etree.tostring(elm, encoding="unicode", pretty_print=True))
import datetime
import os
import sys
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))
file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
print( check_thunderbird_filter( "../../任务数据/Thunderbird/msgFilterRules.dat"
, { "expect": [ { "enabled": "yes"
, "action": "Move to folder"
, "actionValue": "mailbox://nobody@Local%20Folders/Promotions"
, "condition": ["AND (subject,contains,discount)"]
}
]
}
)
)
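
For context, the `_pref_pattern` regex above pulls key/value pairs out of Thunderbird's prefs.js, and the value is then parsed with `json.loads`, as `check_thunderbird_prefs` does. A minimal stdlib-only demonstration on a made-up pref line:

    import json
    import re

    pref_pattern = re.compile(r'^user_pref\("(?P<key>(?:[^"]|\\")+)", (?P<val>.+)\);$')
    line = 'user_pref("mail.smtpserver.smtp1.port", 587);'
    match_ = pref_pattern.match(line)
    key = match_.group("key")                # 'mail.smtpserver.smtp1.port'
    value = json.loads(match_.group("val"))  # 587
    assert (key, value) == ("mail.smtpserver.smtp1.port", 587)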

View File

@@ -1,10 +1,12 @@
import builtins
import datetime
import functools
import itertools
import logging
import operator
import re
import zipfile
import pandas as pd
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
from typing import Dict, List, Set, Match, Tuple, Pattern
from urllib.parse import urlparse, urlunparse

View File

@@ -2,6 +2,7 @@ import copy
import importlib.util
import json
import sys
import re
from typing import Dict
@@ -86,11 +87,44 @@ def compare_text_file(actual: str, expected: str, **options) -> float:
with open(expected) as f2:
expected_text = f2.read()
ignore_blanks = options.get('ignore_blanks', False)
if ignore_blanks:
actual_text = re.sub(r'[\t\n]', ' ', actual_text).strip()
actual_text = re.sub(r'\s+', ' ', actual_text)
expected_text = re.sub(r'[\t\n]', ' ', expected_text).strip()
expected_text = re.sub(r'\s+', ' ', expected_text)
ignore_case = options.get('ignore_case', False)
if ignore_case:
actual_text = actual_text.lower()
expected_text = expected_text.lower()
if actual_text == expected_text:
return 1.0
return 0.0
import zipfile
from difflib import SequenceMatcher
import PyPDF2
def compare_pdf_content(content1, content2, text_similarity_threshold):
def extract_text_from_pdf(content):
with open("temp.pdf", "wb") as temp_pdf:
temp_pdf.write(content)
with open("temp.pdf", "rb") as temp_pdf:
pdf_reader = PyPDF2.PdfReader(temp_pdf)
text = ''
for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num]
text += page.extract_text()
return text
text1 = extract_text_from_pdf(content1)
text2 = extract_text_from_pdf(content2)
similarity_ratio = SequenceMatcher(None, text1, text2).ratio()
return similarity_ratio >= text_similarity_threshold
def compare_zip_files(actual: str, expected: str, **options) -> float:
"""
@@ -115,7 +149,12 @@ def compare_zip_files(actual: str, expected: str, **options) -> float:
content1 = zip_file1.read(file_name)
content2 = zip_file2.read(file_name)
if content1 != content2:
if file_name.lower().endswith('.pdf'):
if compare_pdf_content(content1, content2, 0.95):
continue
else:
return 0.0
elif content1 != content2:
return 0.0
return 1.0
@@ -190,3 +229,45 @@ def check_python_file_by_test_suite(actual_files, test_file, **options) -> float
def check_python_file_by_gold_file(actual_files, gold_file: str, **options) -> float:
pass
def check_html_background_image(src_path: str, rule: Dict = None) -> float:
"""
Check if the background image is correctly set.
multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108
"""
from bs4 import BeautifulSoup
with open(src_path, 'r') as f:
html_content = f.read()
soup = BeautifulSoup(html_content, 'html.parser')
styles = soup.find_all('style')
for style in styles:
if f'background-image: url(\'{rule["value"]}\')' in style.text:
return 1.0
return 0.0
def compare_result_files(src_path, tgt_path):
"""
Compare whether the content of two files are the same.
multi-app:7f35355e-02a6-45b5-b140-f0be698bcf85
"""
with open(src_path, 'r') as f:
src_content = f.read().strip()
with open(tgt_path, 'r') as f:
tgt_content = f.read().strip()
try:
# Compare the content as numbers
tgt_content_num = float(tgt_content)
if tgt_content in src_content:
            # If the content of tgt appears in src, return 1.0, since the src output
            # may be a superset of tgt (a language description plus the number).
return 1.0
src_content_num = float(src_content)
if abs(src_content_num - tgt_content_num) < 1e-4:
return 1.0
return 0.0
except:
if src_content == tgt_content:
return 1.0
return 0.0
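
A quick sketch of the numeric branch above (hypothetical file contents; the 1e-4 tolerance absorbs small rounding differences between the two outputs):

    with open("/tmp/src.txt", "w") as f:
        f.write("41.99995")
    with open("/tmp/tgt.txt", "w") as f:
        f.write("42.0")
    # "42.0" is not a substring of "41.99995", but |41.99995 - 42.0| < 1e-4,
    # so the numeric comparison still yields 1.0.
    print(compare_result_files("/tmp/src.txt", "/tmp/tgt.txt"))  # 1.0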

View File

@@ -117,7 +117,7 @@ def launch_app():
def capture_screen_with_cursor():
# fixme: when running on virtual machines, the cursor is not captured, don't know why
file_path = os.path.join("screenshots", "screenshot.png")
file_path = os.path.join(os.path.dirname(__file__), "screenshots", "screenshot.png")
user_platform = platform.system()
# Ensure the screenshots directory exists

View File

@@ -53,17 +53,32 @@
"chrome"
],
"evaluator": {
"func": "is_expected_active_tab",
"result": {
"func": ["is_expected_active_tab", "is_expected_active_tab"],
"conj": "or",
"result": [
{
"type": "active_url_from_accessTree",
"goto_prefix": "https://www."
},
"expected": {
},
{
"type": "active_url_from_accessTree",
"goto_prefix": "https://www."
}
],
"expected": [
{
"type": "rule",
"rules": {
"type": "url",
"url": "https://www.drugs.com/npc/"
}
}
},
{
"type": "rule",
"rules": {
"type": "url",
"url": "https://www.drugs.com/npp/"
}
}]
}
}
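
The list-valued `func`/`result`/`expected` with `"conj": "or"` above is the multi-evaluator form used throughout these configs. A minimal sketch of how a harness might fold the per-rule scores (hypothetical; the actual combination logic lives in the evaluation loop, not in this JSON):

    from typing import List

    def combine_scores(scores: List[float], conj: str) -> float:
        # "and" requires every sub-evaluator to pass, "or" at least one.
        if conj == "and":
            return float(all(s > 0 for s in scores))
        if conj == "or":
            return float(any(s > 0 for s in scores))
        raise ValueError(f"unknown conj: {conj}")

    # Either the /npc/ or the /npp/ URL counts as the expected active tab:
    print(combine_scores([0.0, 1.0], "or"))  # 1.0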

View File

@@ -62,8 +62,8 @@
"type": "rule",
"rules":{
"expected": {
"locationName": "Zurich Airport",
"dropLocationName": "Zurich Airport",
"locationName": "Zürich",
"dropLocationName": "Zürich",
"filterCriteria_carCategory": "large",
"filterCriteria_sortBy": "PRICE"
}

View File

@@ -1,7 +1,7 @@
{
"id": "2ad9387a-65d8-4e33-ad5b-7580065a27ca",
"snapshot": "chrome",
"instruction": "Can you make a new folder for me on that bookmarks bar in my internet browser? Let's call it 'Favorites.'",
"instruction": "Can you make a new folder for me on the bookmarks bar in my internet browser? Let's call it 'Favorites.'",
"source": "https://www.youtube.com/watch?v=IN-Eq_UripQ",
"config": [
{

View File

@@ -54,7 +54,7 @@
"rules":{
"expected": {
"q": "drip coffee maker",
"tbs": "mr:1,price:1,ppr_min:25,ppr_max:60,pdtr0:1825161|1825162"
"tbs": "mr:1,price:1,ppr_min:25,ppr_max:60,sales:1,pdtr0:1825161|1825162"
}
}
}

View File

@@ -29,6 +29,15 @@
"chrome"
],
"evaluator": {
"postconfig": [
{
"type": "execute",
"parameters": {
"command": "pkill chrome",
"shell": "true"
}
}
],
"func": "exact_match",
"result": {
"type": "enable_enhanced_safety_browsing"

View File

@@ -1,7 +1,7 @@
{
"id": "99146c54-4f37-4ab8-9327-5f3291665e1e",
"snapshot": "chrome",
"instruction": "Please help me set Chrome to delete my browsing history automatically every time I close the browser.",
"instruction": "Please help me set Chrome to delete my browsing data automatically every time I close the browser.",
"source": "https://www.youtube.com/watch?v=v0kxqB7Xa6I",
"config": [
{
@@ -29,6 +29,13 @@
"chrome"
],
"evaluator": {
"postconfig":[{
"type": "execute",
"parameters": {
"command": "pkill chrome",
"shell": "true"
}
}],
"func": "exact_match",
"result": {
"type": "data_delete_automacally"
@@ -36,7 +43,7 @@
"expected": {
"type": "rule",
"rules": {
"expected": "Crashed"
"expected": "true"
}
}
}

View File

@@ -43,19 +43,35 @@
"chrome"
],
"evaluator": {
"func": "exact_match",
"result": {
"func": ["exact_match", "exact_match"],
"conj": "or",
"result": [
{
"type": "url_dashPart",
"goto_prefix": "https://www.",
"partIndex": -1,
"needDeleteId": false,
"returnType": "string"
},
"expected": {
},
{
"type": "url_dashPart",
"goto_prefix": "https://www.",
"partIndex": -1,
"needDeleteId": false,
"returnType": "string"
}],
"expected": [
{
"type": "rule",
"rules": {
"expected": "tamiflu.html#side-effects"
}
}
},
{
"type": "rule",
"rules": {
"expected": "tamiflu-side-effects.html"
}
}]
}
}

View File

@@ -53,7 +53,7 @@
"rules": {
"expected": [
"AgeAppropriate:Kids",
"search=spider-man%20toys",
"search=spider[-%20]?man%20toys",
"S=4"
]
}

View File

@@ -84,16 +84,34 @@
}
}
],
"func": "check_image_size_and_structure_sim",
"expected":{
"func": [
"check_image_size",
"check_structure_sim"
],
"expected": [
{
"type": "vm_file",
"path": "/home/user/Desktop/dog_with_background.png",
"dest": "dog_with_background.png"
},
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/resized.png",
"dest": "resized.png"
}
{
"type": "vm_file",
"path": "/home/user/Desktop/dog_with_background.png",
"dest": "dog_with_background.png"
}
],
"result": [
{
"type": "rule",
"rules": {
"height": 512
}
},
{
"type": "vm_file",
"path": "/home/user/Desktop/resized.png",
"dest": "resized.png"
}
]
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "02ce9a50-7af2-47ed-8596-af0c230501f8",
"snapshot": "libreoffice_writer",
"instruction": "I'm using libreoffice writer to write a tutorial about linux, and now I want to show the results obtained by using the \"ls\" command in /home/user. Please run this command and save the screenshot as 'ls.png' on Desktop.",
"instruction": "I am currently utilizing LibreOffice Writer to compose a Linux tutorial, and I intend to display the outcomes generated by executing the \"ls\" command in /home/user. Kindly execute this command and save the screenshot as 'ls.png' on the Desktop.",
"source": "authors",
"config": [
{
@@ -54,7 +54,7 @@
"type": "rule",
"rules": {
"type": "text",
"text": "$ ls\n"
"text": " Ls"
}
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "09a37c51-e625-49f4-a514-20a773797a8a",
"snapshot": "libreoffice_writer",
"instruction": "I received a request from my friend that he wanted me to help him modify a picture. On the Desktop is the requirement doc and the picture to be adjusted. Modify the image as he said and save modified pic as \"pic.jpg\" on Desktop. Thanks!",
"instruction": "I've received a request from my friend who asked for assistance in editing an image. The document with the requirements and the picture to be adjusted are on the Desktop. Please make the necessary modifications to the image as his instructions and save the edited picture as \"pic.jpg\" on the Desktop. Thank you!",
"source": "authors",
"config": [
{
@@ -37,7 +37,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1Ee1vNyG7gGpLKK2VlLfj6PxcmdkMdvqK&export=download&authuser=0&confirm=t&uuid=1f441c5d-b62d-4850-870f-8e8f113a4091&at=APZUnTWEvKSSkuGBWzen0S9L7aHP:1709727474803",
"dest": "pic.jpg"
"dest": "pic_Gold.jpg"
},
"result": {
"type": "vm_file",

View File

@@ -1,94 +1,109 @@
{
"id": "0c825995-5b70-4526-b663-113f4c999dd2",
"snapshot": "libreoffice_calc",
"instruction": "I'm working on a comprehensive report for our environmental policy review meeting next week. I need to integrate key insights from an important document, which is a guidebook on the Green Economy, where I'm particularly interested in the 'Introduction' section. Could you extract this section and compile them into a new Google Doc named 'environment_policy_report (draft)' under /environment_policy folder? This will significantly aid in our discussion on aligning our environmental policies with sustainable and green economic practices. Thanks!",
"source": "authors",
"config": [
{
"type": "googledrive",
"parameters": {
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"operation": ["delete"],
"args": [
{
"query": "title = 'environment_policy_report (draft).doc' or title = 'environment_policy_report (draft).docx' or title = 'environment_policy_report (draft)'",
"trash": false
}
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "login",
"parameters": {
"settings_file": "evaluation_examples/settings/google/settings.json",
"platform": "googledrive"
}
},
{
"type": "command",
"parameters": {
"command": ["mkdir", "-p", "/home/user/Desktop/wwf"]
}
},
{
"type": "download",
"parameters": {
"files": [
{"path": "/home/user/Desktop/wwf/lpr_living_planet_report_2016.pdf", "url": "https://drive.google.com/uc?id=19NCdw_MVP6nH5nC6okYYe8U1mJABfTRK&export=download"},
{"path": "/home/user/Desktop/wwf/279c656a32_ENGLISH_FULL.pdf", "url": "https://drive.google.com/uc?id=1ckH1NetfImQ9EyONTO-ZFWA8m8VIUFvD&export=download"},
{"path": "/home/user/Desktop/wwf/7g37j96psg_WWF_AR2021_spreads.pdf", "url": "https://drive.google.com/uc?id=1cxLTzmqDKMomOyvho29lvFvhRnb0Y8__&export=download"},
{"path": "/home/user/Desktop/GE Guidebook.pdf", "url": "https://drive.google.com/uc?id=1KzC_R3eI3Rmgwz5bkcI8Ohv7ebOrU-Is&export=download"},
{"path": "/home/user/Desktop/assessing_and_reporting_water_quality(q&a).pdf", "url": "https://drive.google.com/uc?id=1LFojf3Weflv3fVdrZrgTY1iUaRdbT9kG&export=download"}
]
}
}
],
"trajectory": "trajectories/0c825995-5b70-4526-b663-113f4c999dd2",
"related_apps": [
"libreoffice_calc",
"chrome",
"os"
],
"evaluator": {
"func": "compare_docx_files",
"result": {
"type": "googledrive_file",
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"path_list": [
[
"environment_policy_report (draft).docx"
]
],
"dest": [
"environment_policy_report (draft).docx"
]
},
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1A2ti9JncAfIa6ks7FTJWHtYlZo-68FtM&export=download",
"dest": "environment_policy_report (draft)_gold.docx"
},
"options": {
"content_only": true
}
}
"id": "0c825995-5b70-4526-b663-113f4c999dd2",
"snapshot": "libreoffice_calc",
"instruction": "I'm working on a comprehensive report for our environmental policy review meeting next week. I need to integrate key insights from an important document, which is a guidebook on the Green Economy, where I'm particularly interested in the 'Introduction' section. Could you extract this section and compile them into a new Google Doc named 'environment_policy_report (draft)' under /environment_policy folder? This will significantly aid in our discussion on aligning our environmental policies with sustainable and green economic practices. Thanks!",
"source": "authors",
"config": [
{
"type": "googledrive",
"parameters": {
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"operation": [
"delete"
],
"args": [
{
"query": "title = 'environment_policy_report (draft).doc' or title = 'environment_policy_report (draft).docx' or title = 'environment_policy_report (draft)'",
"trash": false
}
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "login",
"parameters": {
"settings_file": "evaluation_examples/settings/google/settings.json",
"platform": "googledrive"
}
},
{
"type": "command",
"parameters": {
"command": [
"mkdir",
"-p",
"/home/user/Desktop/wwf"
]
}
},
{
"type": "download",
"parameters": {
"files": [
{
"path": "/home/user/Desktop/wwf/lpr_living_planet_report_2016.pdf",
"url": "https://drive.google.com/uc?id=19NCdw_MVP6nH5nC6okYYe8U1mJABfTRK&export=download"
},
{
"path": "/home/user/Desktop/wwf/279c656a32_ENGLISH_FULL.pdf",
"url": "https://drive.google.com/uc?id=1ckH1NetfImQ9EyONTO-ZFWA8m8VIUFvD&export=download"
},
{
"path": "/home/user/Desktop/wwf/7g37j96psg_WWF_AR2021_spreads.pdf",
"url": "https://drive.google.com/uc?id=1cxLTzmqDKMomOyvho29lvFvhRnb0Y8__&export=download"
},
{
"path": "/home/user/Desktop/GE Guidebook.pdf",
"url": "https://drive.google.com/uc?id=1KzC_R3eI3Rmgwz5bkcI8Ohv7ebOrU-Is&export=download"
},
{
"path": "/home/user/Desktop/assessing_and_reporting_water_quality(q&a).pdf",
"url": "https://drive.google.com/uc?id=1LFojf3Weflv3fVdrZrgTY1iUaRdbT9kG&export=download"
}
]
}
}
],
"trajectory": "trajectories/0c825995-5b70-4526-b663-113f4c999dd2",
"related_apps": [
"libreoffice_calc",
"chrome",
"os"
],
"evaluator": {
"func": "compare_docx_files",
"result": {
"type": "googledrive_file",
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"path": ["environment_policy", "environment_policy_report (draft)"],
"dest": "environment_policy_report (draft).docx"
},
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1A2ti9JncAfIa6ks7FTJWHtYlZo-68FtM&export=download",
"dest": "environment_policy_report (draft)_gold.docx"
},
"options": {
"content_only": true
}
}
}

View File

@@ -0,0 +1,99 @@
{
"id": "0e5303d4-8820-42f6-b18d-daf7e633de21",
"snapshot": "chrome",
"instruction": "I want to learn python programming and my friend recommends me this course website. I have grabbed the lecture slide for week 0. Please download the PDFs for other weeks into the opened folder and leave the file name as-it-is.",
"source": "authors",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://cs50.harvard.edu/python/2022/weeks/0/"
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"mkdir",
"-p",
"/home/user/lecture_slides"
]
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1OdvHgcHXSn62xXe_VrPTN0HLWHmrcfdY&export=download&authuser=0&confirm=t",
"path": "/home/user/lecture_slides/lecture0.pdf"
}
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"nautilus",
"/home/user/lecture_slides"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"os",
"chrome"
],
"evaluator": {
"postconfig": [
{
"type": "execute",
"parameters": {
"command": [
"/bin/bash",
"-c",
"cd /home/user && zip -qr lecture_slides.zip lecture_slides/"
]
}
}
],
"func": "compare_archive",
"result": {
"type": "vm_file",
"path": "/home/user/lecture_slides.zip",
"dest": "lecture_slides.zip"
},
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1Ej2iHG8p-QJe7FZQpPIIS82BHOlFAUQM&export=download&authuser=0&confirm=t",
"dest": "gold_lecture_slides.zip"
},
"options": {
"file_path": "lecture_slides",
"file_type": "pdf"
}
}
}

View File

@@ -0,0 +1,116 @@
{
"id": "1f18aa87-af6f-41ef-9853-cdb8f32ebdea",
"snapshot": "libreoffice_calc",
"instruction": "I've prepared some grammar tests and placed them in the 'Grammar test' folder. I've already provided the multiple-choice answers for Test 1 in the 'answer doc' file. Could you please follow the same format to write out the answers for the remaining two tests in the doc file? This way, I can distribute them to the students as a reference. Thank you.",
"source": "authors",
"config": [
{
"type": "command",
"parameters": {
"command": [
"mkdir",
"-p",
"/home/user/Desktop/students work/",
"/home/user/Desktop/Lec powerpoint/",
"/home/user/Desktop/Grammar test/",
"/home/user/Desktop/Grammar rules PDF/",
"/home/user/Desktop/FDI/"
]
}
},
{
"type": "download",
"parameters": {
"files": [
{
"path": "/home/user/Desktop/Grammer test 1.docx",
"url": "https://drive.google.com/uc?id=1VaXQ9XdzMv079xKFs0Y2XrwdmwFHIvBK&export=download"
},
{
"path": "/home/user/Desktop/Grammer test 2.docx",
"url": "https://drive.google.com/uc?id=1k2T88WreTwi-Yyp9mEJnreEQC3DdkJ2x&export=download"
},
{
"path": "/home/user/Desktop/Grammer test 3.docx",
"url": "https://drive.google.com/uc?id=1QgyQWVOcAJuPaSlrywb9nuFiQDySsTb2&export=download"
},
{
"path": "/home/user/Desktop/Answer.docx",
"url": "https://drive.google.com/uc?id=1BC2DuWJuZggmf6fXl6Ys9xQMZzU6a1br&export=download"
},
{
"path": "/home/user/Desktop/Grammar rules PDF/irregularrules02.pdf",
"url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download"
},
{
"path": "/home/user/Desktop/Grammar rules PDF/irregularrules01.pdf",
"url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download"
},
{
"path": "/home/user/Desktop/Grammar rules PDF/fragrules.pdf",
"url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download"
},
{
"path": "/home/user/Desktop/Grammar rules PDF/csfsrules.pdf",
"url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download"
},
{
"path": "/home/user/Desktop/Public Lecture Teaching Plan.docx",
"url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download"
},
{
"path": "/home/user/Desktop/Course Timetable.xlsx",
"url": "https://drive.google.com/uc?id=1NGtahknRq_kXsXlw0tRQ1_CZp9SljoVg&export=download"
}
]
}
}
],
"trajectory": "trajectories/1f18aa87-af6f-41ef-9853-cdb8f32ebdea",
"related_apps": [
"os",
"libreoffice_writer"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "Answer.docx - LibreOffice Writer",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); "
]
}
}
],
"func": "compare_docx_files",
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1TOMGWC3OFuP6yEGQuRJMEFWdg2NcBPSs&export=download",
"dest": "Answer gold.docx"
},
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/Answer.docx",
"dest": "Answer.docx"
},
"options": {
"ignore_case": true,
"ignore_blanks": true
}
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "20236825-b5df-46e7-89bf-62e1d640a897",
"snapshot": "vscode",
"instruction": "I am coding on my algorithm practice. The doc \"bubble_Sort_tutorial.docx\" is the document for it. Help me finish the function 'bubbleSort' in 'bubbleSort.py' on the Desktop save the output in 'res.txt' on Desktop.",
"instruction": "I am currently working on my algorithm practice using the document \"bubble_Sort_tutorial.docx.\" Please assist me in completing the 'bubbleSort' function within the 'bubbleSort.py' file on the Desktop and save the output as 'res.txt' on the Desktop.",
"source": "authors",
"config": [
{
@@ -47,7 +47,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1g2Trt9oxQyW_sx8aIztFA0zNsE4yNw2x&export=download&authuser=0&confirm=t&uuid=342751c4-54f1-4760-9326-e7388845ded0&at=APZUnTV5BcbaxIZrDglWbs84Oxln:1709623697315",
"dest": "res.txt"
"dest": "res_Gold.txt"
},
"result": {
"type": "vm_file",

View File

@@ -1,7 +1,7 @@
{
"id": "227d2f97-562b-4ccb-ae47-a5ec9e142fbb",
"snapshot": "gimp",
"instruction": "I have my .xcf file saved on Desktop. Could you help me copy the image and paste it into a Libreoffice Writer file? Save it as 'image.docx' on the Desktop.",
"instruction": "I've stored my .xcf file on the Desktop. Can you assist me in copying the image and pasting it into a LibreOffice Writer document? Save the document as 'image.docx' on the Desktop, please.",
"source": "authors",
"config": [
{
@@ -63,7 +63,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=11kWQc1XFEqcIMuW0-NnZRSdv1199OmVI&export=download&authuser=0&confirm=t&uuid=694676fd-1ac9-4501-8acf-f48018494c7f&at=APZUnTV-koL51ka5dHum_HpGywv_:1709618406292",
"dest": "image.docx"
"dest": "image_Gold.docx"
}
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "236833a3-5704-47fc-888c-4f298f09f799",
"snapshot": "chrome",
"instruction": "Find daily papers on Huggingface and take down all the titles, authors and the abstracts of papers on 1st March, 2024 in the doc file 'paper_reading_2024_03_01.docx' on desktop. Each paragraph (split by empty lines) conforms to the following format:\nTitle: xxx\nAuthors: xxx, xxx, xxx\nAbstract: xxxxxxxx.\nArxiv PDF: https://xxxx.pdf",
"instruction": "Find the daily paper list on Huggingface and take down the meta information of papers on 1st March, 2024 in the opened .docx file. I have recorded two papers. Please conform to the format and complete others.",
"source": "authors",
"config": [
{
@@ -31,12 +31,24 @@
]
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1WEDfILO-NijZBGArZ3ovO1933uHeOi1A&export=download&authuser=0&confirm=t",
"path": "/home/user/Desktop/paper_reading_2024_03_01.docx"
}
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"libreoffice",
"--writer"
"--writer",
"/home/user/Desktop/paper_reading_2024_03_01.docx"
]
}
}
@@ -55,7 +67,7 @@
},
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1TUTihXD93bIlekuYy_44fmXAhI1KVol4&export=download&authuser=0&confirm=t",
"path": "https://drive.usercontent.google.com/download?id=1wb0sQnVDCAz8sS49kO8boJIa1kqI5mx0&export=download&authuser=0&confirm=t",
"dest": "gold_paper_reading_2024_03_01.docx"
},
"options": {

View File

@@ -0,0 +1,49 @@
{
"id": "2373b66a-092d-44cb-bfd7-82e86e7a3b4d",
"snapshot": "multiapps",
"instruction": "I want to understand the resource usage of my Ubuntu system under normal workloads. Please use the `sar` command in the `sysstat` toolkit to monitor system activity, evaluate the status once every second for 30 seconds, output the results to \"System_Resources_Report.txt\" under Desktop.",
"source": "author",
"config": [
{
"type": "command",
"parameters":{
"command": "echo password | sudo -S apt-get update && echo password | sudo -S apt-get install sysstat",
"shell": "true"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"os", "calc"
],
"evaluator": {
"func": ["file_contains", "check_line_number"],
"result":
[
{
"type": "vm_file",
"path": "/home/user/Desktop/System_Resources_Report.txt",
"dest": "System_Resources_Report.txt"
},
{
"type": "vm_file",
"path": "/home/user/Desktop/System_Resources_Report.txt",
"dest": "System_Resources_Report.txt"
}
],
"expected":
[
{
"type": "rule",
"rules" :{
"expected": ["CPU", "%user","%nice","%system", "%iowait", "%steal", "%idle"]
}
},
{
"type": "rule",
"rules": {
"expected": "31"
}
}]
}
}
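
For reference, the monitoring this instruction asks for boils down to a single sysstat invocation; a hedged sketch (assuming `sar` was installed by the config above and the Desktop path exists):

    import subprocess

    # Sample system activity once per second for 30 seconds and write the
    # report where the evaluator expects it; the rules above check for the
    # CPU column headers and a line count of 31.
    with open("/home/user/Desktop/System_Resources_Report.txt", "w") as out:
        subprocess.run(["sar", "1", "30"], stdout=out, check=True)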

View File

@@ -1,25 +1,69 @@
{
    "id": "26660ad1-6ebb-4f59-8cba-a8432dfe8d38",
    "snapshot": "libreoffice_calc",
    "instruction": "I want to test the quality of the network environment my laptop is currently in. Please measure my network situation through speedtest.net, export the measurement results, and save them to ~/Test/Speed.",
    "source": "authors",
    "config": [
    ],
    "trajectory": "trajectories/26660ad1-6ebb-4f59-8cba-a8432dfe8d38",
    "related_apps": [
    ],
    "evaluator": {
        "postconfig": [],
        "func": "",
        "result": {
        },
        "expected": {
        },
        "options": {
        }
    }
}

{
    "id": "26660ad1-6ebb-4f59-8cba-a8432dfe8d38",
    "snapshot": "multiapps",
    "instruction": "I want to test the quality of the network environment my laptop is currently in. Please measure my network situation through speedtest.net, export the measurement results, and save them to ~/Test/Speed (if the dir does not exist, create it).",
    "source": "https://www.speedtest.net/",
    "config": [
        {
            "type": "launch",
            "parameters": {
                "command": [
                    "google-chrome",
                    "--remote-debugging-port=1337"
                ]
            }
        },
        {
            "type": "launch",
            "parameters": {
                "command": [
                    "socat",
                    "tcp-listen:9222,fork",
                    "tcp:localhost:1337"
                ]
            }
        },
        {
            "type": "chrome_open_tabs",
            "parameters": {
                "urls_to_open": [
                    "https://www.speedtest.net/"
                ]
            }
        },
        {
            "type": "activate_window",
            "parameters": {
                "window_name": "Google Chrome"
            }
        },
        {
            "type": "execute",
            "parameters": {
                "command": [
                    "python",
                    "-c",
                    "import pyautogui; import time; time.sleep(0.5);"
                ]
            }
        }
    ],
    "trajectory": "trajectories/",
    "related_apps": [
        "os",
        "browser"
    ],
    "evaluator": {
        "func": "compare_time_in_speedtest_results",
        "result": {
            "type": "vm_file",
            "path": "/home/user/Test/Speed/Speedtest Results Export-.csv",
            "dest": "Speedtest Results Export-.csv",
            "time_suffix": true
        },
        "expected": {
            "type": "time_diff_range",
            "diff_range_in_minutes": "60"
        }
    }
}

View File

@@ -1,7 +1,7 @@
{
"id": "2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e",
"snapshot": "libreoffice_calc",
"instruction": "Could you please take a moment to review the 'case study' file located within the 'student work' folder? I'm particularly interested in ensuring that the references section at the end of the document adheres to the APA 7th edition formatting guidelines. If it turns out that the current formatting does not align with APA 7 standards, I would greatly appreciate your assistance in making the necessary adjustments to comply with those guidelines. ",
"instruction": "Could you please take a moment to review the 'case study' file located within the 'student work' folder? I'm particularly interested in ensuring that the references section at the end of the document adheres to the APA 7th edition formatting guidelines. Making the necessary adjustments if it turns out that the current formatting does not align with APA 7 standards or exists some errors.",
"source": "authors",
"config": [
{
@@ -90,13 +90,45 @@
"related_apps": [
],
"evaluator": {
"postconfig": [],
"func": "",
"result": {
},
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "case study.docx - LibreOffice Writer",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); "
]
}
}
],
"func": "compare_references",
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1325Qfch0JaJ_wJ20ICxMoHeW8KLpK8v0&export=download",
"dest": "case study gold.docx"
},
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/students work/case study.docx",
"dest": "case study.docx"
},
"options": {
"content_only": true,
"reference_base_result": 0.92
}
}
}

View File

@@ -1,26 +1,169 @@
{
"id": "3a93cae4-ad3e-403e-8c12-65303b271818",
"snapshot": "libreoffice_calc",
"instruction": "Could you please add a two-hour lecture slot to my weekly course timetable, scheduled for every Wednesday at 12 PM? It seems I accidentally omitted that when setting up my schedule. I'd appreciate you taking care of that for me. Thanks!",
"source": "authors",
"config": [
],
"trajectory": "trajectories/3a93cae4-ad3e-403e-8c12-65303b271818",
"related_apps": [
],
"evaluator": {
"postconfig": [],
"func": "",
"result": {
},
"expected": {
},
"options": {
"id": "3a93cae4-ad3e-403e-8c12-65303b271818",
"snapshot": "libreoffice_calc",
"instruction": "Could you please add a two-hour lecture slot to my weekly course timetable, scheduled for every Wednesday at 12 PM? It seems I accidentally omitted that when setting up my schedule. I'd appreciate you taking care of that for me. Thanks!",
"source": "authors",
"config": [
{
"type": "command",
"parameters": {
"command": [
"mkdir",
"-p",
"/home/user/Desktop/students work/",
"/home/user/Desktop/Lec powerpoint/",
"/home/user/Desktop/Grammar test/",
"/home/user/Desktop/Grammar rules PDF/",
"/home/user/Desktop/FDI/"
]
}
},
{
"type": "download",
"parameters": {
"files": [
{
"path": "/home/user/Desktop/students work/Zheng He .docx",
"url": "https://drive.google.com/uc?id=1wI4141LAthnY5m6qcCUaGgDooe4wiTgz&export=download"
},
{
"path": "/home/user/Desktop/students work/cassie.docx",
"url": "https://drive.google.com/uc?id=1cW9TGJy56vossXxDsdnutPyCbR70af7M&export=download"
},
{
"path": "/home/user/Desktop/students work/case study.docx",
"url": "https://drive.google.com/uc?id=11GzpoZvp4qnL2ukXdpbhH-a3zOIHhtDx&export=download"
},
{
"path": "/home/user/Desktop/Grammar rules PDF/irregularrules02.pdf",
"url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download"
},
{
"path": "/home/user/Desktop/Grammar rules PDF/irregularrules01.pdf",
"url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download"
},
{
"path": "/home/user/Desktop/Grammar rules PDF/fragrules.pdf",
"url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download"
},
{
"path": "/home/user/Desktop/Grammar rules PDF/csfsrules.pdf",
"url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download"
},
{
"path": "/home/user/Desktop/Public Lecture Teaching Plan.docx",
"url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download"
},
{
"path": "/home/user/Desktop/Course Timetable.xlsx",
"url": "https://drive.google.com/uc?id=1DSjRYgofPK2jldKwIsAygz2x8XWlXCK6&export=download"
}
]
}
}
],
"trajectory": "trajectories/3a93cae4-ad3e-403e-8c12-65303b271818",
"related_apps": [
"os",
"libreoffice_calc"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "Course Timetable.xlsx - LibreOffice Calc",
"strict": true
}
        },
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); "
]
}
}
],
"func": [
"compare_table",
"compare_table",
"compare_table"
],
"result": [
{
"type": "vm_file",
"path": "/home/user/Desktop/Course Timetable.xlsx",
"dest": "Course Timetable.xlsx"
},
{
"type": "vm_file",
"path": "/home/user/Desktop/Course Timetable.xlsx",
"dest": "Course Timetable.xlsx"
},
{
"type": "vm_file",
"path": "/home/user/Desktop/Course Timetable.xlsx",
"dest": "Course Timetable.xlsx"
}
],
"expected": [
{
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1VMOon8byWuoCW2Uk5etGMJLMzAfwFVyB&export=download",
"dest": "Course Timetable gold.xlsx"
},
{
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1jAThiIqILZ5t-RFPHVniSvAL8ZJO1H3P&export=download",
"dest": "Course Timetable gold 2.xlsx"
},
{
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1U0THDtPCgsw-Rb0N9fjF8DeOepPeUajP&export=download",
"dest": "Course Timetable gold 3.xlsx"
}
],
"options": [
{
"rules": [
{
"type": "sheet_data",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1",
"ignore_case": true
}
]
},
{
"rules": [
{
"type": "sheet_data",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1",
"ignore_case": true
}
]
},
{
"rules": [
{
"type": "sheet_data",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1",
"ignore_case": true
}
]
}
],
"conj": "or"
}
}

View File

@@ -0,0 +1,37 @@
{
"id": "3c8f201a-009d-4bbe-8b65-a6f8b35bb57f",
"snapshot": "gimp",
"instruction": "Download the image from \"https://drive.google.com/uc?export=download&id=1i8j5dGS57sA07jEuPNAlQW-sn5uqUnuK\", and then use GIMP to compress it to under 600KB. Resize if needed.",
"source": "",
"config": [
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"alt\", \"t\"); time.sleep(0.5);"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"gimp",
"os"
],
"evaluator": {
"func": "check_image_file_size",
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/compressed.jpeg",
"dest": "compressed.jpeg"
},
"expected": {
"type": "rule",
"rules": {
"max_size": 600000
}
}
}
}

View File

@@ -0,0 +1,83 @@
{
"id": "3e3fc409-bff3-4905-bf16-c968eee3f807",
"snapshot": "chrome",
"instruction": "I'm a huge movie fan and have kept a record of all the movies I've watched. I'm curious to find out if there are any films released before 2024 from the IMDB Top 30 list that I haven't seen yet. Help me create another sheet 'unseen_movies' in the opened Excel. This sheet should share the same headers and sort the results according to IMDB rankings from high to low.",
"source": "authors",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.imdb.com"
]
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1KVNVf5qZhprV_7rgEl33Qrkagv603reM&export=download&authuser=0&confirm=t",
"path": "/home/user/Desktop/movies.xlsx"
}
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"libreoffice",
"--calc",
"/home/user/Desktop/movies.xlsx"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"libreoffice_calc",
"chrome"
],
"evaluator": {
"func": "compare_table",
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/movies.xlsx",
"dest": "movies.xlsx"
},
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=149QKswQ8AIYk21Aaatic6QSCcBU40uyd&export=download&authuser=0&confirm=t",
"dest": "gold_movies.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_data",
"sheet_idx0": "RNunseen_movies",
"sheet_idx1": "ENunseen_movies"
}
]
}
}
}

View File

@@ -0,0 +1,52 @@
{
"id": "3eb2a122-a5e3-4f89-9820-f7fa1a582969",
"snapshot": "multiapps",
"instruction": "Please search online for the submission deadline and venue of the ICLR main conference in 2035, and copy it to my clipboard. If not yet publicized, copy None.",
"source": "author",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "command",
"parameters":{
"command": "echo password | sudo -S apt install xsel && xsel -bc",
"shell": "true"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"os", "chrome"
],
"evaluator": {
"func": "is_in_vm_clickboard",
"expected": {
"type": "vm_command_line",
"command": "xsel --clipboard --output",
"shell": "true"
},
"result": {
"type": "rule",
"rules": {
"expected": ["None"]
}
}
}
}

View File

@@ -0,0 +1,71 @@
{
"id": "42f4d1c7-4521-4161-b646-0a8934e36081",
"snapshot": "gimp",
"instruction": "Configure VS Code to edit GIMP script-fu scripts effectively by installing lisp extension. Test by writing code to resize the image \"character.png\" to 128 * 128 as \"resized.png\".",
"source": "",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.google.com/uc?export=download&id=1yrWU5HimYPNUjdtvw1a218kh50fPVtZ3",
"path": "/home/user/Desktop/character.png"
}
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"code"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"gimp",
"vs_code"
],
"evaluator": {
"func": [
"is_extension_installed",
"check_image_size"
],
"result": [
{
"type": "vm_command_line",
"command": [
"code",
"--list-extensions",
"|",
"grep",
"mattn.lisp"
]
},
{
"type": "vm_file",
"path": "/home/user/Desktop/resized.png",
"dest": "resized.png"
}
],
"expected": [
{
"type": "rule",
"rules": {
"type": "contain",
"expected": "mattn.lisp"
}
},
{
"type": "rule",
"rules": {
"height": 128,
"width": 128
}
}
]
}
}

Some files were not shown because too many files have changed in this diff