diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index 7a96288..b475008 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -450,6 +450,8 @@ class SetupController: query(str): query pattern string to search files or folder in google drive to delete, please refer to https://developers.google.com/drive/api/guides/search-files?hl=en about how to write query string. trash(bool): whether to delete files permanently or move to trash. By default, trash=false, completely delete it. + for mkdirs: + path(List[str]): the path in the google drive to create folder for upload: path(str): remote url to download file dest(List[str]): the path in the google drive to store the downloaded file diff --git a/desktop_env/evaluators/getters/__init__.py b/desktop_env/evaluators/getters/__init__.py index 43f4c1b..958d98d 100644 --- a/desktop_env/evaluators/getters/__init__.py +++ b/desktop_env/evaluators/getters/__init__.py @@ -23,9 +23,10 @@ from .chrome import ( get_active_tab_url_parse, get_gotoRecreationPage_and_get_html_content, get_url_dashPart, - get_active_url_from_accessTree + get_active_url_from_accessTree, + get_info_from_website ) -from .file import get_cloud_file, get_vm_file, get_cache_file +from .file import get_cloud_file, get_vm_file, get_cache_file, get_content_from_vm_file from .general import get_vm_command_line, get_vm_terminal_output from .gimp import get_gimp_config_file from .impress import get_audio_in_slide diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py index 86aefc4..8bd5842 100644 --- a/desktop_env/evaluators/getters/chrome.py +++ b/desktop_env/evaluators/getters/chrome.py @@ -11,7 +11,7 @@ import lxml.etree import requests from lxml.cssselect import CSSSelector from lxml.etree import _Element -from playwright.sync_api import sync_playwright +from playwright.sync_api import sync_playwright, expect from pydrive.auth import GoogleAuth from 
pydrive.drive import GoogleDrive, GoogleDriveFileList, GoogleDriveFile @@ -35,6 +35,89 @@ WARNING: """ +def get_info_from_website(env, config: Dict[Any, Any]) -> Any: + """ Get information from a website. Especially useful when the information may be updated through time. + Args: + env (Any): The environment object. + config (Dict[Any, Any]): The configuration dictionary. + - url (str): The URL of the website to visit + - infos (List[Dict[str, str]]): The list of information to be extracted from the website. Each dictionary contains: + - action (str): chosen from 'inner_text', 'attribute', 'click_and_inner_text', 'click_and_attribute', etc., concretely, + - inner_text: extract the inner text of the element specified by the selector + - attribute: extract the attribute of the element specified by the selector + - click_and_inner_text: click elements following the selector and then extract the inner text of the last element + - click_and_attribute: click elements following the selector and then extract the attribute of the last element + - selector (Union[str, List[str]]): The CSS selector(s) of the element(s) to be extracted. + - attribute (str): optional for 'attribute' and 'click_and_attribute', the attribute to be extracted. + - backups (Any): The backup information to be returned if the extraction fails. 
+ """ + try: + host = env.vm_ip + port = 9222 # fixme: this port is hard-coded, need to be changed from config file + remote_debugging_url = f"http://{host}:{port}" + with sync_playwright() as p: + # connect to remote Chrome instance + try: + browser = p.chromium.connect_over_cdp(remote_debugging_url) + except Exception as e: + # If the connection fails (e.g., the agent close the browser instance), start a new browser instance + app = 'chromium' if 'arm' in platform.machine() else 'google-chrome' + payload = json.dumps({"command": [ + app, + "--remote-debugging-port=1337" + ], "shell": False}) + headers = {"Content-Type": "application/json"} + requests.post("http://" + host + ":5000/setup" + "/launch", headers=headers, data=payload) + time.sleep(5) + browser = p.chromium.connect_over_cdp(remote_debugging_url) + + page = browser.contexts[0].new_page() + page.goto(config["url"]) + page.wait_for_load_state('load') + infos = [] + for info_dict in config.get('infos', []): + if page.url != config["url"]: + page.goto(config["url"]) + page.wait_for_load_state('load') + action = info_dict.get('action', 'inner_text') + if action == "inner_text": + ele = page.locator(info_dict['selector']) + expect(ele).to_be_visible() + infos.append(ele.inner_text()) + elif action == "attribute": + ele = page.locator(info_dict['selector']) + expect(ele).to_be_visible() + infos.append(ele.get_attribute(info_dict['attribute'])) + elif action == 'click_and_inner_text': + for idx, sel in enumerate(info_dict['selector']): + if idx != len(info_dict['selector']) - 1: + link = page.locator(sel) + expect(link).to_be_visible() + link.click() + page.wait_for_load_state('load') + else: + ele = page.locator(sel) + expect(ele).to_be_visible() + infos.append(ele.inner_text()) + elif action == 'click_and_attribute': + for idx, sel in enumerate(info_dict['selector']): + if idx != len(info_dict['selector']) - 1: + link = page.locator(sel) + expect(link).to_be_visible() + link.click() + 
page.wait_for_load_state('load') + else: + ele = page.locator(sel) + expect(ele).to_be_visible() + infos.append(ele.get_attribute(info_dict['attribute'])) + else: + raise NotImplementedError(f'The action {action} is not supported yet.') + return infos + except Exception as e: + logger.error(f'[ERROR]: failed to obtain information from the website: {config["url"]}. Use backup results instead.') + return config.get('backups', None) + + # The following ones just need to load info from the files of software, no need to connect to the software def get_default_search_engine(env, config: Dict[str, str]): os_type = env.vm_platform diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py index 95c0a18..98f6e00 100644 --- a/desktop_env/evaluators/getters/file.py +++ b/desktop_env/evaluators/getters/file.py @@ -1,8 +1,27 @@ import os from typing import Dict, List, Set from typing import Optional, Any, Union - import requests +import pandas as pd + + +def get_content_from_vm_file(env, config: Dict[str, Any]) -> Any: + """ + Config: + path (str): absolute path on the VM to fetch + """ + + path = config["path"] + file_path = get_vm_file(env, {"path": path, "dest": os.path.basename(path)}) + file_type, file_content = config['file_type'], config['file_content'] + if file_type == 'xlsx': + if file_content == 'last_row': + df = pd.read_excel(file_path) + last_row = df.iloc[-1] + last_row_as_list = last_row.astype(str).tolist() + return last_row_as_list + else: + raise NotImplementedError(f"File type {file_type} not supported") def get_cloud_file(env, config: Dict[str, Any]) -> Union[str, List[str]]: diff --git a/desktop_env/evaluators/getters/impress.py b/desktop_env/evaluators/getters/impress.py index ec4a154..d0673dd 100644 --- a/desktop_env/evaluators/getters/impress.py +++ b/desktop_env/evaluators/getters/impress.py @@ -40,20 +40,23 @@ def get_audio_in_slide(env, config: Dict[str, str]): audio_file_path = audio_file_path.replace('\\', '/') # 
Create a temporary directory to extract the audio file - with tempfile.TemporaryDirectory() as tmpdirname: - # Extract the audio file - myzip.extract(audio_file_path, tmpdirname) - # Get the full path of the extracted audio file - extracted_audio_path = os.path.join(tmpdirname, audio_file_path) - # Return the extracted audio file path - audio_file_path = extracted_audio_path - + tmpdirname = os.path.dirname(ppt_file_localhost_path) + myzip.extract(audio_file_path, tmpdirname) + audio_file_path = os.path.join(tmpdirname, audio_file_path) + return audio_file_path + # with tempfile.TemporaryDirectory() as tmpdirname: + # # Extract the audio file + # myzip.extract(audio_file_path, tmpdirname) + # # Get the full path of the extracted audio file + # extracted_audio_path = os.path.join(tmpdirname, audio_file_path) + # # Return the extracted audio file path + # audio_file_path = extracted_audio_path else: # the audio file is external to the .pptx file # Return the audio file path assert target.startswith("file://"), target audio_file_path = target[7:] - + break if audio_file_path is None: return None diff --git a/desktop_env/evaluators/getters/misc.py b/desktop_env/evaluators/getters/misc.py index db04aea..976db19 100644 --- a/desktop_env/evaluators/getters/misc.py +++ b/desktop_env/evaluators/getters/misc.py @@ -1,5 +1,5 @@ import logging -from typing import TypeVar +from typing import TypeVar, Dict from datetime import datetime, timedelta logger = logging.getLogger("desktopenv.getters.misc") @@ -74,13 +74,13 @@ relativeTime_to_IntDay = { "first monday four months later": "special" } -def get_rule(env, config: R) -> R: +def get_rule(env, config: Dict[str, R]) -> R: """ Returns the rule as-is. """ return config["rules"] -def get_rule_relativeTime(env, config: R) -> R: +def get_rule_relativeTime(env, config: Dict[str, R]) -> R: """ According to the rule definded in funciton "apply_rules_to_timeFormat", convert the relative time to absolute time. 
config: diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 6d49742..555cff2 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -18,7 +18,8 @@ from .chrome import ( is_expected_search_query, is_expected_active_tab, is_expected_url_pattern_match, - is_added_to_steam_cart + is_added_to_steam_cart, + compare_pdf_images ) from .docs import ( compare_font_names, @@ -45,7 +46,8 @@ from .docs import ( is_first_line_centered, check_file_exists, check_tabstops, - compare_contains_image + compare_contains_image, + compare_docx_images ) from .general import ( check_csv, @@ -58,7 +60,8 @@ from .general import ( fuzzy_match, check_include_exclude, check_direct_json_object, - diff_text_file + diff_text_file, + literal_match ) from .gimp import ( check_brightness_decrease_and_structure_sim, @@ -129,7 +132,8 @@ from .vscode import ( check_json_keybindings, check_python_file_by_test_suite, check_python_file_by_gold_file, - check_html_background_image + check_html_background_image, + compare_zip_files ) diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py index 0a76bf0..3c367b3 100644 --- a/desktop_env/evaluators/metrics/chrome.py +++ b/desktop_env/evaluators/metrics/chrome.py @@ -129,6 +129,39 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}") return score / len(pdf2_path) +import fitz +from PIL import Image +from io import BytesIO + +def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float: + def extract_images_from_pdf(pdf_path): + pdf_document = fitz.open(pdf_path) + images = [] + + for page_number in range(pdf_document.page_count): + page = pdf_document[page_number] + image_list = page.get_images(full=True) + + for img_index, img_info in enumerate(image_list): + base_image = 
pdf_document.extract_image(img_index) + image_bytes = base_image["image"] + + images.append(BytesIO(image_bytes)) + + return images + + images1 = extract_images_from_pdf(pdf1_path) + images2 = extract_images_from_pdf(pdf2_path) + + if len(images1) != len(images2): + return 0. + + for i, (img1, img2) in enumerate(zip(images1, images2), 1): + if Image.open(img1).tobytes() != Image.open(img2).tobytes(): + return 0. + + return 1. + def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float: """ diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index a17da39..4fa1a03 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -58,6 +58,8 @@ def contains_page_break(docx_file): def compare_docx_files(file1, file2, **options): ignore_blanks = options.get('ignore_blanks', True) + ignore_case = options.get('ignore_case', False) + ignore_order = options.get('ignore_order', False) content_only = options.get('content_only', False) def get_paragraph_texts_odt(document): @@ -82,11 +84,17 @@ def compare_docx_files(file1, file2, **options): doc2 = Document(file2) doc1_paragraphs = [p.text for p in doc1.paragraphs] doc2_paragraphs = [p.text for p in doc2.paragraphs] + if ignore_order: + doc1_paragraphs = sorted(doc1_paragraphs) + doc2_paragraphs = sorted(doc2_paragraphs) elif file1.endswith('.odt') and file2.endswith('.odt'): doc1 = load(file1) doc2 = load(file2) doc1_paragraphs = get_paragraph_texts_odt(doc1) doc2_paragraphs = get_paragraph_texts_odt(doc2) + if ignore_order: + doc1_paragraphs = sorted(doc1_paragraphs) + doc2_paragraphs = sorted(doc2_paragraphs) else: # Unsupported file types or mismatch print("Unsupported file types or mismatch between file types.") @@ -96,6 +104,8 @@ def compare_docx_files(file1, file2, **options): # Compare the content of the documents text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip() text2 = re.sub(r'\s+', ' ', 
'\n'.join(doc2_paragraphs)).strip() + if ignore_case: + text1, text2 = text1.lower(), text2.lower() similarity = fuzz.ratio(text1, text2) / 100.0 return similarity @@ -103,6 +113,8 @@ def compare_docx_files(file1, file2, **options): if ignore_blanks: text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip() text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip() + if ignore_case: + text1, text2 = text1.lower(), text2.lower() if text1 != text2: return 0 else: @@ -111,6 +123,8 @@ def compare_docx_files(file1, file2, **options): # Compare each paragraph for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs): + if ignore_case: + p1, p2 = p1.lower(), p2.lower() if p1 != p2: return 0 @@ -159,6 +173,29 @@ def compare_docx_tables(docx_file1, docx_file2): return 1 +from io import BytesIO +from PIL import Image + +def compare_docx_images(docx_file1, docx_file2): + doc1 = Document(docx_file1) + doc2 = Document(docx_file2) + + def extract_images(doc): + images = [] + for rel in doc.part.rels.values(): + if "image" in rel.reltype: + img_data = rel.target_part.blob + images.append(BytesIO(img_data)) + return images + + images1 = extract_images(doc1) + images2 = extract_images(doc2) + if len(images1) != len(images2): + return 0 + for img1, img2 in zip(images1, images2): + if Image.open(img1).tobytes() != Image.open(img2).tobytes(): + return 0 + return 1 def compare_line_spacing(docx_file1, docx_file2): if not compare_docx_files(docx_file1, docx_file2): diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index 4458a69..03f6b13 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -1,6 +1,7 @@ import csv import functools import json +import yaml import operator import re import sqlite3 @@ -39,6 +40,24 @@ def exact_match(result, rules) -> float: else: return 0. 
+ +def literal_match(result: Any, expected: Any, **options) -> float: + literal_type = options.get('type', 'str') + if literal_type == 'str': + ignore_case = options.get('ignore_case', False) + score = str(result) == str(expected) if not ignore_case else str(result).lower() == str(expected).lower() + return float(score) + elif literal_type == 'list': + if type(result) not in [list, tuple] or type(expected) not in [list, tuple] or len(result) != len(expected): + return .0 + ignore_case = options.get('ignore_case', False) + result = [str(s) for s in result] if not ignore_case else [str(s).lower() for s in result] + expected = [str(s) for s in expected] if not ignore_case else [str(s).lower() for s in expected] + return float(result == expected) + else: + raise NotImplementedError(f"Type {type} not supported") + + def is_in_list(result, rules) -> float: expect = rules["expected"] if expect in result: @@ -132,11 +151,11 @@ _accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org" } -def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float: +def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float: """ Args: result (str): XML of GNOME Accessibility Tree - rules (Dict[str, Any]): dict like + rules (List[Dict[str, Any]]): list of dict like { "selectors": list of str as CSS selectors, will be connected by ", " to form a composite selector. Only one from `selectors` and @@ -154,30 +173,33 @@ def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float: """ at: _Element = lxml.etree.fromstring(result) - if "xpath" in rules: - elements: List[_Element] = at.xpath(rules["xpath"], namespaces=_accessibility_ns_map) - elif "selectors" in rules: - selector = CSSSelector(", ".join(rules["selectors"]), namespaces=_accessibility_ns_map) - elements: List[_Element] = selector(at) - else: - raise ValueError("At least one of xpath and selectors is required") + total_match_score = 1. 
+ for r in rules: + if "xpath" in r: + elements: List[_Element] = at.xpath(r["xpath"], namespaces=_accessibility_ns_map) + elif "selectors" in r: + selector = CSSSelector(", ".join(r["selectors"]), namespaces=_accessibility_ns_map) + elements: List[_Element] = selector(at) + else: + raise ValueError("At least one of xpath and selectors is required") - if len(elements) == 0: - print("no elements") - return 0. + if len(elements) == 0: + print("no elements") + return 0. - if "text" in rules: - match_func: Callable[[str], Number] = functools.partial(operator.eq if rules["exact"] \ - else (lambda a, b: fuzz.ratio(a, b) / 100.) - , rules["text"] - ) - match_score: Number = 0 - for elm in elements: - match_score = max(match_score, match_func(elm.text or None)) - else: - match_score = 1. + if "text" in r: + match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \ + else (lambda a, b: fuzz.ratio(a, b) / 100.) + , r["text"] + ) + match_score: Number = 0 + for elm in elements: + match_score = max(match_score, match_func(elm.text or None)) + else: + match_score = 1. 
+ total_match_score *= match_score - return float(match_score) + return float(total_match_score) # def check_existence(result: str, *args) -> float: @@ -189,7 +211,7 @@ def run_sqlite3(result: str, rules: Dict[str, Any]) -> float: return float(cursor.fetchone()[0] or 0) -def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]]) -> float: +def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]], is_yaml: bool = False) -> float: """ Args: result (str): path to json file @@ -204,6 +226,7 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str ], "unexpect": float: for rl in r["rules"]: for rng in MultiCellRange(rl["range"]): for cdn in rng.cells: - value1: str = str(read_cell_value(*sheet1, cdn)) - value2: str = str(read_cell_value(*sheet2, cdn)) + coordinate: str = "{:}{:d}".format(get_column_letter(cdn[1]), cdn[0]) + value1: str = str(read_cell_value(*sheet1, coordinate)) + value2: str = str(read_cell_value(*sheet2, coordinate)) + logger.debug("%s: %s vs %s", cdn, value1, value2) for rplc in rl.get("normalization", []): value1 = value1.replace(rplc[0], rplc[1]) @@ -230,11 +233,11 @@ def compare_table(result: str, expected: str = None, **options) -> float: if rl["type"]=="includes": metric: bool = value1 in value2 - if rl["type"]=="includes_by": + elif rl["type"]=="includes_by": metric: bool = value2 in value1 - if rl["type"]=="fuzzy_match": + elif rl["type"]=="fuzzy_match": metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.) 
- if rl["type"]=="exact_match": + elif rl["type"]=="exact_match": metric: bool = value1==value2 total_metric = total_metric and metric diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 4515cd5..b57de00 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -311,14 +311,15 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: , namespaces=_xlsx_ns_imapping ) logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell)) - if "@t" not in cell["c"]: + try: + if "@t" not in cell["c"] or cell["c"]["@t"] == "n": + return float(cell["c"]["v"]) + if cell["c"]["@t"] == "s": + return shared_strs[int(cell["c"]["v"])] + if cell["c"]["@t"] == "str": + return cell["c"]["v"] + except (KeyError, ValueError): return None - if cell["c"]["@t"] == "s": - return shared_strs[int(cell["c"]["v"])] - if cell["c"]["@t"] == "n": - return float(cell["c"]["v"]) - if cell["c"]["@t"] == "str": - return cell["c"]["v"] # }}} read_cell_value # diff --git a/desktop_env/evaluators/metrics/vscode.py b/desktop_env/evaluators/metrics/vscode.py index 2331a75..cc38697 100644 --- a/desktop_env/evaluators/metrics/vscode.py +++ b/desktop_env/evaluators/metrics/vscode.py @@ -90,6 +90,35 @@ def compare_text_file(actual: str, expected: str, **options) -> float: return 1.0 return 0.0 +import zipfile + +def compare_zip_files(actual: str, expected: str, **options) -> float: + """ + Args: + actual (str): path to result zip file + expected (str): path to gold zip file + + Return: + float: the score + """ + if not actual: + return 0. 
+ + with zipfile.ZipFile(actual, 'r') as zip_file1, zipfile.ZipFile(expected, 'r') as zip_file2: + file_list1 = set(zip_file1.namelist()) + file_list2 = set(zip_file2.namelist()) + + if file_list1 != file_list2: + return 0.0 + + for file_name in file_list1: + content1 = zip_file1.read(file_name) + content2 = zip_file2.read(file_name) + + if content1 != content2: + return 0.0 + return 1.0 + def compare_config(actual: str, rules: Dict, **options) -> float: if not actual: diff --git a/evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json b/evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json new file mode 100644 index 0000000..30d5c38 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json @@ -0,0 +1,96 @@ +{ + "id": "185f29bd-5da0-40a6-b69c-ba7f4e0324ef", + "snapshot": "libreoffice_calc", + "instruction": "Transfer the data from our 'Employee Performance Evaluation Summary' Excel sheet into our standardized PDF evaluation forms. Each employee's evaluation data should be accurately filled into the designated fields of the PDF form. It's crucial that the final PDF documents retain a uniform and professional look, ready for distribution to our staff or for filing purposes. Furthermore, please ensure that each PDF file is named according to the employee's name as it appears in the Excel document. This will greatly streamline our evaluation process and enhance our efficiency in managing employee performance records. 
Oh, use \"√\" as mark on characters.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/Employee Performance Evaluation Summary.xlsx", + "url": "https://drive.google.com/uc?id=1uOzi66bzO_WUnoS4Oqsodrd7_YPLatEk&export=download" + }, + { + "path": "/home/user/Desktop/review_template.pdf", + "url": "https://drive.google.com/uc?id=1YJ4RPGFUuS48tBh31gBerA16JSMw498w&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/Employee Performance Evaluation Summary.xlsx" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/review_template.pdf" + } + } + ], + "trajectory": "trajectories/185f29bd-5da0-40a6-b69c-ba7f4e0324ef", + "related_apps": [ + "libreoffice_calc", + "os", + "pdf" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1kZM90nA1krRmV9ug5_BBe8VlrZRVLiLK&export=download", + "https://drive.google.com/uc?id=1zyLzYYThwyit9ciXpfNfPFlBomolOauY&export=download", + "https://drive.google.com/uc?id=1gMT7JBftuymajMAO5rwksORpeVq3uGmH&export=download", + "https://drive.google.com/uc?id=1x0DdtUSZyBifl1tGIWlWKn255WusJeR4&export=download", + "https://drive.google.com/uc?id=1UAcG32WO8XCXElcanjGwbSpJwFuyOkts&export=download", + "https://drive.google.com/uc?id=1PRgryg7Y5evKnDG2LPtAttVp9qAf5VyZ&export=download", + "https://drive.google.com/uc?id=1JxEDriCS2W7BQLdkIgxu_WFCRa9ib4D7&export=download" + ], + "dest": [ + "Alex Lee_Gold.pdf", + "David Wilson_Gold.pdf", + "Emily Johnson_Gold.pdf", + "John Doe_Gold.pdf", + "Linda Green_Gold.pdf", + "Michael Brown_Gold.pdf", + "Sophia Carter_Gold.pdf" + ], + "multi": true, + "gives": [0,1,2,3,4,5,6] + }, + "expected": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/Alex Lee.pdf", + "/home/user/Desktop/David Wilson.pdf", + 
"/home/user/Desktop/Emily Johnson.pdf", + "/home/user/Desktop/John Doe.pdf", + "/home/user/Desktop/Linda Green.pdf", + "/home/user/Desktop/Michael Brown.pdf", + "/home/user/Desktop/Sophia Carter.pdf" + ], + "dest": [ + "Alex Lee.pdf", + "David Wilson.pdf", + "Emily Johnson.pdf", + "John Doe.pdf", + "Linda Green.pdf", + "Michael Brown.pdf", + "Sophia Carter.pdf" + ], + "multi": true, + "gives": [0,1,2,3,4,5,6] + } + } +} diff --git a/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json b/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json new file mode 100644 index 0000000..550755b --- /dev/null +++ b/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json @@ -0,0 +1,58 @@ +{ + "id": "20236825-b5df-46e7-89bf-62e1d640a897", + "snapshot": "vscode", + "instruction": "I am coding on my algorithm practice. The doc \"bubble_Sort_tutorial.docx\" is the document for it. Help me finish the function 'bubbleSort' in 'bubbleSort.py' on the Desktop save the output in 'res.txt' on Desktop.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1AQtZ8Hrf7WtyUtaHMtDN-UErKRXnW64d&export=download&authuser=0&confirm=t&uuid=bf7f2b4b-ecf9-4260-b74e-db0cd40b58ca&at=APZUnTVimJzbmwJ8-4E1lq9ipJf_:1709624149129", + "path": "/home/user/Desktop/bubbleSort.zip" + }, + { + "url": "https://drive.usercontent.google.com/download?id=168ZHCnK6v5PEZ8G5M25ZUW9fICk4OlfE&export=download&authuser=0&confirm=t&uuid=3642df08-dc40-4d37-93a0-8532e3012fb0&at=APZUnTUP1OTlq0kIgqcj7YSWw6MB:1709622592489", + "path": "/home/user/Desktop/Bubble_Sort_tutorial.docx" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "/bin/bash", + "-c", + "unzip /home/user/Desktop/bubbleSort.zip -d /home/user/Desktop/ && rm -rf /home/user/Desktop/bubbleSort.zip" + ] + } + }, + { + "type": "open", + "parameters": { + "path": 
"/home/user/Desktop/Bubble_Sort_tutorial.docx" + } + } + ], + "trajectory": "trajectories/20236825-b5df-46e7-89bf-62e1d640a897", + "related_apps": [ + "vscode", + "libreoffice_writer", + "os" + ], + "evaluator": { + "func": "compare_text_file", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1g2Trt9oxQyW_sx8aIztFA0zNsE4yNw2x&export=download&authuser=0&confirm=t&uuid=342751c4-54f1-4760-9326-e7388845ded0&at=APZUnTV5BcbaxIZrDglWbs84Oxln:1709623697315", + "dest": "res.txt" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/res.txt", + "dest": "res.txt" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json b/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json new file mode 100644 index 0000000..f186383 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json @@ -0,0 +1,69 @@ +{ + "id": "227d2f97-562b-4ccb-ae47-a5ec9e142fbb", + "snapshot": "gimp", + "instruction": "I have my .xcf file saved on Desktop. Could you help me copy the image and paste it into a Libreoffice Writer file? 
Save it as 'image.docx' on the Desktop.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/QTdHniCqfJbBLJe3L3nijU-1200-80.xcf", + "url": "https://drive.usercontent.google.com/download?id=1BGoDOu9bYIG7Twj5dVTxRIWDUgWzzDtP&export=download&authuser=0&confirm=t&uuid=235d1bb2-37a0-4d96-82bf-a87f31d03cb4&at=APZUnTX53EdR1stASFS3OH5luAtB:1709617456061" + } + ] + } + } + ], + "trajectory": "trajectories/227d2f97-562b-4ccb-ae47-a5ec9e142fbb", + "related_apps": [ + "libreoffice_writer", + "gimp", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "image.docx - LibreOffice Writer", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5);" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_docx_images", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/image.docx", + "dest": "image.docx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=11kWQc1XFEqcIMuW0-NnZRSdv1199OmVI&export=download&authuser=0&confirm=t&uuid=694676fd-1ac9-4501-8acf-f48018494c7f&at=APZUnTV-koL51ka5dHum_HpGywv_:1709618406292", + "dest": "image.docx" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/22a4636f-8179-4357-8e87-d1743ece1f81.json b/evaluation_examples/examples/multi_apps/22a4636f-8179-4357-8e87-d1743ece1f81.json new file mode 100644 index 0000000..d22ddaf --- /dev/null +++ b/evaluation_examples/examples/multi_apps/22a4636f-8179-4357-8e87-d1743ece1f81.json @@ -0,0 +1,108 @@ +{ + "id": "22a4636f-8179-4357-8e87-d1743ece1f81", + "snapshot": "chrome", + "instruction": "Please help me convert the file 
\"Meeting-Agenda.docx\" to a pdf file and upload to the folder 'meetings' in my google drive.", + "source": "https://marketplace.uipath.com/listings/merge-pdfs-from-gmail-email-attachments-and-upload-to-gogle-drive", + "config": [ + { + "type": "googledrive", + "parameters": { + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "operation": [ + "delete", + "mkdirs" + ], + "args": [ + { + "query": "title = 'Meeting-Agenda.docx' or title = 'Meeting-Agenda.pdf'", + "trash": false + }, + { + "path": [ + "meetings" + ] + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://news.google.com", + "https://x.com", + "https://www.deepl.com" + ] + } + }, + { + "type": "login", + "parameters": { + "settings_file": "evaluation_examples/settings/google/settings.json", + "platform": "googledrive" + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=17fkMG4guromDzAHNCzzJieZHa2nJDBpc&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/Meeting-Agenda.docx" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--writer", + "/home/user/Desktop/Meeting-Agenda.docx" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_writer", + "chrome" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "googledrive_file", + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "path": [ + "meetings", + "Meeting-Agenda.pdf" + ], + "dest": "Meeting-Agenda.pdf" + }, + "expected": { + "type": "cloud_file", + "path": 
"https://drive.usercontent.google.com/download?id=1wHWQ6mTJcNLtrL83GrUPzxx2HeOC4L8T&export=download&authuser=0&confirm=t", + "dest": "gold-Meeting-Agenda.pdf" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json b/evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json new file mode 100644 index 0000000..3c5f349 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json @@ -0,0 +1,67 @@ +{ + "id": "236833a3-5704-47fc-888c-4f298f09f799", + "snapshot": "chrome", + "instruction": "Find daily papers on Huggingface and take down all the titles, authors and the abstracts of papers on 1st March, 2024 in the doc file 'paper_reading_2024_03_01.docx' on desktop. Each paragraph (split by empty lines) conforms to the following format:\nTitle: xxx\nAuthors: xxx, xxx, xxx\nAbstract: xxxxxxxx.\nArxiv PDF: https://xxxx.pdf", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://huggingface.co/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--writer" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome", + "libreoffice_writer" + ], + "evaluator": { + "func": "compare_docx_files", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/paper_reading_2024_03_01.docx", + "dest": "paper_reading_2024_03_01.docx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1TUTihXD93bIlekuYy_44fmXAhI1KVol4&export=download&authuser=0&confirm=t", + "dest": "gold_paper_reading_2024_03_01.docx" + }, + 
"options": { + "ignore_blanks": true, + "ignore_case": true, + "ignore_order": true + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json new file mode 100644 index 0000000..239d695 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json @@ -0,0 +1,102 @@ +{ + "id": "2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e", + "snapshot": "libreoffice_calc", + "instruction": "Could you please take a moment to review the 'case study' file located within the 'students work' folder? I'm particularly interested in ensuring that the references section at the end of the document adheres to the APA 7th edition formatting guidelines. If it turns out that the current formatting does not align with APA 7 standards, I would greatly appreciate your assistance in making the necessary adjustments to comply with those guidelines.
", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/students work/", + "/home/user/Desktop/Lec powerpoint/", + "/home/user/Desktop/Grammar test/", + "/home/user/Desktop/Grammar rules PDF/", + "/home/user/Desktop/FDI/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/students work/Zheng He .docx", + "url": "https://drive.google.com/uc?id=1wI4141LAthnY5m6qcCUaGgDooe4wiTgz&export=download" + }, + { + "path": "/home/user/Desktop/students work/The literature reviews of weekly readings.docx", + "url": "https://drive.google.com/uc?id=18zoZCNtP-wTkxXp2FhH3O_NdLZKVMPIr&export=download" + }, + { + "path": "/home/user/Desktop/students work/The British Justice System.docx", + "url": "https://drive.google.com/uc?id=1z3YHSN4CvC5kN1AwTWB_-plRS4p5GAch&export=download" + }, + { + "path": "/home/user/Desktop/students work/quiz2.docx", + "url": "https://drive.google.com/uc?id=1R5Bii_kvnv_fZVXV-6DMt6Hgq-1gXMo1&export=download" + }, + { + "path": "/home/user/Desktop/students work/quiz.docx", + "url": "https://drive.google.com/uc?id=1PvlGMVX7YkricrjoPRe0e5VQlHeozRPD&export=download" + }, + { + "path": "/home/user/Desktop/students work/Q1&2&3.docx", + "url": "https://drive.google.com/uc?id=1kLQ3lnba6p9lqikHhKDdbqrYagHnZWU_&export=download" + }, + { + "path": "/home/user/Desktop/students work/Photo Ethics in Journalism.docx", + "url": "https://drive.google.com/uc?id=1V6nG6HP_9Kb5KBCRTpaGsRTdPxnJSmRm&export=download" + }, + { + "path": "/home/user/Desktop/students work/cassie.docx", + "url": "https://drive.google.com/uc?id=1cW9TGJy56vossXxDsdnutPyCbR70af7M&export=download" + }, + { + "path": "/home/user/Desktop/students work/case study.docx", + "url": "https://drive.google.com/uc?id=11GzpoZvp4qnL2ukXdpbhH-a3zOIHhtDx&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules02.pdf", + "url": 
"https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules01.pdf", + "url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/fragrules.pdf", + "url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/csfsrules.pdf", + "url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download" + }, + { + "path": "/home/user/Desktop/Public Lecture Teaching Plan.docx", + "url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download" + }, + { + "path": "/home/user/Desktop/Course Timetable.xlsx", + "url": "https://drive.google.com/uc?id=1NGtahknRq_kXsXlw0tRQ1_CZp9SljoVg&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e", + "related_apps": [ + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } +} diff --git a/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json b/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json new file mode 100644 index 0000000..d2d4193 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json @@ -0,0 +1,49 @@ +{ + "id": "337d318b-aa07-4f4f-b763-89d9a2dd013f", + "snapshot": "libreoffice_calc", + "instruction": "Cross-check the invoices with the bank statements and identify any discrepancies. 
Then pull out the invoices that don't match the statements and put them in the \"problematic\" folder.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/invoice TII-20220301-90.pdf", + "url": "https://drive.google.com/uc?id=13y1Dkh9dJUaWeMSk6pg_UY-R7K4bhAJM&export=download" + }, + { + "path": "/home/user/Desktop/Invoice # GES-20220215-82.pdf", + "url": "https://drive.google.com/uc?id=1zZYJQVpbGmqT_eH0x9Z5O7WoIFrQo3sN&export=download" + }, + { + "path": "/home/user/Desktop/Invoice # 243729.pdf", + "url": "https://drive.google.com/uc?id=1a7fnWpsAdFQkFWl2BqAA_gTwlZ6nvGAx&export=download" + }, + { + "path": "/home/user/Desktop/Bank-Statement.pdf", + "url": "https://drive.google.com/uc?id=1-KS6p0aip56iPmH4okhXZhLgqVwrcjfw&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/337d318b-aa07-4f4f-b763-89d9a2dd013f", + "related_apps": [ + "os", + "pdf" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/problematic/Invoice # 243729.pdf", + "dest": "Invoice # 243729.pdf" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1a7fnWpsAdFQkFWl2BqAA_gTwlZ6nvGAx&export=download", + "dest": "Invoice # 243729 Gold.pdf" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json b/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json new file mode 100644 index 0000000..e1f0544 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json @@ -0,0 +1,26 @@ +{ + "id": "3a93cae4-ad3e-403e-8c12-65303b271818", + "snapshot": "libreoffice_calc", + "instruction": "Could you please add a two-hour lecture slot to my weekly course timetable, scheduled for every Wednesday at 12 PM? It seems I accidentally omitted that when setting up my schedule. I'd appreciate you taking care of that for me. 
Thanks!", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/3a93cae4-ad3e-403e-8c12-65303b271818", + "related_apps": [ + + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json b/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json new file mode 100644 index 0000000..f3365fb --- /dev/null +++ b/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json @@ -0,0 +1,147 @@ +{ + "id": "415ef462-bed3-493a-ac36-ca8c6d23bf1b", + "snapshot": "thunderbird", + "instruction": "Save the AWS invoice of December from the email. I have moved that email to the local \"Bills\" folder. Save it to my receipts folder. Keep the file name pattern and update a record to my tally book.", + "source": "authors", + "config": [ + { + "type": "execute", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/Finance/receipts", "/home/user/Documents/Projects"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2308.pdf", "url": "https://drive.google.com/uc?id=1azRFXf4A7fvW0S7r9upHvleMEi-92hHM&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2309.pdf", "url": "https://drive.google.com/uc?id=1x-lpHm8U4U7uRPZ74-9wq9KzW2R55ln1&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2310.pdf", "url": "https://drive.google.com/uc?id=1pcrgV9G6NO4ekMEQBiupwXtq6mmke7b_&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2311.pdf", "url": "https://drive.google.com/uc?id=1JzbCK_nIY8X_3QZjnkzTtb-cRoq9zNT-&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/X-receipt-2312.pdf", "url": "https://drive.google.com/uc?id=1QzWjNzvNosG_yQr7VVonvYb3cUYF5f3u&export=download"}, + {"path":
"/home/user/Documents/Finance/tally_book.xlsx", "url": "https://drive.google.com/uc?id=13yuLhBPmouoWR-DybfgaIbWUOxbY_jhL&export=download"}, + {"path": "/home/user/.projects.tar.xz", "url": "https://drive.google.com/uc?id=1oJcxpjqF474Wm16i1aZc8DlCEfAvc4t_&export=download"}, + { + "url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", "/home/user/.projects.tar.xz", "-C", "/home/user/Documents/Projects"] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "-xzv", + "--recursive-unlink", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": ["thunderbird"] + } + } + ], + "trajectory": "trajectories/415ef462-bed3-493a-ac36-ca8c6d23bf1b", + "related_apps": ["thunderbird", "libreoffice_calc", "os"], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "tally_book.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/.aws-invoice-2312.pdf", "url": "https://drive.google.com/uc?id=1RqbulzKG_HeYb1GZmLABOzlohlFg02UU&export=download"} + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["diff", ".aws-invoice-2312.pdf", "/home/user/Documents/Finance/receipts/aws-invoice-2312.pdf"], + "stdout": "diff.out" + } + } + ], + "func": ["compare_table", 
"check_list"], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Documents/Finance/tally_book.xlsx", + "dest": "tally_book.xlsx" + }, + { + "type": "cache_file", + "path": "diff.out" + } + ], + "expected": [ + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1x8m-korGI1PhJm8PAQVTlWYKneK4WKvn&export=download", + "dest": "tally_book_gt.xlsx" + }, + { + "type": "rule", + "rules": { + "unexpect": [ + ".+" + ] + } + } + ], + "options": [ + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + }, + {} + ] + } +} diff --git a/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json new file mode 100644 index 0000000..9d754dd --- /dev/null +++ b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json @@ -0,0 +1,116 @@ +{ + "id": "5990457f-2adb-467b-a4af-5c857c92d762", + "snapshot": "chrome", + "instruction": "Append one entry of AI researcher Yann LeCun from Google Scholar into an existing table researchers.xlsx.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://arxiv.org/abs/2005.14165", + "https://wallhaven.cc/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1EbQ75SHLthiZCBqCJtO0fLXJZcKrNler&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/researchers.xlsx" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "nautilus", + "/home/user/Desktop" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome", + 
"libreoffice_calc" + ], + "evaluator": { + "func": "literal_match", + "result": { + "type": "content_from_vm_file", + "path": "/home/user/Desktop/researchers.xlsx", + "file_type": "xlsx", + "file_content": "last_row" + }, + "expected": { + "type": "info_from_website", + "url": "https://scholar.google.com/citations?user=WLN3QrAAAAAJ&hl=en", + "infos": [ + { + "action": "inner_text", + "selector": "#gsc_prf_in" + }, + { + "action": "inner_text", + "selector": "#gsc_rsb_st > tbody > tr:nth-child(1) > td:nth-child(2)" + }, + { + "action": "inner_text", + "selector": "#gsc_rsb_st > tbody > tr:nth-child(2) > td:nth-child(2)" + }, + { + "action": "inner_text", + "selector": "#gsc_rsb_st > tbody > tr:nth-child(3) > td:nth-child(2)" + }, + { + "action": "inner_text", + "selector": "#gsc_a_b > tr:nth-child(1) > td.gsc_a_t > a" + }, + { + "action": "click_and_attribute", + "selector": [ + "#gsc_a_b > tr:nth-child(1) > td.gsc_a_t > a", + "#gsc_oci_title_gg > div:nth-child(1) > a" + ], + "attribute": "href" + } + ], + "backups": [ + "Yann LeCun", + "345074", + "147", + "372", + "Deep learning", + "https://creativecoding.soe.ucsc.edu/courses/cs523/slides/week3/DeepLearning_LeCun.pdf" + ] + }, + "options": { + "type": "list", + "ignore_case": true + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/67890eb6-6ce5-4c00-9e3d-fb4972699b06.json b/evaluation_examples/examples/multi_apps/67890eb6-6ce5-4c00-9e3d-fb4972699b06.json new file mode 100644 index 0000000..f414cc4 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/67890eb6-6ce5-4c00-9e3d-fb4972699b06.json @@ -0,0 +1,110 @@ +{ + "id": "67890eb6-6ce5-4c00-9e3d-fb4972699b06", + "snapshot": "libreoffice_calc", + "instruction": "I am an NLP researcher. Check out the best long paper awards of ACL from 2019 to 2022 and record the 'title', 'year', 'author list' and 'PDF link' into table best_awards_acl.xlsx on the desktop.
Separate authors only by commas, use official aclanthology urls not arxiv, and sort rows by year in descending order.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=1v5DgViUgAL771kBWy-qBddVGyjGmgFhK", + "path": "/home/user/Desktop/best_awards_acl.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/best_awards_acl.xlsx" + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://aclanthology.org/" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_calc", + "chrome" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "best_awards_acl.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5);" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/best_awards_acl.xlsx", + "dest": "best_awards_acl.xlsx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=17ORdOPl3sZGk3s4Wm0vESgImKZjdZBqX&export=download&authuser=0&confirm=t", + "dest": "gold_best_awards_acl.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git
a/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json b/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json index 9c7a2b7..28542f8 100644 --- a/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json +++ b/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json @@ -1,27 +1,25 @@ { - "id": "767a3271-56db-4745-ac5d-846ef05e6fe5", - "snapshot": "libreoffice_calc", - "instruction": "Hey there! I've been swamped with emails lately, and I'm trying to get organized. I'm part of a local community group, and we've been receiving a lot of emails about different events and volunteer opportunities. I need to sort through these emails and pull out specific information to keep track of everything. Could you help me extract details from emails that mention 'volunteer opportunities' and organize them into a spreadsheet? I'm looking for the event name, date, location, and contact person's email. It would be a huge help if we could have this info neatly laid out so we can easily see what's coming up and who to contact. Thanks a bunch!", - "source": "authors", - "config": [ - - ], - "trajectory": "trajectories/767a3271-56db-4745-ac5d-846ef05e6fe5", - "related_apps": [ - "thunderbird", - "libreoffice_calc" - ], - "evaluator": { - "postconfig": [], - "func": "", - "result": { - - }, - "expected": { - - }, - "options": { - - } - } + "id": "767a3271-56db-4745-ac5d-846ef05e6fe5", + "snapshot": "libreoffice_calc", + "instruction": "Hey there! I've been swamped with emails lately, and I'm trying to get organized. I'm part of a local community group, and we've been receiving a lot of emails about different events and volunteer opportunities. I need to sort through these emails and pull out specific information to keep track of everything. Could you help me extract details from emails that mention 'volunteer opportunities' and organize them into a spreadsheet? 
I'm looking for the event name, date, location, and contact person's email. It would be a huge help if we could have this info neatly laid out so we can easily see what's coming up and who to contact. Thanks a bunch!", + "source": "authors", + "config": [ + { + } + ], + "trajectory": "trajectories/767a3271-56db-4745-ac5d-846ef05e6fe5", + "related_apps": [ + "thunderbird", + "libreoffice_calc" + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } } diff --git a/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json b/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json new file mode 100644 index 0000000..a18657a --- /dev/null +++ b/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json @@ -0,0 +1,89 @@ +{ + "id": "778efd0a-153f-4842-9214-f05fc176b877", + "snapshot": "vlc", + "instruction": "I'm using libreoffice impress to write slideshows. I found that the video being played by VLC media player had a good soundtrack. 
Please extract the audio to planet.wav and use it as background music for this slideshow.", + "source": "https://researchguides.case.edu/c.php?g=1286426", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1I-ArULOnZzlGkS9UyIuE8Dyuyus27iZt&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/Minimalist_Business_Slides.pptx" + }, + { + "url": "https://drive.usercontent.google.com/download?id=1HiW-PokIfWRyRvLwlkiVKKNCB2h2bcx7&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/planet.mp4" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/Minimalist_Business_Slides.pptx" + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "vlc", + "--repeat", + "/home/user/Desktop/planet.mp4" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_impress", + "vlc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Minimalist_Business_Slides.pptx - LibreOffice Impress", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5);" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_audios", + "result": { + "type": "audio_in_slide", + "ppt_file_path": "/home/user/Desktop/Minimalist_Business_Slides.pptx", + "slide_index": 0, + "dest": "planet.wav" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1F_iBPLgVg-2g1LQ5rxKyCrFa9UitZ9yn&export=download&authuser=0&confirm=t", + "dest": "gold_planet.wav" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json 
b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json new file mode 100644 index 0000000..deda04e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json @@ -0,0 +1,89 @@ +{ + "id": "82e3c869-49f6-4305-a7ce-f3e64a0618e7", + "snapshot": "libreoffice_calc", + "instruction": "Please sift through the folder with all the event photos taken by our photographer. I need you to extract the photos featuring the presenters and place them in a separate folder named 'presenter'. Then, compress this folder into a zip file so I can easily share it with others later.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/IDS LLM seminar/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00659.jpg", + "url": "https://drive.google.com/uc?id=1NjnSab2aEtJytYajM9FqeXsLm4ItxTsJ&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00657.jpg", + "url": "https://drive.google.com/uc?id=1untYr5vS_wCVd3-5cfdUegBBILT4tHzF&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00574.jpg", + "url": "https://drive.google.com/uc?id=1V3g4z2QhIHx48LbeaNx31nMosxYPgtzY&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00554.jpg", + "url": "https://drive.google.com/uc?id=1DNz3iZmNk6-dazqXilOFkRpgUahU57zd&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00495.jpg", + "url": "https://drive.google.com/uc?id=1_crKe2tdI4Kjg0Xop8vHo7yN9KACJTJF&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00454.jpg", + "url": "https://drive.google.com/uc?id=1t9L7fVQVxjovTQufetlogulIctn7DF_L&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/82e3c869-49f6-4305-a7ce-f3e64a0618e7", + "related_apps": [ + "os", + "image" + ], + "evaluator": { + "func": 
"compare_image_list", + "result": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/presenter/DSC00657.jpg", + "/home/user/Desktop/presenter/DSC00574.jpg", + "/home/user/Desktop/presenter/DSC00554.jpg", + "/home/user/Desktop/presenter/DSC00495.jpg" + ], + "dest": [ + "DSC00657.jpg", + "DSC00574.jpg", + "DSC00554.jpg", + "DSC00495.jpg" + ], + "multi": "true" + }, + "expected": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1untYr5vS_wCVd3-5cfdUegBBILT4tHzF&export=download", + "https://drive.google.com/uc?id=1V3g4z2QhIHx48LbeaNx31nMosxYPgtzY&export=download", + "https://drive.google.com/uc?id=1DNz3iZmNk6-dazqXilOFkRpgUahU57zd&export=download", + "https://drive.google.com/uc?id=1_crKe2tdI4Kjg0Xop8vHo7yN9KACJTJF&export=download" + ], + "dest": [ + "DSC00657_gold.jpg", + "DSC00574_gold.jpg", + "DSC00554_gold.jpg", + "DSC00495_gold.jpg" + ], + "multi": "true" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json b/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json new file mode 100644 index 0000000..12263d2 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json @@ -0,0 +1,47 @@ +{ + "id": "8df7e444-8e06-4f93-8a1a-c5c974269d82", + "snapshot": "libreoffice_writer", + "instruction": "In the \"reminder.docx\" on Desktop is the submission instruction of our essay work. My essay is saved as docx file in /home/user. Please help me prepare the files for submission as required. 
", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1wrCJd2f0xYnrHcj6KDzCe96X9JsN3VI3&export=download&authuser=0&confirm=t&uuid=277cb94d-1981-4f4d-b1ba-bceac8146001&at=APZUnTWKU5DBnr_6-_ZlEdsvhpCz:1709633482673", + "path": "/home/user/Recruitment_and_retention_of_health_professionals_across_Europe.docx" + }, + { + "url": "https://drive.usercontent.google.com/download?id=1vzawJthEhQHcK4cUF0W9QT4zaFywO6aH&export=download&authuser=0&confirm=t&uuid=0fbb6a50-f9c1-44d2-b430-3af738d5fadc&at=APZUnTWyfv-N5f-EjnF8ob-VfCsD:1709633450986", + "path": "/home/user/Desktop/reminder.docx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/reminder.docx" + } + } + ], + "trajectory": "trajectories/8df7e444-8e06-4f93-8a1a-c5c974269d82", + "related_apps": [ + "libreoffice_writer", + "os" + ], + "evaluator": { + "func": "compare_zip_files", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1IKRu-dMFP4Aqzq5-4TOmOWVci0qvC27K&export=download&authuser=0&confirm=t&uuid=e2dabad2-5648-4bc3-a40f-f008089cd613&at=APZUnTVh5JD5nT3EvutwHIaSnJAT:1709633945616", + "dest": "Recruitment_and_retention_of_health_professionals_across_Europe.zip" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Recruitment_and_retention_of_health_professionals_across_Europe.zip", + "dest": "Recruitment_and_retention_of_health_professionals_across_Europe.zip" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json new file mode 100644 index 0000000..ce00111 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json @@ -0,0 +1,241 @@ +{ + "id": "8e116af7-7db7-4e35-a68b-b0939c066c78", + "snapshot": "libreoffice_calc", + 
"instruction": "Please update my bookkeeping sheet with the recent transactions from the provided folder, detailing my expenses over the past few days.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/my_bookkeeping.xlsx", + "url": "https://drive.google.com/uc?id=1QOSpTZPFzFZeC0tng4Gfws544LFln836&export=download" + }, + { + "path": "/home/user/Desktop/receipt_0.jpeg", + "url": "https://drive.google.com/uc?id=1b0BRc-BzXObVCUEonJfRbDsrgxZugj3U&export=download" + }, + { + "path": "/home/user/Desktop/receipt_1.jpg", + "url": "https://drive.google.com/uc?id=1S-JBDqwEf7Z_JXDItK_F4BOHgScTjlyN&export=download" + }, + { + "path": "/home/user/Desktop/receipt_2.jpg", + "url": "https://drive.google.com/uc?id=1Ys2abZi9_0y8sxuj2vCbC0OhjC6YdrC-&export=download" + }, + { + "path": "/home/user/Desktop/receipt_3.pdf", + "url": "https://drive.google.com/uc?id=1sKvBbGDpmUkv891xTqX7w5dtEvchQahd&export=download" + }, + { + "path": "/home/user/Desktop/receipt_4.jpg", + "url": "https://drive.google.com/uc?id=1kW7xH5bc2jRaKGDKHDrgSehTrPgkyzkc&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/my_bookkeeping.xlsx" + } + } + ], + "trajectory": "trajectories/8e116af7-7db7-4e35-a68b-b0939c066c78", + "related_apps": [ + "libreoffice_calc", + "os", + "image", + "pdf" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "my_bookkeeping.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/my_bookkeeping.xlsx", + "dest": "my_bookkeeping.xlsx" 
+ }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1ygEDdVlkf2ZyqMxJ_ktqo9G_g--rc6co&export=download", + "dest": "my_bookkeeping_gold.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "A1:A8", + "B1:B8", + "C1:C8", + "D1:D8", + "E1:E8" + ], + "type": "exact_match" + } + ] + }, + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "C9:C13" + ], + "type": "exact_match", + "ignore_case": true + } + ] + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D9", + "props": { + "value": { + "method": "approx:0.1", + "ref": -186.93 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D10", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3670 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D11", + "props": { + "value": { + "method": "approx:0.1", + "ref": -5.7 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D12", + "props": { + "value": { + "method": "approx:0.1", + "ref": -154.06 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D13", + "props": { + "value": { + "method": "approx:0.1", + "ref": -8.1 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E9", + "props": { + "value": { + "method": "approx:0.1", + "ref": 603.07 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E10", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3066.93 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E11", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3072.63 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E12", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3226.69 + } + } + }, + { + "type": "check_cell", + 
"sheet_idx": 0, + "coordinate": "E13", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3234.79 + } + } + } + ] + } + } +} diff --git a/evaluation_examples/examples/multi_apps/a503b07f-9119-456b-b75d-f5146737d24f.json b/evaluation_examples/examples/multi_apps/a503b07f-9119-456b-b75d-f5146737d24f.json new file mode 100644 index 0000000..f6f506f --- /dev/null +++ b/evaluation_examples/examples/multi_apps/a503b07f-9119-456b-b75d-f5146737d24f.json @@ -0,0 +1,47 @@ +{ + "id": "a503b07f-9119-456b-b75d-f5146737d24f", + "snapshot": "os", + "instruction": "I am preparing for my paper reading report. Could you help me download this image at https://github.com/xlang-ai/OpenAgents/blob/main/pics/openagents_overview.png and convert it to PDF format. Save it as \"openagents_overview.pdf\" on desktop.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + } + ], + "trajectory": "trajectories/a503b07f-9119-456b-b75d-f5146737d24f", + "related_apps": [ + "os", + "chrome", + "gimp", + "pdf" + ], + "evaluator": { + "func": "compare_pdfs", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1gD1odFNuLT6TP6rpAv_hot86pKcI5wY1&export=download&authuser=0&confirm=t&uuid=9d17c689-95d3-45e4-b093-0165de4045b4&at=APZUnTV9SssKQCoYyYeVsi8e9zcX:1709649491796", + "dest": "openagents_overview.pdf" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/openagents_overview.pdf", + "dest": "openagents_overview.pdf" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json b/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json new file mode 100644 index 0000000..9289558 --- /dev/null +++ 
b/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json @@ -0,0 +1,85 @@ +{ + "id": "b337d106-053f-4d37-8da0-7f9c4043a66b", + "snapshot": "os", + "instruction": "I've recently wanted to try using the Vim editor to edit code, but my Vim editor doesn't show relative line numbers by default. Please search the internet for a tutorial on how to display relative line numbers and make it the default setting for my local Vim.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.hotkey('ctrl', 'alt', 't'); time.sleep(0.5)" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Terminal" + } + } + ], + "trajectory": "trajectories/b337d106-053f-4d37-8da0-7f9c4043a66b", + "related_apps": [ + "os", + "chrome" + ], + "evaluator": { + "postconfig": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1CyhWjUS2oov4Fzc0VRwTh6LiS2Qu-T_8&export=download&authuser=0&confirm=t&uuid=9d0e2c62-895c-4bb3-a057-30cae60329ed&at=APZUnTVngSwARjYsWSmhSyHAqwID:1709647023362", + "path": "eval.sh" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": "chmod +x eval.sh", + "shell": true + } + } + ], + "func": "check_include_exclude", + "result": { + "type": "vm_command_line", + "command": "bash eval.sh", + "shell": true + }, + "expected": { + "type": "rule", + "rules": { + "include": [ + "The File Has Set Number!" 
+ ], + "exclude": [] + } + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json b/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json index 0b797a7..99c4da3 100644 --- a/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json +++ b/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json @@ -1,7 +1,7 @@ { "id": "b5062e3e-641c-4e3a-907b-ac864d2e7652", "snapshot": "libreoffice_calc", - "instruction": "I've got a mass of different categories of paper in PDF under folder \"~/Documents/Papers\". Please help me to extract the name, mail, and affiliation of the first author of all the papers and note them in an Excel table. The corresponding headers should be added in the table. Simply sort the authors by their full names ascendingly. The summary file should be saved as \"~/authors.xlsx\".", + "instruction": "I've got a mass of papers in PDF under folder \"~/Documents/Papers\". Please help me to extract the name, mail, and affiliation of the first author of all the papers and note them in an Excel table. The corresponding headers should be added in the table. Simply sort the authors by their full names ascendingly. The summary file should be saved as \"~/authors.xlsx\".", "source": "authors", "config": [ { diff --git a/evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json b/evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json new file mode 100644 index 0000000..0fbac44 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json @@ -0,0 +1,26 @@ +{ + "id": "d28853f4-500a-4749-b9dc-79c3576e759b", + "snapshot": "libreoffice_calc", + "instruction": "Hey there! So, I've got this bit of a situation and I'm hoping you can help me out. 
I've been working on gathering research for this big project at work, and I've ended up with a bunch of PDF files from various sources. Each PDF contains some really crucial information that I need, but here's the kicker - I need all of this info to be in one place where I can easily access and edit it, like in a Word document or a README file.\n\nThe PDFs are a mix of things - some are reports with statistics and findings, others are articles with key insights, and a few are just informational brochures with important dates and details. What I need is for someone to go through each PDF, extract all the relevant information, and then organize it into a single document. I'm talking about making sure that all the stats, insights, dates, and details from each PDF are neatly compiled, so I don't miss anything.\n\nAnd to make things a bit more complicated, some of these PDFs are scanned images of documents, so the text isn't directly selectable. I guess that means you might need to manually type out some parts or find a way to convert the images to text that can be copied.\n\nOnce everything's been compiled into this one document, could you save it as both a Word doc and a README file? I'd like to have it in these two formats so I can easily share it with my team and also have a version that's ready to be uploaded to our project's repository.\n\nOh, and could you make sure to organize the information by the source PDF? Like, maybe start each section with the title of the PDF or a brief description of its contents, followed by all the extracted info from that PDF. This way, it'll be easier for me and the team to trace back to the original sources if we need to.\n\nI know it's a lot to ask, but having all this information consolidated and organized is going to be a huge help for moving forward with the project. 
Thanks a bunch for helping me tackle this!", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/d28853f4-500a-4749-b9dc-79c3576e759b", + "related_apps": [ + + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json b/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json new file mode 100644 index 0000000..0037591 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json @@ -0,0 +1,101 @@ +{ + "id": "deec51c9-3b1e-4b9e-993c-4776f20e8bb2", + "snapshot": "libreoffice_calc", + "instruction": "Find a paper list of all the new foundation language models issued on 11th Oct. 2023 via arxiv daily, and organize it into the sheet I opened.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/New Large Language Models.xlsx", + "url": "https://drive.google.com/uc?id=1NJFAUDzatd5TbBqXeCy3-ok4BWj-xayT&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/New Large Language Models.xlsx" + } + } + ], + "trajectory": "trajectories/deec51c9-3b1e-4b9e-993c-4776f20e8bb2", + "related_apps": [ + "libreoffice_calc", + "chrome", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "New Large Language Models.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/New Large Language
Models.xlsx", + "dest": "New Large Language Models.xlsx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1BHOyjFo72b74YKWTqPMaoNvCzICkos-G&export=download", + "dest": "New Large Language Models Gold.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "B2:B5", + "C2:C5" + ], + "type": "exact_match" + }, + { + "range": [ + "A2:A5" + ], + "type": "fuzzy_match", + "threshold": 90, + "ignore_case": true + } + ] + } + ] + } + } +} diff --git a/evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json b/evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json new file mode 100644 index 0000000..0d86766 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json @@ -0,0 +1,59 @@ +{ + "id": "e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56", + "snapshot": "libreoffice_writer", + "instruction": "Install LanguageTool extension for my LibreOffice", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": ["libreoffice", "--writer"] + } + } + ], + "trajectory": "trajectories/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56", + "related_apps": ["chrome", "libreoffice", "os"], + "evaluator": { + "postconfig": [ + { + "type": "command", + "parameters": { + "command": ["grep", "-nHr", "languagetool", "/home/user/.config/libreoffice/4/user/uno_packages/cache/uno_packages/"], + "stdout": "grep.out" + } + }, + { + "type": "command", + "parameters": { + "command": ["apt", "list", "--installed"], + "stdout": "apt.out" + } + } + ], + "func": ["check_list", "check_list"], + "result": [ + { + "type": "cache_file", + "path": "grep.out" + }, + { + "type": "cache_file", + "path": "apt.out" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "expect": ["org\\.openoffice\\.languagetool\\.oxt"] + } + }, + { + "type": "rule", + "rules": { + 
"expect": ["openjdk-\\d+-(jre|jdk)"] + } + } + ] + } +} diff --git a/evaluation_examples/examples/multi_apps/e2392362-125e-4f76-a2ee-524b183a3412.json b/evaluation_examples/examples/multi_apps/e2392362-125e-4f76-a2ee-524b183a3412.json new file mode 100644 index 0000000..340f686 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/e2392362-125e-4f76-a2ee-524b183a3412.json @@ -0,0 +1,86 @@ +{ + "id": "e2392362-125e-4f76-a2ee-524b183a3412", + "snapshot": "chrome", + "instruction": "I recently started using the famous personal academic homepage template from academicpages.github.io to build my own personal homepage, and I have cloned it to my local ~/Code/Website folder. According to an online tutorial, I can configure my name and contact information in the _config.yaml file. However, I am not familiar with the YAML file format. Please help me find the sections related to the name and contact information in this file and change them to “Test Account” and “Test@gmail.com”.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Code/Website"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/.tmp.tar.xz", + "url": "https://drive.google.com/uc?id=1ordb5kRSPDKgRi7nYQchn8hGt-INELML&export=download" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", ".tmp.tar.xz", "-C", "/home/user/Code/Website/"] + } + }, + { + "type": "launch", + "parameters": { + "command": ["google-chrome", "--remote-debugging-port=1337"] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": ["https://academicpages.github.io/"] + } + } + ], + "trajectory": "trajectories/e2392362-125e-4f76-a2ee-524b183a3412", + "related_apps": ["chrome", "os", "vscode"], + "evaluator": { + "func": "check_json", + 
"options": {"is_yaml": true}, + "expected": { + "type": "rule", + "rules": { + "expect": [ + { + "key": ["name"], + "method": "eq", + "ref": "Test Account" + }, + { + "key": ["author", "name"], + "method": "eq", + "ref": "Test Account" + }, + { + "key": ["author", "email"], + "method": "eq", + "ref": "Test@gmail.com" + } + ] + } + }, + "result": { + "type": "vm_file", + "path": "/home/user/Code/Website/academicpages.github.io/_config.yml", + "dest": "_config.yaml" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json b/evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json new file mode 100644 index 0000000..131f65a --- /dev/null +++ b/evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json @@ -0,0 +1,71 @@ +{ + "id": "f5c13cdd-205c-4719-a562-348ae5cd1d91", + "snapshot": "thunderbird", + "instruction": "Here is a table recording tuition payments in ~/Documents/Departments/finance. I have already composed an e-mail to remind people who haven't finished payment yet. 
Please help me to add their emails to the receiver field.", + "source": "authors", + "config": [ + { + "type": "execute", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/Departments/finance"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712", + "path": "/home/user/thunderbird-profile.tar.gz" + }, + { + "url": "https://drive.google.com/uc?id=1k0_69RKAx18bEX00EJXtWmSwdfNVP9NA&export=download", + "path": "/home/user/.payment-reminder-mail-body.html" + }, + { + "url": "https://drive.google.com/uc?id=1nNc0NoOuP3Of0eGsKY-1kctg63vIjXl5&export=download", + "path": "/home/user/Documents/Departments/finance/tuition.xlsx" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "-xzv", + "--recursive-unlink", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": "/usr/bin/thunderbird -compose \"from='Anonym Tester ',subject='Reminder of Payment',body='$(cat /home/user/.payment-reminder-mail-body.html)'\"", + "shell": true + } + } + ], + "trajectory": "trajectories/f5c13cdd-205c-4719-a562-348ae5cd1d91", + "related_apps": ["thunderbird", "os", "libreoffice_calc"], + "evaluator": { + "func": "check_accessibility_tree", + "result": { + "type": "accessibility_tree" + }, + "expected": { + "type": "rule", + "rules": [ + {"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[attr|class=\"pill-label\"][name*=\"fox@someuniversity.edu\"]"]}, + {"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[attr|class=\"pill-label\"][name*=\"iron@someuniversity.edu\"]"]}, + {"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[attr|class=\"pill-label\"][name*=\"nancy@someuniversity.edu\"]"]}, + {"selectors": 
["tool-bar[attr|id=MsgHeadersToolbar] label[attr|class=\"pill-label\"][name*=\"stella@someuniversity.edu\"]"]} + ] + } + } +} diff --git a/evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json b/evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json new file mode 100644 index 0000000..0a56921 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json @@ -0,0 +1,31 @@ +{ + "id": "f8369178-fafe-40c2-adc4-b9b08a125456", + "snapshot": "chrome", + "instruction": "Help me to install Orchis theme from gnome-look.org and change to it for my GNOME desktop.", + "source": "https://itsfoss.com/install-switch-themes-gnome-shell", + "config": [], + "trajectory": "trajectories/f8369178-fafe-40c2-adc4-b9b08a125456", + "related_apps": ["chrome", "os"], + "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": ["gsettings", "get", "org.gnome.desktop.interface", "gtk-theme"], + "stdout": "gsettings.out" + } + } + ], + "func": "check_list", + "result": { + "type": "cache_file", + "path": "gsettings.out" + }, + "expected": { + "type": "rule", + "rules": { + "expect": ["Orchis"] + } + } + } +} diff --git a/evaluation_examples/examples/thunderbird/12086550-11c0-466b-b367-1d9e75b3910e.json b/evaluation_examples/examples/thunderbird/12086550-11c0-466b-b367-1d9e75b3910e.json index be50b20..678971d 100644 --- a/evaluation_examples/examples/thunderbird/12086550-11c0-466b-b367-1d9e75b3910e.json +++ b/evaluation_examples/examples/thunderbird/12086550-11c0-466b-b367-1d9e75b3910e.json @@ -48,11 +48,13 @@ }, "expected": { "type": "rule", - "rules": { - "selectors": [ - "application[name=Thunderbird] page-tab-list[attr|id=\"tabmail-tabs\"]>page-tab[name=\"About Profiles\"]" - ] - } + "rules": [ + { + "selectors": [ + "application[name=Thunderbird] page-tab-list[attr|id=\"tabmail-tabs\"]>page-tab[name=\"About Profiles\"]" + ] + } + ] }, "func": "check_accessibility_tree" 
} diff --git a/evaluation_examples/settings/googledrive/credentials.json b/evaluation_examples/settings/googledrive/credentials.json deleted file mode 100644 index 049985c..0000000 --- a/evaluation_examples/settings/googledrive/credentials.json +++ /dev/null @@ -1 +0,0 @@ -{"access_token": "ya29.a0AfB_byCmR_-BUvipM02LYvRdhSIsjxhdK4a1tpDABrZOjTOWPkPhs4gy070hbrq0tZBeld813_HqZ4q5GTeLzddfVVTWfiHdDSLlT4Bf5_f4ZURSB53XJAdAyYeI7LcT4RwF1WoAQhtldbIBJ4rizVk7L6O3486u6e9OaCgYKARQSARISFQHGX2Mi1U-dePZ0efbg8nydUEsP9Q0171", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0edHIvL2N4T8ICgYIARAAGA4SNwF-L9IrKhoX-pALW0nJ18niS1Gy3Lg9wF_G1joCoGHLM8v4-WJsibIB04KjWYCp_40Cs1WS7es", "token_expiry": "2024-03-02T13:59:28Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byCmR_-BUvipM02LYvRdhSIsjxhdK4a1tpDABrZOjTOWPkPhs4gy070hbrq0tZBeld813_HqZ4q5GTeLzddfVVTWfiHdDSLlT4Bf5_f4ZURSB53XJAdAyYeI7LcT4RwF1WoAQhtldbIBJ4rizVk7L6O3486u6e9OaCgYKARQSARISFQHGX2Mi1U-dePZ0efbg8nydUEsP9Q0171", "expires_in": 3599, "refresh_token": "1//0edHIvL2N4T8ICgYIARAAGA4SNwF-L9IrKhoX-pALW0nJ18niS1Gy3Lg9wF_G1joCoGHLM8v4-WJsibIB04KjWYCp_40Cs1WS7es", "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index a6082f9..6571f11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,3 +42,4 @@ func-timeout beautifulsoup4 dashscope google-generativeai +PyYaml