diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index 7a96288..b475008 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -450,6 +450,8 @@ class SetupController: query(str): query pattern string to search files or folder in google drive to delete, please refer to https://developers.google.com/drive/api/guides/search-files?hl=en about how to write query string. trash(bool): whether to delete files permanently or move to trash. By default, trash=false, completely delete it. + for mkdirs: + path(List[str]): the path in the google drive to create folder for upload: path(str): remote url to download file dest(List[str]): the path in the google drive to store the downloaded file diff --git a/desktop_env/evaluators/getters/__init__.py b/desktop_env/evaluators/getters/__init__.py index 43f4c1b..958d98d 100644 --- a/desktop_env/evaluators/getters/__init__.py +++ b/desktop_env/evaluators/getters/__init__.py @@ -23,9 +23,10 @@ from .chrome import ( get_active_tab_url_parse, get_gotoRecreationPage_and_get_html_content, get_url_dashPart, - get_active_url_from_accessTree + get_active_url_from_accessTree, + get_info_from_website ) -from .file import get_cloud_file, get_vm_file, get_cache_file +from .file import get_cloud_file, get_vm_file, get_cache_file, get_content_from_vm_file from .general import get_vm_command_line, get_vm_terminal_output from .gimp import get_gimp_config_file from .impress import get_audio_in_slide diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py index 86aefc4..8bd5842 100644 --- a/desktop_env/evaluators/getters/chrome.py +++ b/desktop_env/evaluators/getters/chrome.py @@ -11,7 +11,7 @@ import lxml.etree import requests from lxml.cssselect import CSSSelector from lxml.etree import _Element -from playwright.sync_api import sync_playwright +from playwright.sync_api import sync_playwright, expect from pydrive.auth import GoogleAuth from 
# --- desktop_env/evaluators/getters/chrome.py ---

def _click_through(page, selectors):
    """Click every selector in order, waiting for the page 'load' event after each click."""
    for sel in selectors:
        link = page.locator(sel)
        expect(link).to_be_visible()
        link.click()
        page.wait_for_load_state('load')


def _read_element(page, selector, info_dict, want_attribute):
    """Return the inner text of the element, or the attribute named by info_dict['attribute']."""
    ele = page.locator(selector)
    expect(ele).to_be_visible()
    if want_attribute:
        return ele.get_attribute(info_dict['attribute'])
    return ele.inner_text()


def get_info_from_website(env, config: Dict[Any, Any]) -> Any:
    """Get information from a website. Especially useful when the information may be updated through time.

    Args:
        env (Any): The environment object; only ``env.vm_ip`` is read here.
        config (Dict[Any, Any]): The configuration dictionary.
            - url (str): The URL of the website to visit.
            - infos (List[Dict[str, str]]): The list of information to be extracted from the website.
              Each dictionary contains:
                - action (str): one of the following (defaults to 'inner_text'):
                    - inner_text: extract the inner text of the element specified by the selector
                    - attribute: extract the attribute of the element specified by the selector
                    - click_and_inner_text: click the elements following the selectors one by one, then
                      extract the inner text of the last element
                    - click_and_attribute: click the elements following the selectors one by one, then
                      extract the attribute of the last element
                - selector (Union[str, List[str]]): The CSS selector(s) of the element(s). For the
                  ``click_and_*`` actions a single string is treated as a one-element list.
                - attribute (str): required by 'attribute' and 'click_and_attribute', the attribute to read.
            - backups (Any): The backup information to be returned if the extraction fails.

    Returns:
        A list with one extracted value per entry of ``config['infos']``, or ``config.get('backups')``
        when anything goes wrong (connection failure, element not visible, unsupported action, ...).
    """
    try:
        url = config["url"]
        host = env.vm_ip
        port = 9222  # fixme: this port is hard-coded, need to be changed from config file
        remote_debugging_url = f"http://{host}:{port}"
        with sync_playwright() as p:
            # connect to remote Chrome instance
            try:
                browser = p.chromium.connect_over_cdp(remote_debugging_url)
            except Exception as e:
                # If the connection fails (e.g., the agent closed the browser instance),
                # start a new browser instance and retry once.
                logger.warning(f'Failed to connect to {remote_debugging_url} ({e}); relaunching the browser.')
                app = 'chromium' if 'arm' in platform.machine() else 'google-chrome'
                # NOTE(review): the browser is started on port 1337 while we connect to 9222; this
                # presumably relies on a 9222 -> 1337 forwarder (the task configs start one with
                # socat) still being alive -- confirm.
                payload = json.dumps({"command": [
                    app,
                    "--remote-debugging-port=1337"
                ], "shell": False})
                headers = {"Content-Type": "application/json"}
                requests.post("http://" + host + ":5000/setup" + "/launch", headers=headers, data=payload)
                time.sleep(5)
                browser = p.chromium.connect_over_cdp(remote_debugging_url)

            page = browser.contexts[0].new_page()
            page.goto(url)
            page.wait_for_load_state('load')
            infos = []
            for info_dict in config.get('infos', []):
                # A previous click_and_* action may have navigated away; start every item from `url`.
                if page.url != url:
                    page.goto(url)
                    page.wait_for_load_state('load')
                action = info_dict.get('action', 'inner_text')
                if action not in ('inner_text', 'attribute', 'click_and_inner_text', 'click_and_attribute'):
                    raise NotImplementedError(f'The action {action} is not supported yet.')
                want_attribute = action in ('attribute', 'click_and_attribute')
                selector = info_dict['selector']
                if action in ('click_and_inner_text', 'click_and_attribute'):
                    # Accept a bare string as well: iterating it directly would walk its characters.
                    selectors = [selector] if isinstance(selector, str) else list(selector)
                    if not selectors:
                        continue  # nothing to click and nothing to read
                    _click_through(page, selectors[:-1])
                    selector = selectors[-1]
                infos.append(_read_element(page, selector, info_dict, want_attribute))
            return infos
    except Exception:
        # Use .get() so that a missing 'url' cannot make the handler itself raise, and attach the
        # traceback so the real cause of the fallback is visible in the logs.
        logger.error(
            f'[ERROR]: failed to obtain information from the website: {config.get("url")}. Use backup results instead.',
            exc_info=True)
        return config.get('backups', None)


# --- desktop_env/evaluators/getters/file.py ---

def get_content_from_vm_file(env, config: Dict[str, Any]) -> Any:
    """Download a file from the VM and return a piece of its content.

    Config:
        path (str): absolute path on the VM to fetch
        file_type (str): type of the file; only 'xlsx' is supported
        file_content (str): which content to extract; only 'last_row' is supported

    Returns:
        For 'xlsx' / 'last_row': the last row of the first sheet as a list of strings
        (an empty list when the sheet has no data rows).

    Raises:
        NotImplementedError: for an unsupported ``file_type`` or ``file_content``.
    """
    path = config["path"]
    file_path = get_vm_file(env, {"path": path, "dest": os.path.basename(path)})
    file_type, file_content = config['file_type'], config['file_content']
    if file_type != 'xlsx':
        raise NotImplementedError(f"File type {file_type} not supported")
    if file_content != 'last_row':
        # Previously this fell through and silently returned None.
        raise NotImplementedError(f"File content {file_content} not supported for file type {file_type}")
    df = pd.read_excel(file_path)
    if df.empty:
        # iloc[-1] would raise IndexError on a sheet without data rows.
        return []
    return df.iloc[-1].astype(str).tolist()
Create a temporary directory to extract the audio file - with tempfile.TemporaryDirectory() as tmpdirname: - # Extract the audio file - myzip.extract(audio_file_path, tmpdirname) - # Get the full path of the extracted audio file - extracted_audio_path = os.path.join(tmpdirname, audio_file_path) - # Return the extracted audio file path - audio_file_path = extracted_audio_path - + tmpdirname = os.path.dirname(ppt_file_localhost_path) + myzip.extract(audio_file_path, tmpdirname) + audio_file_path = os.path.join(tmpdirname, audio_file_path) + return audio_file_path + # with tempfile.TemporaryDirectory() as tmpdirname: + # # Extract the audio file + # myzip.extract(audio_file_path, tmpdirname) + # # Get the full path of the extracted audio file + # extracted_audio_path = os.path.join(tmpdirname, audio_file_path) + # # Return the extracted audio file path + # audio_file_path = extracted_audio_path else: # the audio file is external to the .pptx file # Return the audio file path assert target.startswith("file://"), target audio_file_path = target[7:] - + break if audio_file_path is None: return None diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 27cd341..d5394e5 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -47,7 +47,8 @@ from .docs import ( check_file_exists, check_tabstops, compare_contains_image, - compare_docx_images + compare_docx_images, + compare_image_text ) from .general import ( check_csv, @@ -60,7 +61,8 @@ from .general import ( fuzzy_match, check_include_exclude, check_direct_json_object, - diff_text_file + diff_text_file, + literal_match ) from .gimp import ( check_brightness_decrease_and_structure_sim, diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py index 3c367b3..5018f5c 100644 --- a/desktop_env/evaluators/metrics/chrome.py +++ b/desktop_env/evaluators/metrics/chrome.py @@ -91,6 +91,17 @@ 
def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float: bookmark_bar_websites_urls = [bookmark['url'] for bookmark in bookmarks['bookmark_bar']['children'] if bookmark['type'] == 'url'] return 1. if set(bookmark_bar_websites_urls) == set(rule['urls']) else 0. + elif rule['type'] == "liked_authors_websites_urls": + # Check if "liked authors" folder exists + liked_authors_folder = next((bookmark for bookmark in bookmarks['bookmark_bar']['children'] if + bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None) + if liked_authors_folder: + # Check if it contains the specified URLs + liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if + bookmark['type'] == 'url'] + return 1. if set(liked_authors_urls) == set(rule['urls']) else 0. + else: + return 0. else: raise TypeError(f"{rule['type']} not support yet!") diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index b44df9c..a38524d 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -58,6 +58,8 @@ def contains_page_break(docx_file): def compare_docx_files(file1, file2, **options): ignore_blanks = options.get('ignore_blanks', True) + ignore_case = options.get('ignore_case', False) + ignore_order = options.get('ignore_order', False) content_only = options.get('content_only', False) def get_paragraph_texts_odt(document): @@ -82,11 +84,17 @@ def compare_docx_files(file1, file2, **options): doc2 = Document(file2) doc1_paragraphs = [p.text for p in doc1.paragraphs] doc2_paragraphs = [p.text for p in doc2.paragraphs] + if ignore_order: + doc1_paragraphs = sorted(doc1_paragraphs) + doc2_paragraphs = sorted(doc2_paragraphs) elif file1.endswith('.odt') and file2.endswith('.odt'): doc1 = load(file1) doc2 = load(file2) doc1_paragraphs = get_paragraph_texts_odt(doc1) doc2_paragraphs = get_paragraph_texts_odt(doc2) + if ignore_order: + doc1_paragraphs = 
def compare_image_text(image_path, rule):
    """Check via OCR whether an image contains a given piece of text.

    Args:
        image_path: path of the image file to inspect; ``None`` scores 0.
        rule: dict with
            - type (str): only 'text' is supported
            - text (str): the substring that must appear in the OCR output

    Returns:
        1 if ``rule['text']`` occurs in the text recognised in the image, otherwise 0.

    Raises:
        ValueError: if ``rule['type']`` is not 'text'.
    """
    # Validate the rule first so a bad rule is reported without running OCR.
    if rule['type'] != 'text':
        raise ValueError("Unsupported rule type")
    if image_path is None:
        # No image to inspect, so the expected text cannot be present.
        return 0

    # Imported here so that this block does not depend on a module-level import of the OCR package.
    import pytesseract

    # Use the image as a context manager so the underlying file handle is released.
    with Image.open(image_path) as img:
        img_text = pytesseract.image_to_string(img)
    return 1 if rule['text'] in img_text else 0
def literal_match(result: Any, expected: Any, **options) -> float:
    """Compare `result` and `expected` literally after converting them to strings.

    Args:
        result: the value produced by the task (a scalar, or a list/tuple when type='list').
        expected: the reference value, of the same kind as `result`.
        **options:
            - type (str): 'str' (default) compares ``str(result)`` with ``str(expected)``;
              'list' compares two lists/tuples element-wise after ``str()`` conversion.
            - ignore_case (bool): compare case-insensitively. Defaults to False.

    Returns:
        1.0 on a match, 0.0 otherwise. With type='list', 0.0 is also returned when either
        argument is not a list/tuple or the lengths differ.

    Raises:
        NotImplementedError: if `type` is neither 'str' nor 'list'.
    """
    literal_type = options.get('type', 'str')
    ignore_case = options.get('ignore_case', False)

    def normalize(value: Any) -> str:
        text = str(value)
        return text.lower() if ignore_case else text

    if literal_type == 'str':
        return float(normalize(result) == normalize(expected))
    if literal_type == 'list':
        if not isinstance(result, (list, tuple)) or not isinstance(expected, (list, tuple)):
            return .0
        if len(result) != len(expected):
            return .0
        return float([normalize(s) for s in result] == [normalize(s) for s in expected])
    # Report the requested option value, not the builtin `type` class.
    raise NotImplementedError(f"Type {literal_type} not supported")
b/desktop_env/evaluators/metrics/utils.py @@ -318,7 +318,7 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: return shared_strs[int(cell["c"]["v"])] if cell["c"]["@t"] == "str": return cell["c"]["v"] - except ValueError: + except (KeyError, ValueError): return None # }}} read_cell_value # diff --git a/evaluation_examples/examples/multi_apps/02ce9a50-7af2-47ed-8596-af0c230501f8.json b/evaluation_examples/examples/multi_apps/02ce9a50-7af2-47ed-8596-af0c230501f8.json new file mode 100644 index 0000000..092a706 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/02ce9a50-7af2-47ed-8596-af0c230501f8.json @@ -0,0 +1,61 @@ +{ + "id": "02ce9a50-7af2-47ed-8596-af0c230501f8", + "snapshot": "libreoffice_writer", + "instruction": "I'm using libreoffice writer to write a tutorial about linux, and now I want to show the results obtained by using the \"ls\" command in /home/user. Please run this command and save the screenshot as 'ls.png' on Desktop.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1cOYh50n0S3-Q_h8HDTCEPv2v6NsJl07m&export=download&authuser=0&confirm=t&uuid=9741b52a-d998-4875-94ec-ab0a45a40bf9&at=APZUnTX__WVALT4i0EIQoRAWkm-k:1709715620836", + "path": "/home/user/Desktop/top-10-linux-commands-for-newbies.docx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/top-10-linux-commands-for-newbies.docx" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('f11'); time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.scroll(-20)" + ] + } + } + ], + "trajectory": "trajectories/02ce9a50-7af2-47ed-8596-af0c230501f8", + "related_apps": [ + "libreoffice_writer", + "os" + ], + "evaluator": { + "func": "compare_image_text", + 
"result": { + "type": "vm_file", + "path": "/home/user/Desktop/ls.png", + "dest": "ls.png" + }, + "expected": { + "type": "rule", + "rules": { + "type": "text", + "text": "$ ls\n" + } + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/09a37c51-e625-49f4-a514-20a773797a8a.json b/evaluation_examples/examples/multi_apps/09a37c51-e625-49f4-a514-20a773797a8a.json new file mode 100644 index 0000000..30c04b1 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/09a37c51-e625-49f4-a514-20a773797a8a.json @@ -0,0 +1,48 @@ +{ + "id": "09a37c51-e625-49f4-a514-20a773797a8a", + "snapshot": "libreoffice_writer", + "instruction": "I received a request from my friend that he wanted me to help him modify a picture. On the Desktop is the requirement doc and the picture to be adjusted. Modify the image as he said and save modified pic as \"pic.jpg\" on Desktop. Thanks!", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1npPOtBdE5t_mzScyA94vDlxB8SCZNubv&export=download&authuser=0&confirm=t&uuid=64ee33b7-66a5-4f8e-9c98-95bea9521504&at=APZUnTW74Ntotdg-xRjaoxuDFju9:1709727407282", + "path": "/home/user/Desktop/requirment.docx" + }, + { + "url": "https://drive.usercontent.google.com/download?id=1eQHixX0TTExye0lbmHQny5CYhaREB5fQ&export=download&authuser=0&confirm=t&uuid=8546ef19-ea50-4e1a-9296-2db0302afd62&at=APZUnTVEmYOeAIdok_anTWOVHBCp:1709727527585", + "path": "/home/user/Desktop/ChMkKV8wsR6IBfEtABYfc0Tgu9cAAA1lQHO_78AFh-L733.jpg" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/requirment.docx" + } + } + ], + "trajectory": "trajectories/09a37c51-e625-49f4-a514-20a773797a8a", + "related_apps": [ + "libreoffice_writer", + "gimp", + "os" + ], + "evaluator": { + "func": "compare_images", + "expected": { + "type": "cloud_file", + "path": 
"https://drive.usercontent.google.com/download?id=1Ee1vNyG7gGpLKK2VlLfj6PxcmdkMdvqK&export=download&authuser=0&confirm=t&uuid=1f441c5d-b62d-4850-870f-8e8f113a4091&at=APZUnTWEvKSSkuGBWzen0S9L7aHP:1709727474803", + "dest": "pic.jpg" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/pic.jpg", + "dest": "pic.jpg" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json b/evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json new file mode 100644 index 0000000..30d5c38 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json @@ -0,0 +1,96 @@ +{ + "id": "185f29bd-5da0-40a6-b69c-ba7f4e0324ef", + "snapshot": "libreoffice_calc", + "instruction": "Transfer the data from our 'Employee Performance Evaluation Summary' Excel sheet into our standardized PDF evaluation forms. Each employee's evaluation data should be accurately filled into the designated fields of the PDF form. It's crucial that the final PDF documents retain a uniform and professional look, ready for distribution to our staff or for filing purposes. Furthermore, please ensure that each PDF file is named according to the employee's name as it appears in the Excel document. This will greatly streamline our evaluation process and enhance our efficiency in managing employee performance records. 
Oh, use \"√\" as mark on characters.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/Employee Performance Evaluation Summary.xlsx", + "url": "https://drive.google.com/uc?id=1uOzi66bzO_WUnoS4Oqsodrd7_YPLatEk&export=download" + }, + { + "path": "/home/user/Desktop/review_template.pdf", + "url": "https://drive.google.com/uc?id=1YJ4RPGFUuS48tBh31gBerA16JSMw498w&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/Employee Performance Evaluation Summary.xlsx" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/review_template.pdf" + } + } + ], + "trajectory": "trajectories/185f29bd-5da0-40a6-b69c-ba7f4e0324ef", + "related_apps": [ + "libreoffice_calc", + "os", + "pdf" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1kZM90nA1krRmV9ug5_BBe8VlrZRVLiLK&export=download", + "https://drive.google.com/uc?id=1zyLzYYThwyit9ciXpfNfPFlBomolOauY&export=download", + "https://drive.google.com/uc?id=1gMT7JBftuymajMAO5rwksORpeVq3uGmH&export=download", + "https://drive.google.com/uc?id=1x0DdtUSZyBifl1tGIWlWKn255WusJeR4&export=download", + "https://drive.google.com/uc?id=1UAcG32WO8XCXElcanjGwbSpJwFuyOkts&export=download", + "https://drive.google.com/uc?id=1PRgryg7Y5evKnDG2LPtAttVp9qAf5VyZ&export=download", + "https://drive.google.com/uc?id=1JxEDriCS2W7BQLdkIgxu_WFCRa9ib4D7&export=download" + ], + "dest": [ + "Alex Lee_Gold.pdf", + "David Wilson_Gold.pdf", + "Emily Johnson_Gold.pdf", + "John Doe_Gold.pdf", + "Linda Green_Gold.pdf", + "Michael Brown_Gold.pdf", + "Sophia Carter_Gold.pdf" + ], + "multi": true, + "gives": [0,1,2,3,4,5,6] + }, + "expected": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/Alex Lee.pdf", + "/home/user/Desktop/David Wilson.pdf", + 
"/home/user/Desktop/Emily Johnson.pdf", + "/home/user/Desktop/John Doe.pdf", + "/home/user/Desktop/Linda Green.pdf", + "/home/user/Desktop/Michael Brown.pdf", + "/home/user/Desktop/Sophia Carter.pdf" + ], + "dest": [ + "Alex Lee.pdf", + "David Wilson.pdf", + "Emily Johnson.pdf", + "John Doe.pdf", + "Linda Green.pdf", + "Michael Brown.pdf", + "Sophia Carter.pdf" + ], + "multi": true, + "gives": [0,1,2,3,4,5,6] + } + } +} diff --git a/evaluation_examples/examples/multi_apps/22a4636f-8179-4357-8e87-d1743ece1f81.json b/evaluation_examples/examples/multi_apps/22a4636f-8179-4357-8e87-d1743ece1f81.json new file mode 100644 index 0000000..d22ddaf --- /dev/null +++ b/evaluation_examples/examples/multi_apps/22a4636f-8179-4357-8e87-d1743ece1f81.json @@ -0,0 +1,108 @@ +{ + "id": "22a4636f-8179-4357-8e87-d1743ece1f81", + "snapshot": "chrome", + "instruction": "Please help me convert the file \"Meeting-Agenda.docx\" to a pdf file and upload to the folder 'meetings' in my google drive.", + "source": "https://marketplace.uipath.com/listings/merge-pdfs-from-gmail-email-attachments-and-upload-to-gogle-drive", + "config": [ + { + "type": "googledrive", + "parameters": { + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "operation": [ + "delete", + "mkdirs" + ], + "args": [ + { + "query": "title = 'Meeting-Agenda.docx' or title = 'Meeting-Agenda.pdf'", + "trash": false + }, + { + "path": [ + "meetings" + ] + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://news.google.com", + "https://x.com", + "https://www.deepl.com" + ] + } + }, + { + "type": "login", + "parameters": { + "settings_file": "evaluation_examples/settings/google/settings.json", + "platform": 
"googledrive" + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=17fkMG4guromDzAHNCzzJieZHa2nJDBpc&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/Meeting-Agenda.docx" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--writer", + "/home/user/Desktop/Meeting-Agenda.docx" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_writer", + "chrome" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "googledrive_file", + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "path": [ + "meetings", + "Meeting-Agenda.pdf" + ], + "dest": "Meeting-Agenda.pdf" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1wHWQ6mTJcNLtrL83GrUPzxx2HeOC4L8T&export=download&authuser=0&confirm=t", + "dest": "gold-Meeting-Agenda.pdf" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json b/evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json new file mode 100644 index 0000000..3c5f349 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json @@ -0,0 +1,67 @@ +{ + "id": "236833a3-5704-47fc-888c-4f298f09f799", + "snapshot": "chrome", + "instruction": "Find daily papers on Huggingface and take down all the titles, authors and the abstracts of papers on 1st March, 2024 in the doc file 'paper_reading_2024_03_01.docx' on desktop. 
Each paragraph (split by empty lines) conforms to the following format:\nTitle: xxx\nAuthors: xxx, xxx, xxx\nAbstract: xxxxxxxx.\nArxiv PDF: https://xxxx.pdf", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://huggingface.co/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--writer" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome", + "libreoffice_writer" + ], + "evaluator": { + "func": "compare_docx_files", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/paper_reading_2024_03_01.docx", + "dest": "paper_reading_2024_03_01.docx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1TUTihXD93bIlekuYy_44fmXAhI1KVol4&export=download&authuser=0&confirm=t", + "dest": "gold_paper_reading_2024_03_01.docx" + }, + "options": { + "ignore_blanks": true, + "ignore_case": true, + "ignore_order": true + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json new file mode 100644 index 0000000..239d695 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json @@ -0,0 +1,102 @@ +{ + "id": "2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e", + "snapshot": "libreoffice_calc", + "instruction": "Could you please take a moment to review the 'case study' file located within the 'student work' folder? 
I'm particularly interested in ensuring that the references section at the end of the document adheres to the APA 7th edition formatting guidelines. If it turns out that the current formatting does not align with APA 7 standards, I would greatly appreciate your assistance in making the necessary adjustments to comply with those guidelines. ", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/students work/", + "/home/user/Desktop/Lec powerpoint/", + "/home/user/Desktop/Grammar test/", + "/home/user/Desktop/Grammar rules PDF/", + "/home/user/Desktop/FDI/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/students work/Zheng He .docx", + "url": "https://drive.google.com/uc?id=1wI4141LAthnY5m6qcCUaGgDooe4wiTgz&export=download" + }, + { + "path": "/home/user/Desktop/students work/The literature reviews of weekly readings.docx", + "url": "https://drive.google.com/uc?id=18zoZCNtP-wTkxXp2FhH3O_NdLZKVMPIr&export=download" + }, + { + "path": "/home/user/Desktop/students work/The British Justice System.docx", + "url": "https://drive.google.com/uc?id=1z3YHSN4CvC5kN1AwTWB_-plRS4p5GAch&export=download" + }, + { + "path": "/home/user/Desktop/students work/quiz2.docx", + "url": "https://drive.google.com/uc?id=1R5Bii_kvnv_fZVXV-6DMt6Hgq-1gXMo1&export=download" + }, + { + "path": "/home/user/Desktop/students work/quiz.docx", + "url": "https://drive.google.com/uc?id=1PvlGMVX7YkricrjoPRe0e5VQlHeozRPD&export=download" + }, + { + "path": "/home/user/Desktop/students work/Q1&2&3.docx", + "url": "https://drive.google.com/uc?id=1kLQ3lnba6p9lqikHhKDdbqrYagHnZWU_&export=download" + }, + { + "path": "/home/user/Desktop/students work/Photo Ethics in Journalism.docx", + "url": "https://drive.google.com/uc?id=1V6nG6HP_9Kb5KBCRTpaGsRTdPxnJSmRm&export=download" + }, + { + "path": "/home/user/Desktop/students work/cassie.docx", + "url": 
"https://drive.google.com/uc?id=1cW9TGJy56vossXxDsdnutPyCbR70af7M&export=download" + }, + { + "path": "/home/user/Desktop/students work/case study.docx", + "url": "https://drive.google.com/uc?id=11GzpoZvp4qnL2ukXdpbhH-a3zOIHhtDx&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules02.pdf", + "url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules01.pdf", + "url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/fragrules.pdf", + "url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/csfsrules.pdf", + "url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download" + }, + { + "path": "/home/user/Desktop/Public Lecture Teaching Plan.docx", + "url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download" + }, + { + "path": "/home/user/Desktop/Course Timetable.xlsx", + "url": "https://drive.google.com/uc?id=1NGtahknRq_kXsXlw0tRQ1_CZp9SljoVg&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e", + "related_apps": [ + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } +} diff --git a/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json b/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json new file mode 100644 index 0000000..e1f0544 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json @@ -0,0 +1,26 @@ +{ + "id": "3a93cae4-ad3e-403e-8c12-65303b271818", + "snapshot": "libreoffice_calc", + "instruction": "Could you please add a two-hour lecture slot to my weekly course timetable, 
scheduled for every Wednesday at 12 PM? It seems I accidentally omitted that when setting up my schedule. I'd appreciate you taking care of that for me. Thanks!", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/3a93cae4-ad3e-403e-8c12-65303b271818", + "related_apps": [ + + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json b/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json new file mode 100644 index 0000000..ca06a97 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json @@ -0,0 +1,44 @@ +{ + "id": "4c26e3f3-3a14-4d86-b44a-d3cedebbb487", + "snapshot": "libreoffice_impress", + "instruction": "I found the image on the second slide is too dark. Could you make it brighter for me? Keep the modified picture on Desktop. Name it \"background\". 
Thanks!", + "source": "https://www.quora.com/How-do-I-edit-a-photo-in-GIMP", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1SD5GlVUuG9RijMCEzuniPq1dIWxfr8xQ&export=download&authuser=0&confirm=t&uuid=67c13e54-9368-4e77-bda9-31776ec37479&at=APZUnTXGR14_2pztP7HoAiELiztJ:1709709598471", + "path": "/home/user/Desktop/PPT-Template_widescreen.pptx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/PPT-Template_widescreen.pptx" + } + } + ], + "trajectory": "trajectories/4c26e3f3-3a14-4d86-b44a-d3cedebbb487", + "related_apps": [ + "gimp", + "libreoffice_impress", + "os" + ], + "evaluator": { + "func": "check_brightness_decrease_and_structure_sim", + "expected": { + "type": "vm_file", + "path": "/home/user/Desktop/background.png", + "dest": "background.png" + }, + "result": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1lpRSXEoZq3ENOG5ekaAsBQSNv5ig0mDr&export=download&authuser=0&confirm=t&uuid=4cb10a33-81b3-4814-a969-f469832e33e5&at=APZUnTWN3pyiVpS003vLOgCcq2gu:1709710047375", + "dest": "image_original.png" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json new file mode 100644 index 0000000..9d754dd --- /dev/null +++ b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json @@ -0,0 +1,116 @@ +{ + "id": "5990457f-2adb-467b-a4af-5c857c92d762", + "snapshot": "chrome", + "instruction": "Append one entry of AI researcher Yann LeCun from Google Scholar into an existing table researchers.xlsx.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + 
"tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://arxiv.org/abs/2005.14165", + "https://wallhaven.cc/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1EbQ75SHLthiZCBqCJtO0fLXJZcKrNler&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/researchers.xlsx" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "nautilus", + "/home/user/Desktop" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome", + "libreoffice_calc" + ], + "evaluator": { + "func": "literal_match", + "result": { + "type": "content_from_vm_file", + "path": "/home/user/Desktop/researchers.xlsx", + "file_type": "xlsx", + "file_content": "last_row" + }, + "expected": { + "type": "info_from_website", + "url": "https://scholar.google.com/citations?user=WLN3QrAAAAAJ&hl=en", + "infos": [ + { + "action": "inner_text", + "selector": "#gsc_prf_in" + }, + { + "action": "inner_text", + "selector": "#gsc_rsb_st > tbody > tr:nth-child(1) > td:nth-child(2)" + }, + { + "action": "inner_text", + "selector": "#gsc_rsb_st > tbody > tr:nth-child(2) > td:nth-child(2)" + }, + { + "action": "inner_text", + "selector": "#gsc_rsb_st > tbody > tr:nth-child(3) > td:nth-child(2)" + }, + { + "action": "inner_text", + "selector": "#gsc_a_b > tr:nth-child(1) > td.gsc_a_t > a" + }, + { + "action": "click_and_attribute", + "selector": [ + "#gsc_a_b > tr:nth-child(1) > td.gsc_a_t > a", + "#gsc_oci_title_gg > div:nth-child(1) > a" + ], + "attribute": "href" + } + ], + "backups": [ + "Yann LeCun", + "345074", + "147", + "372", + "Deep learning", + "https://creativecoding.soe.ucsc.edu/courses/cs523/slides/week3/DeepLearning_LeCun.pdf" + ] + }, + "options": { + "type": "list", + "ignore_case": true + } + } +} \ No newline at end of file diff --git 
a/evaluation_examples/examples/multi_apps/67890eb6-6ce5-4c00-9e3d-fb4972699b06.json b/evaluation_examples/examples/multi_apps/67890eb6-6ce5-4c00-9e3d-fb4972699b06.json new file mode 100644 index 0000000..f414cc4 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/67890eb6-6ce5-4c00-9e3d-fb4972699b06.json @@ -0,0 +1,110 @@ +{ + "id": "67890eb6-6ce5-4c00-9e3d-fb4972699b06", + "snapshot": "libreoffice_calc", + "instruction": "I am an NLP researcher. Check out the best long paper awards of ACL from 2019 to 2022 and record the 'title', 'year', 'author list' and 'PDF link' into table best_awards_acl.xlsx on the desktop. Separate authors only by commas, use official aclanthology urls not arxiv, and sort rows by year in descending order.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=1v5DgViUgAL771kBWy-qBddVGyjGmgFhK", + "path": "/home/user/Desktop/best_awards_acl.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/best_awards_acl.xlsx" + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://aclanthology.org/" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_calc", + "chrome" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "best_awards_acl.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5);" + ] + } + }, + { +
"type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/best_awards_acl.xlsx", + "dest": "best_awards_acl.xlsx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=17ORdOPl3sZGk3s4Wm0vESgImKZjdZBqX&export=download&authuser=0&confirm=t", + "dest": "gold_best_awards_acl.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json b/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json index 9c7a2b7..28542f8 100644 --- a/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json +++ b/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json @@ -1,27 +1,25 @@ { - "id": "767a3271-56db-4745-ac5d-846ef05e6fe5", - "snapshot": "libreoffice_calc", - "instruction": "Hey there! I've been swamped with emails lately, and I'm trying to get organized. I'm part of a local community group, and we've been receiving a lot of emails about different events and volunteer opportunities. I need to sort through these emails and pull out specific information to keep track of everything. Could you help me extract details from emails that mention 'volunteer opportunities' and organize them into a spreadsheet? I'm looking for the event name, date, location, and contact person's email. It would be a huge help if we could have this info neatly laid out so we can easily see what's coming up and who to contact. 
Thanks a bunch!", - "source": "authors", - "config": [ - - ], - "trajectory": "trajectories/767a3271-56db-4745-ac5d-846ef05e6fe5", - "related_apps": [ - "thunderbird", - "libreoffice_calc" - ], - "evaluator": { - "postconfig": [], - "func": "", - "result": { - - }, - "expected": { - - }, - "options": { - - } - } + "id": "767a3271-56db-4745-ac5d-846ef05e6fe5", + "snapshot": "libreoffice_calc", + "instruction": "Hey there! I've been swamped with emails lately, and I'm trying to get organized. I'm part of a local community group, and we've been receiving a lot of emails about different events and volunteer opportunities. I need to sort through these emails and pull out specific information to keep track of everything. Could you help me extract details from emails that mention 'volunteer opportunities' and organize them into a spreadsheet? I'm looking for the event name, date, location, and contact person's email. It would be a huge help if we could have this info neatly laid out so we can easily see what's coming up and who to contact. Thanks a bunch!", + "source": "authors", + "config": [ + { + } + ], + "trajectory": "trajectories/767a3271-56db-4745-ac5d-846ef05e6fe5", + "related_apps": [ + "thunderbird", + "libreoffice_calc" + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } } diff --git a/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json b/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json new file mode 100644 index 0000000..a18657a --- /dev/null +++ b/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json @@ -0,0 +1,89 @@ +{ + "id": "778efd0a-153f-4842-9214-f05fc176b877", + "snapshot": "vlc", + "instruction": "I'm using libreoffice impress to write slideshows. I found that the video being played by VLC media player had a good soundtrack. 
Please extract the audio to planet.wav and use it as background music for this slideshow.", + "source": "https://researchguides.case.edu/c.php?g=1286426", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1I-ArULOnZzlGkS9UyIuE8Dyuyus27iZt&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/Minimalist_Business_Slides.pptx" + }, + { + "url": "https://drive.usercontent.google.com/download?id=1HiW-PokIfWRyRvLwlkiVKKNCB2h2bcx7&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/planet.mp4" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/Minimalist_Business_Slides.pptx" + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "vlc", + "--repeat", + "/home/user/Desktop/planet.mp4" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_impress", + "vlc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Minimalist_Business_Slides.pptx - LibreOffice Impress", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5);" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_audios", + "result": { + "type": "audio_in_slide", + "ppt_file_path": "/home/user/Desktop/Minimalist_Business_Slides.pptx", + "slide_index": 0, + "dest": "planet.wav" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1F_iBPLgVg-2g1LQ5rxKyCrFa9UitZ9yn&export=download&authuser=0&confirm=t", + "dest": "gold_planet.wav" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json 
b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json index 1214c6d..ce00111 100644 --- a/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json +++ b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json @@ -34,21 +34,208 @@ } ] } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/my_bookkeeping.xlsx" + } } ], "trajectory": "trajectories/8e116af7-7db7-4e35-a68b-b0939c066c78", "related_apps": [ - "", - "" + "libreoffice_calc", + "os", + "image", + "pdf" ], "evaluator": { - "postconfig": [], - "func": "", + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "my_bookkeeping.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_table", "result": { + "type": "vm_file", + "path": "/home/user/Desktop/my_bookkeeping.xlsx", + "dest": "my_bookkeeping.xlsx" }, "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1ygEDdVlkf2ZyqMxJ_ktqo9G_g--rc6co&export=download", + "dest": "my_bookkeeping_gold.xlsx" }, "options": { + "rules": [ + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "A1:A8", + "B1:B8", + "C1:C8", + "D1:D8", + "E1:E8" + ], + "type": "exact_match" + } + ] + }, + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "C9:C13" + ], + "type": "exact_match", + "ignore_case": true + } + ] + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D9", + "props": { + "value": { + "method": "approx:0.1", + "ref": -186.93 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 
0, + "coordinate": "D10", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3670 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D11", + "props": { + "value": { + "method": "approx:0.1", + "ref": -5.7 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D12", + "props": { + "value": { + "method": "approx:0.1", + "ref": -154.06 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D13", + "props": { + "value": { + "method": "approx:0.1", + "ref": -8.1 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E9", + "props": { + "value": { + "method": "approx:0.1", + "ref": 603.07 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E10", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3066.93 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E11", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3072.63 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E12", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3226.69 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E13", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3234.79 + } + } + } + ] } } } diff --git a/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json b/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json index 63cd0d5..504e5ce 100644 --- a/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json +++ b/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json @@ -4,6 +4,25 @@ "instruction": "I'm really enjoying this paper. 
Could you please find the personal webpages of the first author and the last three authors, and add them to a browser bookmark folder named 'Liked Authors'?", "source": "authors", "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, { "type": "download", "parameters": { @@ -40,15 +59,28 @@ ], "trajectory": "trajectories/a82b78bb-7fde-4cb3-94a4-035baf10bcf0", "related_apps": [ + "chrome", + "pdf" ], "evaluator": { - "postconfig": [], - "func": "", + "func": "is_expected_bookmarks", "result": { + "type": "bookmarks" }, "expected": { - }, - "options": { + "type": "rule", + "rules": { + "type": "liked_authors_websites_urls", + "names": [ + "Liked Authors" + ], + "urls": [ + "https://jimfan.me/", + "https://ai.stanford.edu/~dahuang/", + "https://yukezhu.me/", + "https://www.eas.caltech.edu/people/anima" + ] + } } } -} +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json b/evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json new file mode 100644 index 0000000..0fbac44 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json @@ -0,0 +1,26 @@ +{ + "id": "d28853f4-500a-4749-b9dc-79c3576e759b", + "snapshot": "libreoffice_calc", + "instruction": "Hey there! So, I've got this bit of a situation and I'm hoping you can help me out. I've been working on gathering research for this big project at work, and I've ended up with a bunch of PDF files from various sources. 
Each PDF contains some really crucial information that I need, but here's the kicker - I need all of this info to be in one place where I can easily access and edit it, like in a Word document or a README file.\n\nThe PDFs are a mix of things - some are reports with statistics and findings, others are articles with key insights, and a few are just informational brochures with important dates and details. What I need is for someone to go through each PDF, extract all the relevant information, and then organize it into a single document. I'm talking about making sure that all the stats, insights, dates, and details from each PDF are neatly compiled, so I don't miss anything.\n\nAnd to make things a bit more complicated, some of these PDFs are scanned images of documents, so the text isn't directly selectable. I guess that means you might need to manually type out some parts or find a way to convert the images to text that can be copied.\n\nOnce everything's been compiled into this one document, could you save it as both a Word doc and a README file? I'd like to have it in these two formats so I can easily share it with my team and also have a version that's ready to be uploaded to our project's repository.\n\nOh, and could you make sure to organize the information by the source PDF? Like, maybe start each section with the title of the PDF or a brief description of its contents, followed by all the extracted info from that PDF. This way, it'll be easier for me and the team to trace back to the original sources if we need to.\n\nI know it's a lot to ask, but having all this information consolidated and organized is going to be a huge help for moving forward with the project. 
Thanks a bunch for helping me tackle this!", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/d28853f4-500a-4749-b9dc-79c3576e759b", + "related_apps": [ + + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/settings/googledrive/credentials.json b/evaluation_examples/settings/googledrive/credentials.json deleted file mode 100644 index 049985c..0000000 --- a/evaluation_examples/settings/googledrive/credentials.json +++ /dev/null @@ -1 +0,0 @@ -{"access_token": "ya29.a0AfB_byCmR_-BUvipM02LYvRdhSIsjxhdK4a1tpDABrZOjTOWPkPhs4gy070hbrq0tZBeld813_HqZ4q5GTeLzddfVVTWfiHdDSLlT4Bf5_f4ZURSB53XJAdAyYeI7LcT4RwF1WoAQhtldbIBJ4rizVk7L6O3486u6e9OaCgYKARQSARISFQHGX2Mi1U-dePZ0efbg8nydUEsP9Q0171", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0edHIvL2N4T8ICgYIARAAGA4SNwF-L9IrKhoX-pALW0nJ18niS1Gy3Lg9wF_G1joCoGHLM8v4-WJsibIB04KjWYCp_40Cs1WS7es", "token_expiry": "2024-03-02T13:59:28Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byCmR_-BUvipM02LYvRdhSIsjxhdK4a1tpDABrZOjTOWPkPhs4gy070hbrq0tZBeld813_HqZ4q5GTeLzddfVVTWfiHdDSLlT4Bf5_f4ZURSB53XJAdAyYeI7LcT4RwF1WoAQhtldbIBJ4rizVk7L6O3486u6e9OaCgYKARQSARISFQHGX2Mi1U-dePZ0efbg8nydUEsP9Q0171", "expires_in": 3599, "refresh_token": "1//0edHIvL2N4T8ICgYIARAAGA4SNwF-L9IrKhoX-pALW0nJ18niS1Gy3Lg9wF_G1joCoGHLM8v4-WJsibIB04KjWYCp_40Cs1WS7es", "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"} \ No newline at end of file