From fc15a33b70f3e6d280c5c63363612c7ca0ad547a Mon Sep 17 00:00:00 2001 From: rhythmcao Date: Thu, 1 Feb 2024 00:53:31 +0800 Subject: [PATCH] finish multi-app examples --- desktop_env/envs/desktop_env.py | 8 +- desktop_env/evaluators/getters/chrome.py | 12 +- desktop_env/evaluators/metrics/__init__.py | 5 +- desktop_env/evaluators/metrics/chrome.py | 111 +++++++++-- desktop_env/evaluators/metrics/thunderbird.py | 41 ++++ .../78aed49a-a710-4321-a793-b611a7c5b56b.json | 177 ++++++++++++++++++ .../a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb.json | 132 +++++++++++++ .../c867c42d-a52d-4a24-8ae3-f75d256b5618.json | 98 ++++++++++ .../d9b7c649-c975-4f53-88f5-940b29c47247.json | 68 +++++++ .../e135df7c-7687-4ac0-a5f0-76b74438b53e.json | 91 +++++++++ .../f7dfbef3-7697-431c-883a-db8583a4e4f9.json | 121 ++++++++++++ .../settings/googledrive/credentials.json | 2 +- requirements.txt | 1 + 13 files changed, 844 insertions(+), 23 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/78aed49a-a710-4321-a793-b611a7c5b56b.json create mode 100644 evaluation_examples/examples/multi_apps/a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb.json create mode 100644 evaluation_examples/examples/multi_apps/c867c42d-a52d-4a24-8ae3-f75d256b5618.json create mode 100644 evaluation_examples/examples/multi_apps/d9b7c649-c975-4f53-88f5-940b29c47247.json create mode 100644 evaluation_examples/examples/multi_apps/e135df7c-7687-4ac0-a5f0-76b74438b53e.json create mode 100644 evaluation_examples/examples/multi_apps/f7dfbef3-7697-431c-883a-db8583a4e4f9.json diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py index 831a876..e870b21 100644 --- a/desktop_env/envs/desktop_env.py +++ b/desktop_env/envs/desktop_env.py @@ -294,6 +294,7 @@ class DesktopEnv(gym.Env): self.setup_controller.setup(self.evaluator.get("postconfig", [])) if type(self.metric) == list: + results = [] for idx, metric in enumerate(self.metric): try: config = self.evaluator["result"][idx] @@ -310,11 +311,12 @@ class DesktopEnv(gym.Env): **self.metric_options[idx]) if expected_state is not None \ else metric(result_state, **self.metric_options[idx]) - if self.metric_conj == 'and' and not bool(metric): + if self.metric_conj == 'and' and float(metric) == 0.0: return 0 - elif self.metric_conj == 'or' and bool(metric): + elif self.metric_conj == 'or' and float(metric) == 1.0: return 1 - return 1 if self.metric_conj == 'and' else 0 + else: results.append(metric) + return sum(results) / len(results) if self.metric_conj == 'and' else max(results) else: try: result_state = self.result_getter(self, self.evaluator["result"]) diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py index bcfab09..6322238 100644 --- a/desktop_env/evaluators/getters/chrome.py +++ b/desktop_env/evaluators/getters/chrome.py @@ -490,16 +490,16 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str: parent_id = file['id'] file.GetContentFile(_path, mimetype=file['mimeType']) - except: - logger.info('[ERROR]: Failed to download the file from Google Drive') + except Exception as e: + logger.info('[ERROR]: Failed to download the file from Google Drive', e) return None return _path if 'query' in config: return get_single_file(config['query'], os.path.join(env.cache_dir, config['dest'])) elif 'path' in config: - query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(config['path']) - 1 - else f'title = {fp} and trashed = false' for idx, fp in enumerate(config['path'])] + query = [f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(config['path']) - 1 + else f"title = '{fp}' and trashed = false" for idx, fp in enumerate(config['path'])] return get_single_file(query, os.path.join(env.cache_dir, config['dest'])) elif 'query_list' in config: _path_list = [] @@ -512,8 +512,8 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str: _path_list = [] assert len(config['path_list']) == len(config['dest']) for idx, path in enumerate(config['path_list']): - query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if jdx < len(path) - 1 - else f'title = {fp} and trashed = false' for jdx, fp in enumerate(path)] + query = [f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if jdx < len(path) - 1 + else f"title = '{fp}' and trashed = false" for jdx, fp in enumerate(path)] dest = config['dest'][idx] _path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest))) return _path_list \ No newline at end of file diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 6e327f1..3f67ebf 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -8,6 +8,8 @@ from .chrome import ( is_expected_tabs, is_expected_bookmarks, compare_pdfs, + compare_htmls, + compare_archive, is_cookie_deleted, is_shortcut_on_desktop, check_font_size, @@ -92,7 +94,8 @@ from .table import ( ) from .thunderbird import ( check_thunderbird_prefs, - check_thunderbird_filter + check_thunderbird_filter, + check_thunderbird_folder ) from .vlc import ( is_vlc_playing, diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py index 2aa4456..b48519d 100644 --- a/desktop_env/evaluators/metrics/chrome.py +++ b/desktop_env/evaluators/metrics/chrome.py @@ -1,6 +1,6 @@ -import logging, re -from typing import Any, Dict, List - +import logging, re, os, shutil +from typing import Any, Dict, List, Union +from bs4 import BeautifulSoup, Tag import fitz # PyMuPDF import rapidfuzz.fuzz as fuzz @@ -14,7 +14,6 @@ def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> f Checks if the expected tabs are open in Chrome. """ - print(open_tabs, rule) match_type = rule['type'] if match_type == "url": @@ -53,10 +52,12 @@ def is_expected_search_query(active_tab_info: Dict[str, str], rules: Dict[str, A return 0. -def compare_pdfs(pdf1_path, pdf2_path): +def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[str]]): """ Compare two PDF files. """ + if type(pdf2_path) != list: + pdf1_path, pdf2_path = [pdf1_path], [pdf2_path] def extract_text_from_pdf(pdf_path): """Extract text from each page of the PDF.""" @@ -65,14 +66,100 @@ def compare_pdfs(pdf1_path, pdf2_path): for page in pdf: text += page.get_text() return text.strip() - try: - text1 = extract_text_from_pdf(pdf1_path) - text2 = extract_text_from_pdf(pdf2_path) - return fuzz.ratio(text1, text2) / 100 - except Exception as e: - logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}") - return 0.0 + score = 0. + for path1, path2 in zip(pdf1_path, pdf2_path): + try: + text1 = extract_text_from_pdf(path1) + text2 = extract_text_from_pdf(path2) + score += fuzz.ratio(text1, text2) / 100 + except Exception as e: + logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}") + return score / len(pdf2_path) + + +def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float: + """ + Compare two archives. Note that the files in the archives should be of the same type. + """ + if not pred_path: return 0. + pred_folder = os.path.splitext(pred_path)[0] + '_pred' + gold_folder = os.path.splitext(gold_path)[0] + '_gold' + + if os.path.exists(pred_folder): # remove existing folder for new predictions + shutil.rmtree(pred_folder, ignore_errors=True) + os.makedirs(pred_folder) + shutil.unpack_archive(pred_path, pred_folder) + if not os.path.exists(gold_folder): # use cache if exists + os.makedirs(gold_folder) + shutil.unpack_archive(gold_path, gold_folder) + + pred_files = sorted(os.listdir(pred_folder)) + gold_files = sorted(os.listdir(gold_folder)) + if pred_files != gold_files: return 0. + + def get_compare_function(): + file_type = kwargs.pop('file_type', 'text') + if file_type == 'text': + from .vscode import compare_text_file + return compare_text_file + elif file_type == 'pdf': return compare_pdfs + elif file_type == 'docx': + from .docs import compare_docx_files + return compare_docx_files + elif file_type == 'ppt': + from .slides import compare_pptx_files + return compare_pptx_files + elif file_type == 'image': + from .vlc import compare_images + return compare_images + elif file_type == 'csv': + from .table import compare_csv + return compare_csv + elif file_type == 'table': + from .table import compare_table + return compare_table + elif file_type == 'audio': + from .vlc import compare_audios + return compare_audios + elif file_type == 'video': + from .vlc import compare_videos + return compare_videos + else: raise ValueError('[ERROR]: not support file type: %s' % file_type) + + score = 0 + compare_function = get_compare_function() + for f1, f2 in zip(pred_files, gold_files): + fp1 = os.path.join(pred_folder, f1) + fp2 = os.path.join(gold_folder, f2) + score += compare_function(fp1, fp2, **kwargs) + return score / len(pred_files) + + +def compare_htmls(html_path1: str, html_path2: str) -> float: + """ + Compare two HTML files. + """ + with open(html_path1, 'r', encoding='utf-8') as inf: + soup1 = BeautifulSoup(inf, 'lxml') + with open(html_path2, 'r', encoding='utf-8') as inf: + soup2 = BeautifulSoup(inf, 'lxml') + + def compare_elements(elem1, elem2): + if not (isinstance(elem1, Tag) and isinstance(elem2, Tag)): + return elem1 == elem2 + if elem1.name != elem2.name: + return False + if elem1.text.strip() != elem2.text.strip(): + return False + if elem1.attrs != elem2.attrs: + return False + return True + + for elem1, elem2 in zip(soup1.recursiveChildGenerator(), soup2.recursiveChildGenerator()): + if not compare_elements(elem1, elem2): + return .0 + return 1. def is_cookie_deleted(cookie_data, rule): diff --git a/desktop_env/evaluators/metrics/thunderbird.py b/desktop_env/evaluators/metrics/thunderbird.py index d0d3f85..90b8892 100644 --- a/desktop_env/evaluators/metrics/thunderbird.py +++ b/desktop_env/evaluators/metrics/thunderbird.py @@ -128,6 +128,47 @@ def check_thunderbird_filter(result: str, rules: Dict[str, List[Dict[str, str]]] unexpect_metric = unexpect_metric and not any(_match_record(r, flt) for r in rules.get("unexpect", [])) return float(all(expect_metrics) and unexpect_metric) + +def check_thunderbird_folder(result: Union[str, List[str]], reference: Union[str, List[str]], **kwargs) -> float: + """ + Check the file or file_list that each text file contains all messages in a folder in Thunderbird. Each message is started with `FROM - `. + **kwargs: + ignore_status (bool): for comparison, ignore the status (X-Mozilla-Status: 0000) of each message. default: False + ignore_keys (bool): for comparison, ignore the keys (X-Mozilla-Keys: label) of each message. default: False + remove_deleted (bool): ignore deleted messages which has status code 0008 or 0009. default: True + remove_duplicate (bool): remove duplicate messages. default: True + """ + def normalize_msg(msg, options): + ignore_status = options.get('ignore_status', False) + ignore_keys = options.get('ignore_keys', False) + if ignore_status: + msg = re.sub(r'X-Mozilla-Status\d?:[\s\d]+', '', msg) + if ignore_keys: + msg = re.sub(r'(X-Mozilla-Keys:[^\n]*?)\n(MIME-Version)', r'\2', msg) + return msg.strip() + + def read_thunderbird_folder_file(path: str) -> str: + with open(path, 'r') as inf: + data = inf.read().strip() + messages = [] + for mail in data.split('FROM - '): + if mail.strip(): continue + if kwargs.get('remove_deleted', True) and re.search(r'X-Mozilla-Status: 000[89]', mail): continue + messages.append('FROM - ' + normalize_msg(mail, kwargs)) + if kwargs.get('remove_duplicate', True): + messages = set(messages) + return '\n'.join(sorted(messages)) + + if type(reference) != list: + result, reference = [result], [reference] + for pred, gold in zip(result, reference): + if pred is None: return .0 + mail1 = read_thunderbird_folder_file(pred) + mail2 = read_thunderbird_folder_file(gold) + if mail1 != mail2: return .0 + return 1.0 + + if __name__ == "__main__": #import lxml.etree #from lxml.cssselect import CSSSelector diff --git a/evaluation_examples/examples/multi_apps/78aed49a-a710-4321-a793-b611a7c5b56b.json b/evaluation_examples/examples/multi_apps/78aed49a-a710-4321-a793-b611a7c5b56b.json new file mode 100644 index 0000000..170c685 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/78aed49a-a710-4321-a793-b611a7c5b56b.json @@ -0,0 +1,177 @@ +{ + "id": "78aed49a-a710-4321-a793-b611a7c5b56b", + "snapshot": "chrome", + "instruction": "Could you help me save all attachments of the oldest email in Bills local folders to the attachment/ folder in Google Drive and then move this email to a different folder \"have_seen\" in Local Folders.", + "source": "https://marketplace.uipath.com/listings/upload-email-attachments-from-gmail-to-google-drive", + "config": [ + { + "type": "googledrive", + "parameters": { + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "operation": ["delete"], + "args": [ + { + "query": "title = 'attachment' and 'root' in parents and trashed = false", + "trash": false + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://stackoverflow.com/", + "https://paperswithcode.com/" + ] + } + }, + { + "type": "login", + "parameters": { + "settings_file": "evaluation_examples/settings/google/settings.json", + "platform": "googledrive" + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1W18fZ4Dog-cSH3Fxa3bcfTz28z6AsL_1&export=download&authuser=0&confirm=t&uuid=7c8fc164-6e7b-4849-aba1-f3dba947feba&at=APZUnTWd2DFbIYnlMf-GY4qMqXcc:1706682437093", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "-xz", + "--recursive-unlink", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "/usr/bin/thunderbird" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "thunderbird", + "chrome" + ], + "evaluator": { + "func": [ + "check_thunderbird_folder", + "compare_pdfs" + ], + "result": [ + { + "type": "vm_file", + "multi": true, + "path": [ + "/home/user/.thunderbird/t5q2a5hp.default-release/Mail/Local Folders/have_seen", + "/home/user/.thunderbird/t5q2a5hp.default-release/Mail/Local Folders/Bills" + ], + "dest": [ + "have_seen", + "Bills" + ], + "gives": [ + 0, + 1 + ] + }, + { + "type": "googledrive_file", + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "path_list": [ + [ + "attachment", + "Invoice-01234567-89ab.pdf" + ], + [ + "attachment", + "Receipt-0123-4567.pdf" + ] + ], + "dest": [ + "Invoice-01234567-89ab.pdf", + "Receipt-0123-4567.pdf" + ] + } + ], + "expected": [ + { + "type": "cloud_file", + "multi": true, + "path": [ + "https://drive.usercontent.google.com/download?id=12ielR0p3jFZ4UDfGlC7fdF88uUTM_3bn&export=download&authuser=0&confirm=t&uuid=6ef47d29-2772-49cd-87e7-17efd587f1ba&at=APZUnTUGjXxDcDcMF5RoAsVtbFOB:1706689562530", + "https://drive.usercontent.google.com/download?id=13XQ4Hr62WyHVwpefjD-tgkpjb0p9HKpe&export=download&authuser=0&confirm=t&uuid=ad4ee033-83e4-4df4-9f9c-ac61d43634bc&at=APZUnTV5ZI1GFXNHTyIblwSKjyku:1706692277803" + ], + "dest": [ + "have_seen_gold", + "Bills_gold" + ], + "gives": [ + 0, + 1 + ] + }, + { + "type": "cloud_file", + "multi": true, + "path": [ + "https://drive.usercontent.google.com/download?id=1SlzOPRfzaKyhBERy-ks2-rjzrjtvgoDG&export=download&authuser=0&confirm=t&uuid=e4cb717b-fb5d-4860-99d9-9e1f38df2592&at=APZUnTXlcSimcf9qZ7uZmlAUj-zQ:1706683984205", + "https://drive.usercontent.google.com/download?id=19SdAYymlHvFQ7wzc-_JfFYOYMW6xAw5-&export=download&authuser=0&confirm=t&uuid=e1cbff64-d615-493d-9d02-bcdbd40f878c&at=APZUnTVRpcei6q_BjzQ7EtHA7voq:1706683990388" + ], + "dest": [ + "Invoice-01234567-89ab_gold.pdf", + "Receipt-0123-4567_gold.pdf" + ], + "gives": [ + 0, + 1 + ] + } + ], + "options": [ + { + "remove_deleted": true, + "remove_duplicate": true + }, + {} + ] + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb.json b/evaluation_examples/examples/multi_apps/a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb.json new file mode 100644 index 0000000..82e96a5 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb.json @@ -0,0 +1,132 @@ +{ + "id": "a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb", + "snapshot": "chrome", + "instruction": "Please help me backup my emails in \"Bills\" folder in Thunderbird and store the .eml files with only subject names to my Google Drive folder called \"emails\".", + "source": "https://marketplace.uipath.com/listings/merge-pdfs-from-gmail-email-attachments-and-upload-to-gogle-drive", + "config": [ + { + "type": "googledrive", + "parameters": { + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "operation": ["delete"], + "args": [ + { + "query": "title = 'emails' and 'root' in parents", + "trash": false + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://arxiv.org/", + "https://github.com/ohmyzsh/ohmyzsh/wiki/themes", + "https://releases.ubuntu.com/" + ] + } + }, + { + "type": "login", + "parameters": { + "settings_file": "evaluation_examples/settings/google/settings.json", + "platform": "googledrive" + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1cATYRGGbZ_vZEjJWajI1Dn4gmp3Won-l&export=download&authuser=0&confirm=t&uuid=156022ae-a56a-400c-a934-34e4369ec82a&at=APZUnTWMnGMhJciITn7IvRY33zuJ:1706707804986", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "-xz", + "--recursive-unlink", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "/usr/bin/thunderbird" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "thunderbird", + "chrome" + ], + "evaluator": { + "func": "check_thunderbird_folder", + "result": { + "type": "googledrive_file", + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "path_list": [ + [ + "emails", + "Amazon Web Services Invoice Available [Account: 0123456789ab] [Invoice ID: 0123456789].eml" + ], + [ + "emails", + "Your receipt from X (formerly Twitter) #0123-4567.eml" + ] + ], + "dest": [ + "pred1.eml", + "pred2.eml" + ] + }, + "expected": { + "type": "cloud_file", + "multi": "true", + "path": [ + "https://drive.usercontent.google.com/download?id=1Fb2ofAjfn-wlIYbtIGXNvLJEB85KbO7Y&export=download&authuser=0&confirm=t&uuid=9a996347-9093-43ed-8ad2-7e769e0576bd&at=APZUnTUp2pvXjM0zGvjc22lewOv3:1706708252395", + "https://drive.usercontent.google.com/download?id=1LZ2PgwmVgO62dNOueWy7RLlJ1_d8Hz3E&export=download&authuser=0&confirm=t&uuid=f632abc2-3ff7-4e9f-a7c6-72c9bbc44654&at=APZUnTXihve7i15GwLyEx2rwDFUk:1706708265408" + ], + "dest": [ + "gold1.eml", + "gold2.eml" + ], + "gives": [ + 0, + 1 + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/c867c42d-a52d-4a24-8ae3-f75d256b5618.json b/evaluation_examples/examples/multi_apps/c867c42d-a52d-4a24-8ae3-f75d256b5618.json new file mode 100644 index 0000000..78afc9b --- /dev/null +++ b/evaluation_examples/examples/multi_apps/c867c42d-a52d-4a24-8ae3-f75d256b5618.json @@ -0,0 +1,98 @@ +{ + "id": "c867c42d-a52d-4a24-8ae3-f75d256b5618", + "snapshot": "thunderbird", + "instruction": "Please assist me in exporting my contacts of Personal Address Book from Thunderbird into contacts.csv file in the desktop and convert it to .xlsx with Libreoffice Calc.", + "source": "https://www.sync.blue/en/sync/mozilla-thunderbird/google-sheets/", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--calc" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1wKXmJ14dnxSzdy9ZF_ePWU7zpevY6Dry&export=download&authuser=0&confirm=t&uuid=9b476c95-8eee-4a9a-8cee-c3620d5ce250&at=APZUnTUzDeeeMNr34DB1vEnBK6N7:1706719624132", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "--recursive-unlink", + "-xz", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "/usr/bin/thunderbird" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "thunderbird", + "libreoffice_calc" + ], + "evaluator": { + "func": [ + "compare_csv", + "compare_table" + ], + "conj": "and", + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/contacts.csv", + "dest": "contacts.csv" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/contacts.xlsx", + "dest": "contacts.xlsx" + } + ], + "expected": [ + { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1StwASpAR2ALq2Y1vugGsdUJptg6FwjEm&export=download&authuser=0&confirm=t&uuid=56339e19-b889-4da1-ab72-5e0b90f13fff&at=APZUnTVWFF2pBrtWU_hXgzfbrWP2:1706719668676", + "dest": "contacts_gold.csv" + }, + { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1s25eUpvkMzSm6p_WA7O13t6mVqmkxr2C&export=download&authuser=0&confirm=t&uuid=901cbd32-6026-4391-a5cc-989e1047cf7c&at=APZUnTUs27mZceDshB_f9Tx4PFyz:1706719610831", + "dest": "contacts_gold.xlsx" + } + ], + "options": [ + {}, + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": "RI0", + "sheet_idx1": "EI0" + } + ] + } + ] + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/d9b7c649-c975-4f53-88f5-940b29c47247.json b/evaluation_examples/examples/multi_apps/d9b7c649-c975-4f53-88f5-940b29c47247.json new file mode 100644 index 0000000..d859e15 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/d9b7c649-c975-4f53-88f5-940b29c47247.json @@ -0,0 +1,68 @@ +{ + "id": "d9b7c649-c975-4f53-88f5-940b29c47247", + "snapshot": "thunderbird", + "instruction": "Help me extract the latest 5 emails in daily folder from Thunderbird, from the earliest to the most recent by time, and creates a LibreOffice Calc Report \"report.xlsx\" in the desktop, storing the sender_name, sender_address, subject, CC, and number_of_attachments.", + "source": "https://marketplace.uipath.com/listings/extract-the-first-1000-gmail-emails-from-the-current-month-in-a-new-google-sheets-report", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1aCmZvSIs8Mb2kM_IVyhTbLawFvHOrlW-&export=download&authuser=0&confirm=t&uuid=dee0fe8b-731b-4bb7-97c4-02a2d8154da8&at=APZUnTVO-WjksU7WYUq4sCkNMlsL:1706710969972", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "-xz", + "--recursive-unlink", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "/usr/bin/thunderbird" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "thunderbird", + "libreoffice_calc" + ], + "evaluator": { + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/report.xlsx", + "dest": "report.xlsx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=13L73ukCzuLYUgY11xz2b6DEoYhFvmXVE&export=download&authuser=0&confirm=t&uuid=82e92b1f-d4c0-44e6-b40f-595dff880acd&at=APZUnTXzPvN60uTLjy7QSPEF2Ft9:1706714031096", + "dest": "report_gold.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": "RI0", + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/e135df7c-7687-4ac0-a5f0-76b74438b53e.json b/evaluation_examples/examples/multi_apps/e135df7c-7687-4ac0-a5f0-76b74438b53e.json new file mode 100644 index 0000000..3edcb19 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/e135df7c-7687-4ac0-a5f0-76b74438b53e.json @@ -0,0 +1,91 @@ +{ + "id": "e135df7c-7687-4ac0-a5f0-76b74438b53e", + "snapshot": "libreoffice_calc", + "instruction": "Please convert a .xlsx file opened in LibreOffice Calc to a .html file and view it in Chrome.", + "source": "https://www.ilovefreesoftware.com/23/featured/free-csv-to-html-converter-software-windows.html", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://aclanthology.org/", + "https://openai.com/", + "https://www.linkedin.com/home/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1zio_nULUdQGFGFBD55aZ52vhqYBtlQeJ&export=download&authuser=0&confirm=t&uuid=68483de0-2035-461d-90d5-e4048825d1ce&at=APZUnTXRMiB4UDzbsqrgm6BbFefE:1706630941497", + "path": "/home/user/Desktop/annual-enterprise-survey-2021-financial-year-provisional.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/annual-enterprise-survey-2021-financial-year-provisional.xlsx" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "thunderbird", + "chrome" + ], + "evaluator": { + "func": ["is_expected_tabs", "compare_htmls"], + "result": [ + { + "type": "open_tabs_info" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/annual-enterprise-survey-2021-financial-year-provisional.html", + "dest": "annual-enterprise-survey-2021-financial-year-provisional.html" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "type": "url", + "urls": [ + "https://aclanthology.org/", + "https://openai.com/", + "https://www.linkedin.com/home/", + "file:///home/user/Desktop/annual-enterprise-survey-2021-financial-year-provisional.html" + ] + } + }, + { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1HvdTjLZctQGgo3BojmPBiSrPXFkqubJi&export=download&authuser=0&confirm=t&uuid=47f78e2c-bc38-416b-900f-4837ff588bfa&at=APZUnTXSartmo7MSTsanrkc9zHpm:1706680670213", + "dest": "annual-enterprise-survey-2021-financial-year-provisional_gold.html" + } + ] + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/f7dfbef3-7697-431c-883a-db8583a4e4f9.json b/evaluation_examples/examples/multi_apps/f7dfbef3-7697-431c-883a-db8583a4e4f9.json new file mode 100644 index 0000000..77114b9 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/f7dfbef3-7697-431c-883a-db8583a4e4f9.json @@ -0,0 +1,121 @@ +{ + "id": "f7dfbef3-7697-431c-883a-db8583a4e4f9", + "snapshot": "libreoffice_writer", + "instruction": "Could you convert all `.doc` files in current directory to PDF all at once in the command line?", + "source": "https://www.thegeekdiary.com/libreoffice-command-examples-in-linux/", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1efUzatD7vixloPY9-XENOL3OZ5__1hod&export=download&authuser=0&confirm=t&uuid=14a3a6b1-a6e2-46cf-9e5b-bcc9e2acbc98&at=APZUnTWtyQUzT6eO5ZTcnmAZkwyX:1706704733958", + "path": "/home/user/Desktop/doc.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "/bin/bash", + "-c", + "tar -zxf /home/user/Desktop/doc.tar.gz -C /home/user/Desktop/ && rm /home/user/Desktop/doc.tar.gz" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "/bin/bash", + "-c", + "history -c && echo > ~/.bash_history && sleep 3" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "gnome-terminal", + "--maximize", + "--working-directory=/home/user/Desktop" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_writer", + "terminal" + ], + "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": [ + "/bin/bash", + "-c", + "cd /home/user/Desktop && tar -zcf pdf.tar.gz *.pdf" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "/bin/bash", + "-c", + "killall gnome-terminal-server" + ] + } + } + ], + "func": [ + "check_include_exclude", + "compare_archive" + ], + "result": [ + { + "type": "vm_command_line", + "command": [ + "/bin/bash", + "-c", + "output=$(cat ~/.bash_history | grep -E \"(soffice|libreoffice).+--convert-to\\s+pdf.+\\*\\.doc\"); if [ -z \"$output\" ]; then echo \"failed to complete this task\"; else echo \"catch the desired command\"; fi" + ] + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/pdf.tar.gz", + "dest": "pdf.tar.gz" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "include": [ + "catch the desired command" + ], + "exclude": [ + "failed to complete this task" + ] + } + }, + { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1A-UoQdQvvtFBQWkPi_Q8ecNe4gjlh0dg&export=download&authuser=0&confirm=t&uuid=0c97e2f6-3de8-438f-a676-557cca5ee292&at=APZUnTUy4pghj5TtS-suVMXrCLO2:1706705033330", + "dest": "pdf_gold.tar.gz" + } + ], + "options": [ + {}, + { + "file_type": "pdf" + } + ] + } +} \ No newline at end of file diff --git a/evaluation_examples/settings/googledrive/credentials.json b/evaluation_examples/settings/googledrive/credentials.json index 2c44560..1be4912 100644 --- a/evaluation_examples/settings/googledrive/credentials.json +++ b/evaluation_examples/settings/googledrive/credentials.json @@ -1 +1 @@ -{"access_token": "ya29.a0AfB_byDIizbKXOjPLHrhl3tX0xhI_Pv2U1qv_nnhX_V_QWjCKvq9es2Pgc21SY8z8W4zolgSN4RipR2iA5zHYIDkH1sn6ZhcqC_ExxrmJ_OgZt71ljzfhfh19CfGjr_ki7HoXa_UOg4X__N0IPpy5UTFE8aU1T4F3dIMaCgYKAS0SARISFQHGX2MiW_ZTvRlqH2LYf6ylkeSB9Q0171", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0ehtafHmucszRCgYIARAAGA4SNwF-L9IrpDBsnzdHKAlRfrkvzNFw1cpdnRY8rhM5gy4flsPYdysMav27yHamJx39BBGq-LLw40s", "token_expiry": "2024-01-31T11:54:11Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byDIizbKXOjPLHrhl3tX0xhI_Pv2U1qv_nnhX_V_QWjCKvq9es2Pgc21SY8z8W4zolgSN4RipR2iA5zHYIDkH1sn6ZhcqC_ExxrmJ_OgZt71ljzfhfh19CfGjr_ki7HoXa_UOg4X__N0IPpy5UTFE8aU1T4F3dIMaCgYKAS0SARISFQHGX2MiW_ZTvRlqH2LYf6ylkeSB9Q0171", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"} \ No newline at end of file +{"access_token": "ya29.a0AfB_byAZmDTDsYds_iatV8a30PUPWcDHVW4Cyg71pTlD0f3eBBwAjV4WpVL8LdAle8sT4j_rX4rWH8iCt3QI2YdrQLFPlaVdBk0zRGGtAEcebIDuQy_VKD6j5c3IGxok9PDON-Mft0ZVJjUVEopgLYA4fYwctbQZ8nyl4AaCgYKAX4SARISFQHGX2Mim-LRNXCfACmecJH94-D09A0173", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0ehtafHmucszRCgYIARAAGA4SNwF-L9IrpDBsnzdHKAlRfrkvzNFw1cpdnRY8rhM5gy4flsPYdysMav27yHamJx39BBGq-LLw40s", "token_expiry": "2024-01-31T14:41:25Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byAZmDTDsYds_iatV8a30PUPWcDHVW4Cyg71pTlD0f3eBBwAjV4WpVL8LdAle8sT4j_rX4rWH8iCt3QI2YdrQLFPlaVdBk0zRGGtAEcebIDuQy_VKD6j5c3IGxok9PDON-Mft0ZVJjUVEopgLYA4fYwctbQZ8nyl4AaCgYKAX4SARISFQHGX2Mim-LRNXCfACmecJH94-D09A0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 103820e..ab1dcf1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -39,3 +39,4 @@ fastdtw odfpy openai func-timeout +beautifulsoup4 \ No newline at end of file