From f194fb8d75186c9bfaf07d7d096bb6f356f1253b Mon Sep 17 00:00:00 2001 From: rhythmcao Date: Thu, 25 Jan 2024 13:53:19 +0800 Subject: [PATCH] add multi_apps; update chrome utilities --- desktop_env/controllers/setup.py | 102 +++++++++++++++++- desktop_env/evaluators/getters/__init__.py | 5 +- desktop_env/evaluators/getters/chrome.py | 51 ++++++++- desktop_env/evaluators/metrics/__init__.py | 4 +- desktop_env/evaluators/metrics/chrome.py | 22 +++- desktop_env/server/main.py | 20 ++++ .../897e3b53-5d4d-444b-85cb-2cdc8a97d903.json | 93 ++++++++++++++++ .../f8cfa149-d1c1-4215-8dac-4a0932bad3c2.json | 73 +++++++++++++ .../settings/google/settings.json | 4 + .../settings/googledrive/client_secrets.json | 13 +++ .../settings/googledrive/credentials.json | 1 + .../settings/googledrive/settings.yml | 8 ++ requirements.txt | 1 + 13 files changed, 383 insertions(+), 14 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json create mode 100644 evaluation_examples/examples/multi_apps/f8cfa149-d1c1-4215-8dac-4a0932bad3c2.json create mode 100644 evaluation_examples/settings/google/settings.json create mode 100644 evaluation_examples/settings/googledrive/client_secrets.json create mode 100644 evaluation_examples/settings/googledrive/credentials.json create mode 100644 evaluation_examples/settings/googledrive/settings.yml diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index 9c6b559..e30b279 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -8,7 +8,9 @@ from typing import Any, Union, Optional from typing import Dict, List import requests -from playwright.sync_api import sync_playwright +from pydrive.auth import GoogleAuth +from pydrive.drive import GoogleDrive, GoogleDriveFile, GoogleDriveFileList +from playwright.sync_api import sync_playwright, TimeoutError from requests_toolbelt.multipart.encoder import MultipartEncoder from desktop_env.evaluators.metrics.utils import compare_urls @@ -46,7 +48,7 @@ class SetupController: # Assumes all the setup the functions should follow this name # protocol setup_function: str = "_{:}_setup".format(config_type) - assert hasattr(self, setup_function) + assert hasattr(self, setup_function), f'Setup controller cannot find init function {setup_function}' getattr(self, setup_function)(**parameters) logger.info("SETUP: %s(%s)", setup_function, str(parameters)) @@ -416,3 +418,99 @@ class SetupController: # Do not close the context or browser; they will remain open after script ends return browser, context + + # google drive setup + def _googledrive_setup(self, **config): + """ Clean google drive space (eliminate the impact of previous experiments to reset the environment) + @args: + config(Dict[str, Any]): contain keys + settings_file(str): path to google drive settings file, which will be loaded by pydrive.auth.GoogleAuth() + operation(List[str]): each operation is chosen from ['delete', 'upload'] + args(List[Dict[str, Any]]): parameters for each operation + different args dict for different operations: + for delete: + query(str): query pattern string to search files or folder in google drive to delete, please refer to + https://developers.google.com/drive/api/guides/search-files?hl=en about how to write query string. + By default, move all files/folders into trash (can be recovered). + trash(bool): whether to delete files permanently or move to trash. By default, trash=True, just move to trash. + TODO: other operations + """ + settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.yml') + gauth = GoogleAuth(settings_file=settings_file) + drive = GoogleDrive(gauth) + + for oid, operation in enumerate(config['operation']): + if operation == 'delete': # delete a specific file + # query pattern string, by default, remove all files/folders not in the trash to the trash + params = config['args'][oid] + q = params.get('query', 'trashed = false') + trash = params.get('trash', True) + filelist: GoogleDriveFileList = drive.ListFile({'q': q}).GetList() + for file in filelist: + file: GoogleDriveFile + # note that, if a folder is trashed/deleted, all files and folders in it will be trashed/deleted + # this is the same for UnTrash + if trash: file.Trash() + else: file.Delete() + elif operation == 'upload': + pass + else: + raise ValueError('[ERROR]: not implemented clean type!') + + + def _login_setup(self, **config): + """ Login to a website with account and password information. + @args: + config(Dict[str, Any]): contain keys + settings_file(str): path to the settings file + platform(str): platform to login, implemented platforms include: + googledrive: https://drive.google.com/drive/my-drive + + """ + host = self.vm_ip + port = 9222 # fixme: this port is hard-coded, need to be changed from config file + + remote_debugging_url = f"http://{host}:{port}" + with sync_playwright() as p: + browser = None + for attempt in range(15): + try: + browser = p.chromium.connect_over_cdp(remote_debugging_url) + break + except Exception as e: + if attempt < 14: + logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}") + time.sleep(1) + else: + logger.error(f"Failed to connect after multiple attempts: {e}") + raise e + if not browser: + return + + context = browser.contexts[0] + platform = config['platform'] + + if platform == 'googledrive': + url = 'https://drive.google.com/drive/my-drive' + page = context.new_page() # Create a new page (tab) within the existing context + page.goto(url) + logger.info(f"Opened new page: {url}") + settings = json.load(open(config['settings_file'])) + email, password = settings['account'], settings['password'] + + try: + page.wait_for_selector('input[type="email"]', state="visible", timeout=3000) + page.fill('input[type="email"]', email) + page.click('#identifierNext > div > button') + page.wait_for_selector('input[type="password"]', state="visible", timeout=5000) + page.fill('input[type="password"]', password) + page.click('#passwordNext > div > button') + page.wait_for_load_state('load', timeout=5000) + except TimeoutError: + logger.info('[ERROR]: timeout when waiting for google drive login page to load!') + return + + else: + raise NotImplementedError + + return browser, context \ No newline at end of file diff --git a/desktop_env/evaluators/getters/__init__.py b/desktop_env/evaluators/getters/__init__.py index 5074a2d..9e2e636 100644 --- a/desktop_env/evaluators/getters/__init__.py +++ b/desktop_env/evaluators/getters/__init__.py @@ -1,7 +1,6 @@ from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, \ get_shortcuts_on_desktop, get_history, get_enabled_experiments, get_chrome_language, get_chrome_font_size, \ - get_profile_name, \ - get_number_of_search_results + get_profile_name, get_number_of_search_results, get_googledrive_file from .file import get_cloud_file, get_vm_file, get_cache_file from .general import get_vm_command_line from .impress import get_audio_in_slide @@ -9,4 +8,4 @@ from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper from .misc import get_rule, get_accessibility_tree from .replay import get_replay from .vlc import get_vlc_playing_info, get_vlc_config -from .vscode import get_vscode_config +from .vscode import get_vscode_config \ No newline at end of file diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py index d39d263..2724381 100644 --- a/desktop_env/evaluators/getters/chrome.py +++ b/desktop_env/evaluators/getters/chrome.py @@ -2,8 +2,9 @@ import json import logging import os import sqlite3 -from typing import Dict - +from typing import Dict, Any +from pydrive.auth import GoogleAuth +from pydrive.drive import GoogleDrive, GoogleDriveFileList, GoogleDriveFile from playwright.sync_api import sync_playwright logger = logging.getLogger("desktopenv.getters.chrome") @@ -461,3 +462,49 @@ def get_number_of_search_results(env, config: Dict[str, str]): browser.close() return actual_count + + +def get_googledrive_file(env, config: Dict[str, Any]) -> str: + """ Get the desired file from Google Drive based on config, return the downloaded local filepath. + """ + settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.json') + auth = GoogleAuth(settings_file=settings_file) + drive = GoogleDrive(auth) + + q = config['query'] + filelist: GoogleDriveFileList = drive.ListFile({'q': q}).GetList() + if len(filelist) == 0: # target file not found + return None + + file: GoogleDriveFile = filelist[0] # HACK: if multiple candidates, just download the first one + _path = os.path.join(env.cache_dir, config['dest']) + + try: + file.GetContentFile(_path, mimetype=file.metadata['mimeType']) + except: + logger.info('[ERROR]: Failed to download the file from Google Drive') + return None + return _path + + +def get_googledrive_file(env, config: Dict[str, Any]) -> str: + """ Get the desired file from Google Drive based on config, return the downloaded local filepath. + """ + settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.json') + auth = GoogleAuth(settings_file=settings_file) + drive = GoogleDrive(auth) + + q = config['query'] + filelist: GoogleDriveFileList = drive.ListFile({'q': q}).GetList() + if len(filelist) == 0: # target file not found + return None + + file: GoogleDriveFile = filelist[0] # HACK: if multiple candidates, just download the first one + _path = os.path.join(env.cache_dir, config['dest']) + + try: + file.GetContentFile(_path, mimetype=file.metadata['mimeType']) + except: + logger.info('[ERROR]: Failed to download the file from Google Drive') + return None + return _path \ No newline at end of file diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index aea27d1..ff7b8dd 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -1,5 +1,5 @@ from .chrome import is_expected_tabs, is_expected_bookmarks, compare_pdfs, is_cookie_deleted, is_shortcut_on_desktop, check_font_size, \ - check_enabled_experiments, check_history_deleted + check_enabled_experiments, check_history_deleted, is_expected_search_query from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers, compare_docx_lines from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \ compare_insert_equation, compare_highlighted_text @@ -20,4 +20,4 @@ from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, compare_images, compare_audios, \ compare_videos, check_qt_bgcone, check_one_instance_when_started_from_file,check_qt_minimal_view, check_qt_max_volume, \ check_qt_slider_colours, check_global_key_play_pause -from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed, check_json_settings, check_json_keybindings +from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed, check_json_settings, check_json_keybindings \ No newline at end of file diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py index a1f353e..2aa4456 100644 --- a/desktop_env/evaluators/metrics/chrome.py +++ b/desktop_env/evaluators/metrics/chrome.py @@ -1,4 +1,4 @@ -import logging +import logging, re from typing import Any, Dict, List import fitz # PyMuPDF @@ -44,6 +44,15 @@ def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float: raise TypeError(f"{rule['type']} not support yet!") +def is_expected_search_query(active_tab_info: Dict[str, str], rules: Dict[str, Any]) -> float: + expected = rules['expect'] + pattern = expected['pattern'] + matched = re.search(pattern, active_tab_info['url']) + if matched: + return 1. + return 0. + + def compare_pdfs(pdf1_path, pdf2_path): """ Compare two PDF files. @@ -56,11 +65,14 @@ def compare_pdfs(pdf1_path, pdf2_path): for page in pdf: text += page.get_text() return text.strip() + try: + text1 = extract_text_from_pdf(pdf1_path) + text2 = extract_text_from_pdf(pdf2_path) - text1 = extract_text_from_pdf(pdf1_path) - text2 = extract_text_from_pdf(pdf2_path) - - return fuzz.ratio(text1, text2) / 100 + return fuzz.ratio(text1, text2) / 100 + except Exception as e: + logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}") + return 0.0 def is_cookie_deleted(cookie_data, rule): diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index df31e99..5054c4f 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -61,6 +61,18 @@ def execute_command(): }), 500 +def _get_machine_architecture() -> str: + """ Get the machine architecture, e.g., x86_64, arm64, aarch64, i386, etc. + """ + architecture = platform.machine().lower() + if architecture in ['amd32', 'amd64', 'x86', 'x86_64', 'x86-64', 'x64', 'i386', 'i686']: + return 'amd' + elif architecture in ['arm64', 'aarch64', 'aarch32']: + return 'arm' + else: + return 'unknown' + + @app.route('/setup/launch', methods=["POST"]) def launch_app(): data = request.json @@ -71,6 +83,9 @@ def launch_app(): command = shlex.split(command) try: + if 'google-chrome' in command and _get_machine_architecture() == 'arm': + index = command.index('google-chrome') + command[index] = 'chromium-browser' # arm64 chrome is not available yet, can only use chromium subprocess.Popen(command, shell=shell) return "{:} launched successfully".format(command if shell else " ".join(command)) except Exception as e: @@ -287,6 +302,11 @@ def _create_atspi_node(node: Accessible) -> _Element: ) if "text" in locals() and len(text) > 0: xml_node.text = text + + # HACK: libreoffice has a problem -> billions of children for parent with RoleName "document spreadsheet" + if node.getRoleName() == "document spreadsheet": + return xml_node + for ch in node: xml_node.append(_create_atspi_node(ch)) return xml_node diff --git a/evaluation_examples/examples/multi_apps/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json b/evaluation_examples/examples/multi_apps/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json new file mode 100644 index 0000000..6bf6538 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json @@ -0,0 +1,93 @@ +{ + "id": "897e3b53-5d4d-444b-85cb-2cdc8a97d903", + "snapshot": "chrome", + "instruction": "I have a LibreOffice Writer file form.docx on the desktop. Help me convert it to PDF format and store it in the forms/ folder in Google Drive.", + "source": "https://marketplace.uipath.com/listings/convert-word-file-to-pdf-and-store-in-onedrive", + "config": [ + { + "type": "googledrive", + "parameters": { + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "operation": ["delete"], + "args": [ + { + "query": "title = 'form.pdf' or title = 'form.docx' or title = 'form.docx.pdf'", + "trash": false + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "chromium-browser", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.zhihu.com/", + "https://www.coursera.org/", + "https://www.wikidata.org/wiki/Wikidata:Main_Page" + ] + } + }, + { + "type": "login", + "parameters": { + "settings_file": "evaluation_examples/settings/google/settings.json", + "platform": "googledrive" + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=18TvzE8jnULU2g9XJsT-TaPEKcLGNVfu0&export=download&authuser=0&confirm=t&uuid=d914e031-9aa6-431b-81c0-73fcb87af027&at=APZUnTUx56WM_I3gnhHo-eZX__kx:1706158167271", + "path": "/home/user/Desktop/form.docx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/form.docx" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_writer", + "chrome" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "googledrive_file", + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "query": "( title = 'form.pdf' or title = 'form.docx.pdf' ) and trashed = false", + "dest": "form.pdf" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=118wb7zmG8yP7DS1cImP9-GcOeKib3fLp&export=download&authuser=0&confirm=t&uuid=b82542fa-7731-4014-8ebc-d940f0fb83fe&at=APZUnTVkmL9rk3EpA0Ak5JLPEnJZ:1706101389421", + "dest": "form_gold.pdf" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/f8cfa149-d1c1-4215-8dac-4a0932bad3c2.json b/evaluation_examples/examples/multi_apps/f8cfa149-d1c1-4215-8dac-4a0932bad3c2.json new file mode 100644 index 0000000..b584798 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/f8cfa149-d1c1-4215-8dac-4a0932bad3c2.json @@ -0,0 +1,73 @@ +{ + "id": "f8cfa149-d1c1-4215-8dac-4a0932bad3c2", + "snapshot": "chrome", + "instruction": "Could you help me copy the data in Cell B6 in this Libreoffice Calc file and search it in the Chrome browser.", + "source": "https://superuser.com/questions/1803088/libreoffice-calc-clears-clipboard", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1MHk40tzMiyOwNbRj1W-_A-nIisk1TbuM&export=download&authuser=0&confirm=t&uuid=a8674a99-fb5f-4aeb-98ca-338197d73890&at=APZUnTW2thoQGMqgEyD0lNcRISJ_:1706006672423", + "path": "/home/user/cell_search.xlsx" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://en.wikipedia.org/wiki/Main_Page", + "https://pytorch.org/tutorials/", + "https://releases.ubuntu.com/" + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/cell_search.xlsx" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_calc", + "chrome" + ], + "evaluator": { + "func": "is_expected_search_query", + "result": { + "type": "active_tab_info" + }, + "expected": { + "type": "rule", + "rules": { + "expect": { + "pattern": "www\\.google\\.com.*?/search\\?q=Nereida&" + } + } + } + } +} \ No newline at end of file diff --git a/evaluation_examples/settings/google/settings.json b/evaluation_examples/settings/google/settings.json new file mode 100644 index 0000000..a72dd31 --- /dev/null +++ b/evaluation_examples/settings/google/settings.json @@ -0,0 +1,4 @@ +{ + "account": "xlang2024anonym@gmail.com", + "password": "q]wN~0iD>H:6" +} \ No newline at end of file diff --git a/evaluation_examples/settings/googledrive/client_secrets.json b/evaluation_examples/settings/googledrive/client_secrets.json new file mode 100644 index 0000000..9bb7044 --- /dev/null +++ b/evaluation_examples/settings/googledrive/client_secrets.json @@ -0,0 +1,13 @@ +{ + "installed": { + "client_id": "786888752612-6cv6lermep9n6704s4kv20h08lotias9.apps.googleusercontent.com", + "project_id": "xlang-2024-benchmarking", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_secret": "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", + "redirect_uris": [ + "http://localhost" + ] + } +} \ No newline at end of file diff --git a/evaluation_examples/settings/googledrive/credentials.json b/evaluation_examples/settings/googledrive/credentials.json new file mode 100644 index 0000000..491c002 --- /dev/null +++ b/evaluation_examples/settings/googledrive/credentials.json @@ -0,0 +1 @@ +{"access_token": "ya29.a0AfB_byAHYEB_El-ry3ChNYPLX14Tmq9Nq5j640ghesztCRfoMEPMW79ENOZT8Ab-5wdY7UFxL2ih5l5nhsBf3QENT8W7l7X3QmtQrhyUnDzCfWW2tzYxrNR34iisY0OFWAR9JSfyQPvkpkWQOq0T0QGkBZ8Rg60PVGraNgaCgYKAfQSARISFQHGX2MiTg-uFBVMQnVo8b2H7tuOqw0173", "client_id": "786888752612-6cv6lermep9n6704s4kv20h08lotias9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0e0qXy4xW1Ud5CgYIARAAGA4SNwF-L9IrWfaomed_CK0R7zZffcpT-GIXf3y2ZjqqAD0UP6UkbaMV9F_OEC6pBVaaX4TYnBKx3os", "token_expiry": "2024-01-25T05:51:17Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byAHYEB_El-ry3ChNYPLX14Tmq9Nq5j640ghesztCRfoMEPMW79ENOZT8Ab-5wdY7UFxL2ih5l5nhsBf3QENT8W7l7X3QmtQrhyUnDzCfWW2tzYxrNR34iisY0OFWAR9JSfyQPvkpkWQOq0T0QGkBZ8Rg60PVGraNgaCgYKAfQSARISFQHGX2MiTg-uFBVMQnVo8b2H7tuOqw0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"} \ No newline at end of file diff --git a/evaluation_examples/settings/googledrive/settings.yml b/evaluation_examples/settings/googledrive/settings.yml new file mode 100644 index 0000000..d3e52e5 --- /dev/null +++ b/evaluation_examples/settings/googledrive/settings.yml @@ -0,0 +1,8 @@ +client_config_backend: file +client_config_file: evaluation_examples/settings/googledrive/client_secrets.json + +save_credentials: True +save_credentials_backend: file +save_credentials_file: evaluation_examples/settings/googledrive/credentials.json + +get_refresh_token: True \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index a31275c..68ca347 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,3 +34,4 @@ chardet playwright backoff formulas +pydrive