add multi_apps; update chrome utilities

2024-01-25 13:53:19 +08:00
parent 37ec196b18
commit f194fb8d75
13 changed files with 383 additions and 14 deletions
--- a/desktop_env/controllers/setup.py
+++ b/desktop_env/controllers/setup.py
@@ -8,7 +8,9 @@ from typing import Any, Union, Optional
 from typing import Dict, List
 import requests
-from playwright.sync_api import sync_playwright
+from pydrive.auth import GoogleAuth
 from pydrive.drive import GoogleDrive, GoogleDriveFile, GoogleDriveFileList
 from playwright.sync_api import sync_playwright, TimeoutError
 from requests_toolbelt.multipart.encoder import MultipartEncoder
 from desktop_env.evaluators.metrics.utils import compare_urls
@@ -46,7 +48,7 @@ class SetupController:
            # Assumes all the setup functions follow this naming
            # protocol: "_<config_type>_setup"
            setup_function: str = "_{:}_setup".format(config_type)
-            assert hasattr(self, setup_function)
+            assert hasattr(self, setup_function), f'Setup controller cannot find init function {setup_function}'
            getattr(self, setup_function)(**parameters)
            logger.info("SETUP: %s(%s)", setup_function, str(parameters))
@@ -416,3 +418,99 @@ class SetupController:
            # Do not close the context or browser; they will remain open after script ends
            return browser, context
    # google drive setup
    def _googledrive_setup(self, **config):
        """ Reset the Google Drive space so that leftovers of previous experiments do not affect this one.
        @args:
            config(Dict[str, Any]): contain keys
                settings_file(str): path to google drive settings file, which will be loaded by
                    pydrive.auth.GoogleAuth(); defaults to evaluation_examples/settings/googledrive/settings.yml
                operation(List[str]): each operation is chosen from ['delete', 'upload']
                args(List[Dict[str, Any]]): parameters for each operation, looked up by the position
                    of the operation in `operation`
            different args dict for different operations:
                for delete:
                    query(str): query pattern string selecting the files/folders to remove, please refer to
                        https://developers.google.com/drive/api/guides/search-files?hl=en about how to write
                        query string. Defaults to 'trashed = false', i.e. everything not yet in the trash.
                    trash(bool): True (default) moves the matches to the trash (can be recovered),
                        False deletes them permanently.
                for upload:
                    not implemented yet, the operation is accepted and ignored
        @raises:
            ValueError: if an operation is neither 'delete' nor 'upload'
        """
        drive = GoogleDrive(GoogleAuth(
            settings_file=config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.yml')
        ))
        for index, op_name in enumerate(config['operation']):
            if op_name == 'upload':
                # TODO: other operations
                continue
            if op_name != 'delete':
                raise ValueError('[ERROR]: not implemented clean type!')

            # args are only looked up for operations that actually need them
            op_args = config['args'][index]
            query = op_args.get('query', 'trashed = false')
            move_to_trash = op_args.get('trash', True)
            matches: GoogleDriveFileList = drive.ListFile({'q': query}).GetList()
            for drive_file in matches:
                drive_file: GoogleDriveFile
                # note that trashing/deleting a folder also trashes/deletes everything inside it
                # (the same holds for UnTrash)
                remove = drive_file.Trash if move_to_trash else drive_file.Delete
                remove()
    def _login_setup(self, **config):
        """ Login to a website with account and password information.

        Connects to the browser at http://{self.vm_ip}:9222 over the Chrome DevTools Protocol
        (up to 15 attempts, one second apart), opens the platform page in a new tab of the first
        browser context and fills in the login form.

        @args:
            config(Dict[str, Any]): contain keys
                settings_file(str): path to a JSON settings file with the keys 'account' and 'password'
                platform(str): platform to login, implemented platforms include:
                    googledrive: https://drive.google.com/drive/my-drive
        @returns:
            (browser, context) after the login form was submitted; None if a login element
            did not show up in time
        @raises:
            NotImplementedError: if the platform is not implemented
            Exception: the last connection error if the browser cannot be reached
        """
        host = self.vm_ip
        port = 9222  # fixme: this port is hard-coded, need to be changed from config file
        remote_debugging_url = f"http://{host}:{port}"
        max_attempts = 15

        with sync_playwright() as p:
            browser = None
            for attempt in range(max_attempts):
                try:
                    browser = p.chromium.connect_over_cdp(remote_debugging_url)
                    break
                except Exception as e:
                    if attempt < max_attempts - 1:
                        logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}")
                        time.sleep(1)
                    else:
                        logger.error(f"Failed to connect after multiple attempts: {e}")
                        raise  # keep the original traceback of the last failure

            # defensive: the loop above either sets the browser or re-raises
            if not browser:
                return

            context = browser.contexts[0]
            platform_name = config['platform']

            if platform_name == 'googledrive':
                url = 'https://drive.google.com/drive/my-drive'
                page = context.new_page()  # Create a new page (tab) within the existing context
                page.goto(url)
                logger.info(f"Opened new page: {url}")

                # close the settings file deterministically instead of leaking the handle
                with open(config['settings_file'], encoding='utf-8') as settings_fp:
                    settings = json.load(settings_fp)
                email, password = settings['account'], settings['password']

                try:
                    page.wait_for_selector('input[type="email"]', state="visible", timeout=3000)
                    page.fill('input[type="email"]', email)
                    page.click('#identifierNext > div > button')
                    page.wait_for_selector('input[type="password"]', state="visible", timeout=5000)
                    page.fill('input[type="password"]', password)
                    page.click('#passwordNext > div > button')
                    page.wait_for_load_state('load', timeout=5000)
                except TimeoutError:
                    logger.info('[ERROR]: timeout when waiting for google drive login page to load!')
                    return
            else:
                raise NotImplementedError

            return browser, context
--- a/desktop_env/evaluators/getters/init.py
+++ b/desktop_env/evaluators/getters/init.py
@@ -1,7 +1,6 @@
 from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, \
    get_shortcuts_on_desktop, get_history, get_enabled_experiments, get_chrome_language, get_chrome_font_size, \
-    get_profile_name, \
+    get_profile_name, get_number_of_search_results, get_googledrive_file
    get_number_of_search_results
 from .file import get_cloud_file, get_vm_file, get_cache_file
 from .general import get_vm_command_line
 from .impress import get_audio_in_slide
@@ -9,4 +8,4 @@ from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper
 from .misc import get_rule, get_accessibility_tree
 from .replay import get_replay
 from .vlc import get_vlc_playing_info, get_vlc_config
-from .vscode import get_vscode_config
+from .vscode import get_vscode_config
--- a/desktop_env/evaluators/getters/chrome.py
+++ b/desktop_env/evaluators/getters/chrome.py
@@ -2,8 +2,9 @@ import json
 import logging
 import os
 import sqlite3
-from typing import Dict
+from typing import Dict, Any
-
+from pydrive.auth import GoogleAuth
 from pydrive.drive import GoogleDrive, GoogleDriveFileList, GoogleDriveFile
 from playwright.sync_api import sync_playwright
 logger = logging.getLogger("desktopenv.getters.chrome")
@@ -461,3 +462,49 @@ def get_number_of_search_results(env, config: Dict[str, str]):
        browser.close()
    return actual_count
 def get_googledrive_file(env, config: Dict[str, Any]) -> str:
    """ Get the desired file from Google Drive based on config, return the downloaded local filepath.
    @args:
        env: environment object; only env.cache_dir is read (directory the file is downloaded into)
        config(Dict[str, Any]): contain keys
            settings_file(str): path to google drive settings file, which will be loaded by
                pydrive.auth.GoogleAuth(); defaults to evaluation_examples/settings/googledrive/settings.yml
            query(str): query pattern string to search the file in google drive
            dest(str): file name of the downloaded file, relative to env.cache_dir
    @returns:
        the local path of the downloaded file, or None if no file matches the query
        or the download fails
    """
    # the settings file shipped with the examples is settings.yml (same default as the setup controller)
    settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.yml')
    auth = GoogleAuth(settings_file=settings_file)
    drive = GoogleDrive(auth)

    q = config['query']
    filelist: GoogleDriveFileList = drive.ListFile({'q': q}).GetList()
    if not filelist: # target file not found
        return None

    file: GoogleDriveFile = filelist[0] # HACK: if multiple candidates, just download the first one
    _path = os.path.join(env.cache_dir, config['dest'])
    try:
        file.GetContentFile(_path, mimetype=file.metadata['mimeType'])
    except Exception as e:
        # best-effort download: report the cause, but let KeyboardInterrupt/SystemExit propagate
        logger.info(f'[ERROR]: Failed to download the file from Google Drive: {e}')
        return None
    return _path
--- a/desktop_env/evaluators/metrics/init.py
+++ b/desktop_env/evaluators/metrics/init.py
@@ -1,5 +1,5 @@
 from .chrome import is_expected_tabs, is_expected_bookmarks, compare_pdfs, is_cookie_deleted, is_shortcut_on_desktop, check_font_size, \
-                     check_enabled_experiments, check_history_deleted
+                     check_enabled_experiments, check_history_deleted, is_expected_search_query
 from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers, compare_docx_lines
 from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
    compare_insert_equation, compare_highlighted_text
@@ -20,4 +20,4 @@ from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter
 from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, compare_images, compare_audios, \
    compare_videos, check_qt_bgcone, check_one_instance_when_started_from_file,check_qt_minimal_view, check_qt_max_volume, \
    check_qt_slider_colours, check_global_key_play_pause
-from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed, check_json_settings, check_json_keybindings
+from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed, check_json_settings, check_json_keybindings
--- a/desktop_env/evaluators/metrics/chrome.py
+++ b/desktop_env/evaluators/metrics/chrome.py
@@ -1,4 +1,4 @@
-import logging
+import logging, re
 from typing import Any, Dict, List
 import fitz  # PyMuPDF
@@ -44,6 +44,15 @@ def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float:
        raise TypeError(f"{rule['type']} not support yet!")
 def is_expected_search_query(active_tab_info: Dict[str, str], rules: Dict[str, Any]) -> float:
    """ Check whether the URL of the active tab matches the expected search query pattern.
    @args:
        active_tab_info(Dict[str, str]): information about the active tab, only the 'url' entry is read;
            a missing tab info (None, empty, or without 'url') is scored as 0. instead of raising
        rules(Dict[str, Any]): must contain rules['expect']['pattern'], a regular expression that is
            searched (re.search, not a full match) in the URL
    @returns:
        1. if the pattern is found in the URL, 0. otherwise
    """
    pattern = rules['expect']['pattern']
    # nothing to match against when the active tab could not be obtained
    if not active_tab_info or active_tab_info.get('url') is None:
        return 0.
    if re.search(pattern, active_tab_info['url']):
        return 1.
    return 0.
 def compare_pdfs(pdf1_path, pdf2_path):
    """
    Compare two PDF files.
@@ -56,11 +65,14 @@ def compare_pdfs(pdf1_path, pdf2_path):
            for page in pdf:
                text += page.get_text()
        return text.strip()
    try:
        text1 = extract_text_from_pdf(pdf1_path)
        text2 = extract_text_from_pdf(pdf2_path)
-    text1 = extract_text_from_pdf(pdf1_path)
+        return fuzz.ratio(text1, text2) / 100
-    text2 = extract_text_from_pdf(pdf2_path)
+    except Exception as e:
-
+        logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
-    return fuzz.ratio(text1, text2) / 100
+        return 0.0
 def is_cookie_deleted(cookie_data, rule):
--- a/desktop_env/server/main.py
+++ b/desktop_env/server/main.py
@@ -61,6 +61,18 @@ def execute_command():
        }), 500
 def _get_machine_architecture() -> str:
    """ Get the machine architecture, e.g., x86_64, arm64, aarch64, i386, etc.
    """
    architecture = platform.machine().lower()
    if architecture in ['amd32', 'amd64', 'x86', 'x86_64', 'x86-64', 'x64', 'i386', 'i686']:
        return 'amd'
    elif architecture in ['arm64', 'aarch64', 'aarch32']:
        return 'arm'
    else:
        return 'unknown'
@app.route('/setup/launch', methods=["POST"])
 def launch_app():
    data = request.json
@@ -71,6 +83,9 @@ def launch_app():
        command = shlex.split(command)
    try:
        if 'google-chrome' in command and _get_machine_architecture() == 'arm':
            index = command.index('google-chrome')
            command[index] = 'chromium-browser' # arm64 chrome is not available yet, can only use chromium
        subprocess.Popen(command, shell=shell)
        return "{:} launched successfully".format(command if shell else " ".join(command))
    except Exception as e:
@@ -287,6 +302,11 @@ def _create_atspi_node(node: Accessible) -> _Element:
    )
    if "text" in locals() and len(text) > 0:
        xml_node.text = text
    # HACK: libreoffice has a problem -> billions of children for parent with RoleName "document spreadsheet"
    if node.getRoleName() == "document spreadsheet":
        return xml_node
    for ch in node:
        xml_node.append(_create_atspi_node(ch))
    return xml_node
--- a/evaluation_examples/examples/multi_apps/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json
+++ b/evaluation_examples/examples/multi_apps/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json
@@ -0,0 +1,93 @@
 {
    "id": "897e3b53-5d4d-444b-85cb-2cdc8a97d903",
    "snapshot": "chrome",
    "instruction": "I have a LibreOffice Writer file form.docx on the desktop. Help me convert it to PDF format and store it in the forms/ folder in Google Drive.",
    "source": "https://marketplace.uipath.com/listings/convert-word-file-to-pdf-and-store-in-onedrive",
    "config": [
        {
            "type": "googledrive",
            "parameters": {
                "settings_file": "evaluation_examples/settings/googledrive/settings.yml",
                "operation": ["delete"],
                "args": [
                    {
                        "query": "title = 'form.pdf' or title = 'form.docx' or title = 'form.docx.pdf'",
                        "trash": false
                    }
                ]
            }
        },
        {
            "type": "launch",
            "parameters": {
                "command": [
                    "chromium-browser",
                    "--remote-debugging-port=1337"
                ]
            }
        },
        {
            "type": "launch",
            "parameters": {
                "command": [
                    "socat",
                    "tcp-listen:9222,fork",
                    "tcp:localhost:1337"
                ]
            }
        },
        {
            "type": "chrome_open_tabs",
            "parameters": {
                "urls_to_open": [
                    "https://www.zhihu.com/",
                    "https://www.coursera.org/",
                    "https://www.wikidata.org/wiki/Wikidata:Main_Page"
                ]
            }
        },
        {
            "type": "login",
            "parameters": {
                "settings_file": "evaluation_examples/settings/google/settings.json",
                "platform": "googledrive"
            }
        },
        {
            "type": "download",
            "parameters": {
              "files": [
                {
                  "url": "https://drive.usercontent.google.com/download?id=18TvzE8jnULU2g9XJsT-TaPEKcLGNVfu0&export=download&authuser=0&confirm=t&uuid=d914e031-9aa6-431b-81c0-73fcb87af027&at=APZUnTUx56WM_I3gnhHo-eZX__kx:1706158167271",
                  "path": "/home/user/Desktop/form.docx"
                }
              ]
            }
          },
          {
            "type": "open",
            "parameters": {
              "path": "/home/user/Desktop/form.docx"
            }
          }
    ],
    "trajectory": "trajectories/",
    "related_apps": [
        "libreoffice_writer",
        "chrome"
    ],
    "evaluator": {
        "func": "compare_pdfs",
        "result": {
            "type": "googledrive_file",
            "settings_file": "evaluation_examples/settings/googledrive/settings.yml",
            "query": "( title = 'form.pdf' or title = 'form.docx.pdf' ) and trashed = false",
            "dest": "form.pdf"
        },
        "expected": {
            "type": "cloud_file",
            "path": "https://drive.usercontent.google.com/download?id=118wb7zmG8yP7DS1cImP9-GcOeKib3fLp&export=download&authuser=0&confirm=t&uuid=b82542fa-7731-4014-8ebc-d940f0fb83fe&at=APZUnTVkmL9rk3EpA0Ak5JLPEnJZ:1706101389421",
            "dest": "form_gold.pdf"
        }
    }
 }
--- a/evaluation_examples/examples/multi_apps/f8cfa149-d1c1-4215-8dac-4a0932bad3c2.json
+++ b/evaluation_examples/examples/multi_apps/f8cfa149-d1c1-4215-8dac-4a0932bad3c2.json
@@ -0,0 +1,73 @@
 {
    "id": "f8cfa149-d1c1-4215-8dac-4a0932bad3c2",
    "snapshot": "chrome",
    "instruction": "Could you help me copy the data in Cell B6 in this Libreoffice Calc file and search it in the Chrome browser.",
    "source": "https://superuser.com/questions/1803088/libreoffice-calc-clears-clipboard",
    "config": [
        {
            "type": "download",
            "parameters": {
              "files": [
                {
                  "url": "https://drive.usercontent.google.com/download?id=1MHk40tzMiyOwNbRj1W-_A-nIisk1TbuM&export=download&authuser=0&confirm=t&uuid=a8674a99-fb5f-4aeb-98ca-338197d73890&at=APZUnTW2thoQGMqgEyD0lNcRISJ_:1706006672423",
                  "path": "/home/user/cell_search.xlsx"
                }
              ]
            }
          },
          {
            "type": "launch",
            "parameters": {
              "command": [
                "google-chrome",
                "--remote-debugging-port=1337"
              ]
            }
          },
          {
            "type": "launch",
            "parameters": {
              "command": [
                "socat",
                "tcp-listen:9222,fork",
                "tcp:localhost:1337"
              ]
            }
          },
          {
            "type": "chrome_open_tabs",
            "parameters": {
              "urls_to_open": [
                "https://en.wikipedia.org/wiki/Main_Page",
                "https://pytorch.org/tutorials/",
                "https://releases.ubuntu.com/"
              ]
            }
          },
          {
            "type": "open",
            "parameters": {
              "path": "/home/user/cell_search.xlsx"
            }
          }
    ],
    "trajectory": "trajectories/",
    "related_apps": [
      "libreoffice_calc",
      "chrome"
    ],
    "evaluator": {
      "func": "is_expected_search_query",
      "result": {
        "type": "active_tab_info"
      },
      "expected": {
        "type": "rule",
        "rules": {
            "expect": {
                "pattern": "www\\.google\\.com.*?/search\\?q=Nereida&"
            }
        }
      }
    }
 }
--- a/evaluation_examples/settings/google/settings.json
+++ b/evaluation_examples/settings/google/settings.json
@@ -0,0 +1,4 @@
 {
    "account": "xlang2024anonym@gmail.com",
    "password": "q]wN~0iD>H:6"
 }
--- a/evaluation_examples/settings/googledrive/client_secrets.json
+++ b/evaluation_examples/settings/googledrive/client_secrets.json
@@ -0,0 +1,13 @@
 {
    "installed": {
        "client_id": "786888752612-6cv6lermep9n6704s4kv20h08lotias9.apps.googleusercontent.com",
        "project_id": "xlang-2024-benchmarking",
        "auth_uri": "https://accounts.google.com/o/oauth2/auth",
        "token_uri": "https://oauth2.googleapis.com/token",
        "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
        "client_secret": "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",
        "redirect_uris": [
            "http://localhost"
        ]
    }
 }
--- a/evaluation_examples/settings/googledrive/credentials.json
+++ b/evaluation_examples/settings/googledrive/credentials.json
@@ -0,0 +1 @@
 {"access_token": "ya29.a0AfB_byAHYEB_El-ry3ChNYPLX14Tmq9Nq5j640ghesztCRfoMEPMW79ENOZT8Ab-5wdY7UFxL2ih5l5nhsBf3QENT8W7l7X3QmtQrhyUnDzCfWW2tzYxrNR34iisY0OFWAR9JSfyQPvkpkWQOq0T0QGkBZ8Rg60PVGraNgaCgYKAfQSARISFQHGX2MiTg-uFBVMQnVo8b2H7tuOqw0173", "client_id": "786888752612-6cv6lermep9n6704s4kv20h08lotias9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0e0qXy4xW1Ud5CgYIARAAGA4SNwF-L9IrWfaomed_CK0R7zZffcpT-GIXf3y2ZjqqAD0UP6UkbaMV9F_OEC6pBVaaX4TYnBKx3os", "token_expiry": "2024-01-25T05:51:17Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byAHYEB_El-ry3ChNYPLX14Tmq9Nq5j640ghesztCRfoMEPMW79ENOZT8Ab-5wdY7UFxL2ih5l5nhsBf3QENT8W7l7X3QmtQrhyUnDzCfWW2tzYxrNR34iisY0OFWAR9JSfyQPvkpkWQOq0T0QGkBZ8Rg60PVGraNgaCgYKAfQSARISFQHGX2MiTg-uFBVMQnVo8b2H7tuOqw0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}
--- a/evaluation_examples/settings/googledrive/settings.yml
+++ b/evaluation_examples/settings/googledrive/settings.yml
@@ -0,0 +1,8 @@
 # PyDrive settings, loaded via pydrive.auth.GoogleAuth(settings_file=...)
 # NOTE(review): the paths below are relative, presumably to the repository root - confirm the
 # working directory used when running the setup controller / evaluators.

 # read the OAuth client configuration from the file given in client_config_file
 client_config_backend: file
 client_config_file: evaluation_examples/settings/googledrive/client_secrets.json
 # store the obtained credentials in save_credentials_file
 # NOTE(review): client_secrets.json and credentials.json contain secrets/tokens and
 # should not be committed to version control
 save_credentials: True
 save_credentials_backend: file
 save_credentials_file: evaluation_examples/settings/googledrive/credentials.json
 # NOTE(review): presumably requests a refresh token as well - confirm against the PyDrive docs
 get_refresh_token: True
--- a/requirements.txt
+++ b/requirements.txt
@@ -34,3 +34,4 @@ chardet
 playwright
 backoff
 formulas
 pydrive
		`@@ -0,0 +1 @@`
							{"access_token": "ya29.a0AfB_byAHYEB_El-ry3ChNYPLX14Tmq9Nq5j640ghesztCRfoMEPMW79ENOZT8Ab-5wdY7UFxL2ih5l5nhsBf3QENT8W7l7X3QmtQrhyUnDzCfWW2tzYxrNR34iisY0OFWAR9JSfyQPvkpkWQOq0T0QGkBZ8Rg60PVGraNgaCgYKAfQSARISFQHGX2MiTg-uFBVMQnVo8b2H7tuOqw0173", "client_id": "786888752612-6cv6lermep9n6704s4kv20h08lotias9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0e0qXy4xW1Ud5CgYIARAAGA4SNwF-L9IrWfaomed_CK0R7zZffcpT-GIXf3y2ZjqqAD0UP6UkbaMV9F_OEC6pBVaaX4TYnBKx3os", "token_expiry": "2024-01-25T05:51:17Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byAHYEB_El-ry3ChNYPLX14Tmq9Nq5j640ghesztCRfoMEPMW79ENOZT8Ab-5wdY7UFxL2ih5l5nhsBf3QENT8W7l7X3QmtQrhyUnDzCfWW2tzYxrNR34iisY0OFWAR9JSfyQPvkpkWQOq0T0QGkBZ8Rg60PVGraNgaCgYKAfQSARISFQHGX2MiTg-uFBVMQnVo8b2H7tuOqw0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}