Update loaded Chrome examples

2024-02-23 14:15:16 +08:00
parent 81863b26dd
commit f812436ad3
10 changed files with 396 additions and 35 deletions
--- a/desktop_env/evaluators/getters/__init__.py
+++ b/desktop_env/evaluators/getters/__init__.py
@@ -6,6 +6,7 @@ from .chrome import (
    get_pdf_from_url,
    get_shortcuts_on_desktop,
    get_history,
    get_page_info,
    get_enabled_experiments,
    get_chrome_language,
    get_chrome_font_size,
--- a/desktop_env/evaluators/getters/chrome.py
+++ b/desktop_env/evaluators/getters/chrome.py
@@ -3,9 +3,10 @@ import logging
 import os
 import sqlite3
 from typing import Dict, Any
 from playwright.sync_api import sync_playwright
 from pydrive.auth import GoogleAuth
 from pydrive.drive import GoogleDrive, GoogleDriveFileList, GoogleDriveFile
 from playwright.sync_api import sync_playwright
 logger = logging.getLogger("desktopenv.getters.chrome")
@@ -310,6 +311,36 @@ def get_extensions_installed_from_shop(env, config: Dict[str, str]):
 # The following ones require Playwright to be installed on the target machine, and the chrome needs to be pre-config on
 # port info to allow remote debugging, see README.md for details
 def get_page_info(env, config: Dict[str, str]):
    """
    Open ``config["url"]`` in a new tab of the remote Chrome and snapshot it.

    Connects over CDP to ``http://{env.vm_ip}:9222``, opens a new page in the
    first browser context, navigates to the URL and waits for the 'load' event.

    Args:
        env: environment object; only ``env.vm_ip`` is read here.
        config: must contain the key ``"url"`` with the address to open.

    Returns:
        A dict with the keys ``'title'``, ``'url'`` and ``'content'``.
        On a load timeout the title is ``'Load timeout'``; on any other error
        while loading/reading the page it is ``'Error encountered'``. In both
        fallback cases ``'content'`` is whatever HTML could still be read, or
        an empty string if reading it failed as well.
    """
    # Playwright raises its own TimeoutError, which is NOT a subclass of the
    # builtin TimeoutError, so it has to be imported and caught explicitly.
    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError

    host = env.vm_ip
    port = 9222  # fixme: this port is hard-coded, need to be changed from config file
    url = config["url"]

    remote_debugging_url = f"http://{host}:{port}"
    with sync_playwright() as p:
        # connect to remote Chrome instance
        browser = p.chromium.connect_over_cdp(remote_debugging_url)
        try:
            page = browser.contexts[0].new_page()

            def _fallback_info(title):
                # Reading the HTML can fail too (e.g. mid-navigation); never let
                # the fallback path raise a second exception.
                try:
                    content = page.content()
                except Exception as content_error:
                    logger.error(f"Error: {content_error}")
                    content = ''
                return {'title': title, 'url': page.url, 'content': content}

            try:
                # Navigation is inside the try so that a goto() timeout is
                # reported as 'Load timeout' instead of escaping the handlers.
                page.goto(url)
                # Wait for the page to finish loading, this prevents the
                # "execution context was destroyed" issue
                page.wait_for_load_state('load')  # Wait for the 'load' event to complete
                page_info = {'title': page.title(), 'url': page.url, 'content': page.content()}
            except (PlaywrightTimeoutError, TimeoutError):
                # If page loading times out, return the information available so far
                page_info = _fallback_info('Load timeout')
            except Exception as e:
                # Catch other potential exceptions that might occur while reading the page
                logger.error(f"Error: {e}")
                page_info = _fallback_info('Error encountered')
        finally:
            # Always release the CDP connection, even if something above raised.
            browser.close()

        return page_info
 def get_open_tabs_info(env, config: Dict[str, str]):
    host = env.vm_ip
    port = 9222  # fixme: this port is hard-coded, need to be changed from config file
@@ -487,9 +518,9 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
            for q in _query:
                search = f'( {q} ) and "{parent_id}" in parents'
                filelist: GoogleDriveFileList = drive.ListFile({'q': search}).GetList()
-                if len(filelist) == 0: # target file not found
+                if len(filelist) == 0:  # target file not found
                    return None
-                file: GoogleDriveFile = filelist[0] # HACK: if multiple candidates, just use the first one
+                file: GoogleDriveFile = filelist[0]  # HACK: if multiple candidates, just use the first one
                parent_id = file['id']
            file.GetContentFile(_path, mimetype=file['mimeType'])
@@ -501,8 +532,9 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
    if 'query' in config:
        return get_single_file(config['query'], os.path.join(env.cache_dir, config['dest']))
    elif 'path' in config:
-        query = [f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(config['path']) - 1
+        query = [f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(
-                    else f"title = '{fp}' and trashed = false" for idx, fp in enumerate(config['path'])]
+            config['path']) - 1
                 else f"title = '{fp}' and trashed = false" for idx, fp in enumerate(config['path'])]
        return get_single_file(query, os.path.join(env.cache_dir, config['dest']))
    elif 'query_list' in config:
        _path_list = []
@@ -511,12 +543,14 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
            dest = config['dest'][idx]
            _path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
        return _path_list
-    else: # path_list in config
+    else:  # path_list in config
        _path_list = []
        assert len(config['path_list']) == len(config['dest'])
        for idx, path in enumerate(config['path_list']):
-            query = [f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if jdx < len(path) - 1
+            query = [
-                        else f"title = '{fp}' and trashed = false" for jdx, fp in enumerate(path)]
+                f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if jdx < len(
                    path) - 1
                else f"title = '{fp}' and trashed = false" for jdx, fp in enumerate(path)]
            dest = config['dest'][idx]
            _path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
        return _path_list
@@ -545,7 +579,7 @@ def get_enable_do_not_track(env, config: Dict[str, str]):
        content = env.controller.get_file(preference_file_path)
        data = json.loads(content)
-        if_enable_do_not_track = data.get('enable_do_not_track', {}) # bool
+        if_enable_do_not_track = data.get('enable_do_not_track', {})  # bool
        return "true" if if_enable_do_not_track else "false"
    except Exception as e:
        logger.error(f"Error: {e}")
@@ -575,7 +609,7 @@ def get_enable_enhanced_safety_browsing(env, config: Dict[str, str]):
        content = env.controller.get_file(preference_file_path)
        data = json.loads(content)
-        if_enable_do_not_track = data.get('safebrowsing', {}).get('enhanced', {}) # bool
+        if_enable_do_not_track = data.get('safebrowsing', {}).get('enhanced', {})  # bool
        return "true" if if_enable_do_not_track else "false"
    except Exception as e:
        logger.error(f"Error: {e}")
@@ -610,7 +644,7 @@ def get_new_startup_page(env, config: Dict[str, str]):
        if "session" not in data.keys():
            return "true"
        else:
-            if_enable_do_not_track = data.get('session', {}).get('restore_on_startup', {}) # int, need to be 5
+            if_enable_do_not_track = data.get('session', {}).get('restore_on_startup', {})  # int, need to be 5
            return "true" if if_enable_do_not_track == 5 else "false"
    except Exception as e:
        logger.error(f"Error: {e}")
@@ -648,4 +682,4 @@ def get_find_unpacked_extension_path(env, config: Dict[str, str]):
        return all_extensions_path
    except Exception as e:
        logger.error(f"Error: {e}")
-        return "Google"
+        return "Google"
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -16,7 +16,8 @@ from .chrome import (
    check_enabled_experiments,
    check_history_deleted,
    is_expected_search_query,
-    is_expected_active_tab
+    is_expected_active_tab,
    is_added_to_steam_cart
 )
 from .docs import (
    compare_font_names,
--- a/desktop_env/evaluators/metrics/chrome.py
+++ b/desktop_env/evaluators/metrics/chrome.py
@@ -1,8 +1,12 @@
-import logging, re, os, shutil
+import logging
 import os
 import re
 import shutil
 from typing import Any, Dict, List, Union
-from bs4 import BeautifulSoup, Tag
+
 import fitz  # PyMuPDF
 import rapidfuzz.fuzz as fuzz
 from bs4 import BeautifulSoup, Tag
 from desktop_env.evaluators.metrics.utils import are_lists_equal, compare_urls
@@ -25,6 +29,7 @@ def is_expected_active_tab(active_tab_info: Dict[str, str], rule: Dict[str, Any]
        logger.error(f"Unknown type: {match_type}")
        return 0
 def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
    """
    Checks if the expected tabs are open in Chrome.
@@ -102,14 +107,14 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
    pred_folder = os.path.splitext(pred_path)[0] + '_pred'
    gold_folder = os.path.splitext(gold_path)[0] + '_gold'
-    if os.path.exists(pred_folder): # remove existing folder for new predictions
+    if os.path.exists(pred_folder):  # remove existing folder for new predictions
        shutil.rmtree(pred_folder, ignore_errors=True)
    os.makedirs(pred_folder)
    shutil.unpack_archive(pred_path, pred_folder)
-    if not os.path.exists(gold_folder): # use cache if exists
+    if not os.path.exists(gold_folder):  # use cache if exists
        os.makedirs(gold_folder)
        shutil.unpack_archive(gold_path, gold_folder)
-    
+
    pred_files = sorted(os.listdir(pred_folder))
    gold_files = sorted(os.listdir(gold_folder))
    if pred_files != gold_files: return 0.
@@ -119,7 +124,8 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
        if file_type == 'text':
            from .vscode import compare_text_file
            return compare_text_file
-        elif file_type == 'pdf': return compare_pdfs
+        elif file_type == 'pdf':
            return compare_pdfs
        elif file_type == 'docx':
            from .docs import compare_docx_files
            return compare_docx_files
@@ -141,7 +147,8 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
        elif file_type == 'video':
            from .vlc import compare_videos
            return compare_videos
-        else: raise ValueError('[ERROR]: not support file type: %s' % file_type)
+        else:
            raise ValueError('[ERROR]: not support file type: %s' % file_type)
    score = 0
    compare_function = get_compare_function()
@@ -160,7 +167,7 @@ def compare_htmls(html_path1: str, html_path2: str) -> float:
        soup1 = BeautifulSoup(inf, 'lxml')
    with open(html_path2, 'r', encoding='utf-8') as inf:
        soup2 = BeautifulSoup(inf, 'lxml')
-    
+
    def compare_elements(elem1, elem2):
        if not (isinstance(elem1, Tag) and isinstance(elem2, Tag)):
            return elem1 == elem2
@@ -252,3 +259,18 @@ def check_font_size(font_size, rule):
        return 1. if rule['min'] < default_font_size < rule['max'] else 0.
    else:
        raise TypeError(f"{rule['type']} not support yet!")
 def is_added_to_steam_cart(active_tab_info, rule):
    """
    Check whether every expected item shows up in the page content.

    Args:
        active_tab_info: mapping whose 'content' entry is searched.
        rule: mapping whose 'items' entry lists the values to look for.

    Returns:
        1. if each entry of rule['items'] is contained in
        active_tab_info['content'], otherwise 0.
    """
    expected_items = rule['items']
    page_content = active_tab_info['content']

    # A single missing item is enough to fail the check.
    all_present = all(name in page_content for name in expected_items)
    return 1. if all_present else 0.
--- a/evaluation_examples/examples/chrome/0d8b7de3-e8de-4d86-b9fd-dd2dce58a217.json
+++ b/evaluation_examples/examples/chrome/0d8b7de3-e8de-4d86-b9fd-dd2dce58a217.json
@@ -3,16 +3,66 @@
  "snapshot": "chrome",
  "instruction": "Browse the natural products database.",
  "source": "Mind2Web",
-  "config": [],
+  "config": [
    {
      "type": "launch",
      "parameters": {
        "command": [
          "google-chrome",
          "--remote-debugging-port=1337"
        ]
      }
    },
    {
      "type": "launch",
      "parameters": {
        "command": [
          "socat",
          "tcp-listen:9222,fork",
          "tcp:localhost:1337"
        ]
      }
    },
    {
      "type": "chrome_open_tabs",
      "parameters": {
        "urls_to_open": [
          "https://drugs.com"
        ]
      }
    },
    {
      "type": "activate_window",
      "parameters": {
        "window_name": "Google Chrome"
      }
    },
    {
      "type": "execute",
      "parameters": {
        "command": [
          "python",
          "-c",
          "import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);"
        ]
      }
    }
  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "chrome"
  ],
  "evaluator": {
-    "func": "",
+    "func": "is_expected_active_tab",
    "result": {
      "type": "active_tab_info"
    },
    "expected": {
      "type": "rule",
      "rules": {
        "type": "url",
        "url": "https://www.drugs.com/npc/"
      }
    }
  }
 }
--- a/evaluation_examples/examples/chrome/121ba48f-9e17-48ce-9bc6-a4fb17a7ebba.json
+++ b/evaluation_examples/examples/chrome/121ba48f-9e17-48ce-9bc6-a4fb17a7ebba.json
@@ -3,16 +3,69 @@
  "snapshot": "chrome",
  "instruction": "Find Dota 2 game and add all DLC to cart.",
  "source": "Mind2Web",
-  "config": [],
+  "config": [
    {
      "type": "launch",
      "parameters": {
        "command": [
          "google-chrome",
          "--remote-debugging-port=1337"
        ]
      }
    },
    {
      "type": "launch",
      "parameters": {
        "command": [
          "socat",
          "tcp-listen:9222,fork",
          "tcp:localhost:1337"
        ]
      }
    },
    {
      "type": "chrome_open_tabs",
      "parameters": {
        "urls_to_open": [
          "https://www.dota2.com/home",
          "https://store.steampowered.com"
        ]
      }
    },
    {
      "type": "activate_window",
      "parameters": {
        "window_name": "Google Chrome"
      }
    },
    {
      "type": "execute",
      "parameters": {
        "command": [
          "python",
          "-c",
          "import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);"
        ]
      }
    }
  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "chrome"
  ],
  "evaluator": {
-    "func": "",
+    "func": "is_added_to_steam_cart",
    "result": {
      "type": "page_info",
      "url": "https://store.steampowered.com/cart/"
    },
    "expected": {
      "type": "rule",
      "rules": {
        "items": [
         "The Dota 2 Official Soundtrack"
        ]
      }
    }
  }
 }
--- a/evaluation_examples/examples/chrome/59155008-fe71-45ec-8a8f-dc35497b6aa8.json
+++ b/evaluation_examples/examples/chrome/59155008-fe71-45ec-8a8f-dc35497b6aa8.json
@@ -1,18 +1,68 @@
 {
-  "id": "",
+  "id": "59155008-fe71-45ec-8a8f-dc35497b6aa8",
  "snapshot": "chrome",
-  "instruction": "",
+  "instruction": "What are the similar names to the name carl",
  "source": "Mind2Web",
-  "config": [],
+  "config": [
    {
      "type": "launch",
      "parameters": {
        "command": [
          "google-chrome",
          "--remote-debugging-port=1337"
        ]
      }
    },
    {
      "type": "launch",
      "parameters": {
        "command": [
          "socat",
          "tcp-listen:9222,fork",
          "tcp:localhost:1337"
        ]
      }
    },
    {
      "type": "chrome_open_tabs",
      "parameters": {
        "urls_to_open": [
          "https://www.babycenter.com/child"
        ]
      }
    },
    {
      "type": "activate_window",
      "parameters": {
        "window_name": "Google Chrome"
      }
    },
    {
      "type": "execute",
      "parameters": {
        "command": [
          "python",
          "-c",
          "import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);"
        ]
      }
    }
  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "chrome"
  ],
  "evaluator": {
-    "func": "",
+    "func": "is_expected_active_tab",
    "result": {
      "type": "active_tab_info"
    },
    "expected": {
      "type": "rule",
      "rules": {
        "type": "url",
        "url": "https://www.babycenter.com/baby-names/details/carl-853"
      }
    }
  }
 }
--- a/evaluation_examples/examples/chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json
+++ b/evaluation_examples/examples/chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json
@@ -3,16 +3,66 @@
  "snapshot": "chrome",
  "instruction": "Find the Driver License Eligibility Requirements",
  "source": "Mind2Web",
-  "config": [],
+  "config": [
    {
      "type": "launch",
      "parameters": {
        "command": [
          "google-chrome",
          "--remote-debugging-port=1337"
        ]
      }
    },
    {
      "type": "launch",
      "parameters": {
        "command": [
          "socat",
          "tcp-listen:9222,fork",
          "tcp:localhost:1337"
        ]
      }
    },
    {
      "type": "chrome_open_tabs",
      "parameters": {
        "urls_to_open": [
          "https://www.dmv.virginia.gov/"
        ]
      }
    },
    {
      "type": "activate_window",
      "parameters": {
        "window_name": "Google Chrome"
      }
    },
    {
      "type": "execute",
      "parameters": {
        "command": [
          "python",
          "-c",
          "import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);"
        ]
      }
    }
  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "chrome"
  ],
  "evaluator": {
-    "func": "",
+    "func": "is_expected_active_tab",
    "result": {
      "type": "active_tab_info"
    },
    "expected": {
      "type": "rule",
      "rules": {
        "type": "url",
        "url": "https://www.dmv.virginia.gov/licenses-ids/license/applying/eligibility"
      }
    }
  }
 }
--- a/evaluation_examples/examples/chrome/f0b971a1-6831-4b9b-a50e-22a6e47f45ba.json
+++ b/evaluation_examples/examples/chrome/f0b971a1-6831-4b9b-a50e-22a6e47f45ba.json
@@ -3,16 +3,66 @@
  "snapshot": "chrome",
  "instruction": "Show me the scores for the 2019 super bowl",
  "source": "Mind2Web",
-  "config": [],
+  "config": [
    {
      "type": "launch",
      "parameters": {
        "command": [
          "google-chrome",
          "--remote-debugging-port=1337"
        ]
      }
    },
    {
      "type": "launch",
      "parameters": {
        "command": [
          "socat",
          "tcp-listen:9222,fork",
          "tcp:localhost:1337"
        ]
      }
    },
    {
      "type": "chrome_open_tabs",
      "parameters": {
        "urls_to_open": [
          "https://www.nfl.com/"
        ]
      }
    },
    {
      "type": "activate_window",
      "parameters": {
        "window_name": "Google Chrome"
      }
    },
    {
      "type": "execute",
      "parameters": {
        "command": [
          "python",
          "-c",
          "import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);"
        ]
      }
    }
  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "chrome"
  ],
  "evaluator": {
-    "func": "",
+    "func": "is_expected_active_tab",
    "result": {
      "type": "active_tab_info"
    },
    "expected": {
      "type": "rule",
      "rules": {
        "type": "url",
        "url": "https://www.nfl.com/scores/2019/POST4"
      }
    }
  }
 }
--- a/evaluation_examples/examples/chrome/f5d96daf-83a8-4c86-9686-bada31fc66ab.json
+++ b/evaluation_examples/examples/chrome/f5d96daf-83a8-4c86-9686-bada31fc66ab.json
@@ -3,16 +3,66 @@
  "snapshot": "chrome",
  "instruction": "Compare iPhone 15 Pro Max with iPhone 13 Pro Max",
  "source": "Mind2Web",
-  "config": [],
+  "config": [
    {
      "type": "launch",
      "parameters": {
        "command": [
          "google-chrome",
          "--remote-debugging-port=1337"
        ]
      }
    },
    {
      "type": "launch",
      "parameters": {
        "command": [
          "socat",
          "tcp-listen:9222,fork",
          "tcp:localhost:1337"
        ]
      }
    },
    {
      "type": "chrome_open_tabs",
      "parameters": {
        "urls_to_open": [
          "https://www.apple.com/"
        ]
      }
    },
    {
      "type": "activate_window",
      "parameters": {
        "window_name": "Google Chrome"
      }
    },
    {
      "type": "execute",
      "parameters": {
        "command": [
          "python",
          "-c",
          "import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);"
        ]
      }
    }
  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "chrome"
  ],
  "evaluator": {
-    "func": "",
+    "func": "is_expected_active_tab",
    "result": {
      "type": "active_tab_info"
    },
    "expected": {
      "type": "rule",
      "rules": {
        "type": "url",
        "url": "https://www.apple.com/iphone/compare/?modelList=iphone-15-pro-max,iphone-15-pro,iphone-13-pro-max"
      }
    }
  }
 }