feat: enhance Chrome evaluator with improved retry logic and logging

- Implemented a retry mechanism for connecting to Chrome instances, allowing up to two attempts before failure.
- Increased timeout settings for page navigation and loading to enhance reliability.
- Added detailed logging for connection attempts, page loading status, and error handling to improve debugging and user experience.
- Ensured existing logic is preserved while enhancing error handling and operational robustness.

These changes improve the overall reliability and maintainability of the Chrome evaluator functions.
2025-07-17 11:15:47 +00:00
parent 2c51950e73
commit 9d04624e41
1 changed file with 464 additions and 196 deletions
--- a/desktop_env/evaluators/getters/chrome.py
+++ b/desktop_env/evaluators/getters/chrome.py
@@ -542,11 +542,22 @@ def get_page_info(env, config: Dict[str, str]):
    url = config["url"]
    remote_debugging_url = f"http://{host}:{port}"
    # Configuration for retry and timeout
    max_retries = 2
    timeout_ms = 60000  # Increase timeout to 60 seconds
    for attempt in range(max_retries):
        try:
            logger.info(f"[PAGE_INFO] Attempt {attempt + 1}/{max_retries} for URL: {url}")
            with sync_playwright() as p:
                # connect to remote Chrome instance
                try:
                    browser = p.chromium.connect_over_cdp(remote_debugging_url)
                    logger.info(f"[PAGE_INFO] Successfully connected to existing Chrome instance")
                except Exception as e:
                    logger.warning(f"[PAGE_INFO] Failed to connect to existing Chrome instance: {e}")
                    # If the connection fails, start a new browser instance
                    platform.machine()
                    if "arm" in platform.machine():
@@ -565,27 +576,49 @@ def get_page_info(env, config: Dict[str, str]):
                    requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload)
                    time.sleep(5)
                    browser = p.chromium.connect_over_cdp(remote_debugging_url)
                    logger.info(f"[PAGE_INFO] Successfully connected to new Chrome instance")
-        page = browser.contexts[0].new_page()
+                page = browser.new_page()
-        page.goto(url)
+                
                # Set longer timeout for navigation
                page.set_default_timeout(timeout_ms)
                logger.info(f"[PAGE_INFO] Navigating to URL: {url}")
                page.goto(url, wait_until='networkidle', timeout=timeout_ms)
                try:
                    # Wait for the page to finish loading, this prevents the "execution context was destroyed" issue
-            page.wait_for_load_state('load')  # Wait for the 'load' event to complete
+                    page.wait_for_load_state('networkidle', timeout=timeout_ms)  # Wait for the 'load' event to complete
                    title = page.title()
                    url = page.url
                    page_info = {'title': title, 'url': url, 'content': page.content()}
                    logger.info(f"[PAGE_INFO] Successfully loaded page. Title: '{title}'")
                except TimeoutError:
                    # If page loading times out, catch the exception and store the current information in the list
                    logger.warning(f"[PAGE_INFO] Page load timeout for URL: {url}")
                    page_info = {'title': 'Load timeout', 'url': page.url, 'content': page.content()}
                except Exception as e:
                    # Catch other potential exceptions that might occur while reading the page title
-            print(f'Error: {e}')
+                    logger.error(f'[PAGE_INFO] Error: {e}')
                    page_info = {'title': 'Error encountered', 'url': page.url, 'content': page.content()}
                browser.close()
                return page_info
        except Exception as e:
            logger.error(f"[PAGE_INFO] Attempt {attempt + 1} failed: {str(e)}")
            logger.error(f"[PAGE_INFO] Exception type: {type(e).__name__}")
            if attempt < max_retries - 1:
                logger.info(f"[PAGE_INFO] Retrying in 3 seconds...")
                time.sleep(3)
            else:
                logger.error(f"[PAGE_INFO] All {max_retries} attempts failed. Returning error info.")
                return {'title': 'Connection failed', 'url': url, 'content': ''}
    # This should never be reached, but just in case
    return {'title': 'Unknown error', 'url': url, 'content': ''}
 def get_open_tabs_info(env, config: Dict[str, str]):
    host = env.vm_ip
@@ -593,11 +626,22 @@ def get_open_tabs_info(env, config: Dict[str, str]):
    server_port = env.server_port
    remote_debugging_url = f"http://{host}:{port}"
    # Configuration for retry and timeout
    max_retries = 2
    timeout_ms = 30000  # 30 seconds for tab info
    for attempt in range(max_retries):
        try:
            logger.info(f"[OPEN_TABS_INFO] Attempt {attempt + 1}/{max_retries}")
            with sync_playwright() as p:
                # connect to remote Chrome instance
                try:
                    browser = p.chromium.connect_over_cdp(remote_debugging_url)
                    logger.info(f"[OPEN_TABS_INFO] Successfully connected to existing Chrome instance")
                except Exception as e:
                    logger.warning(f"[OPEN_TABS_INFO] Failed to connect to existing Chrome instance: {e}")
                    # If the connection fails, start a new browser instance
                    platform.machine()
                    if "arm" in platform.machine():
@@ -617,29 +661,51 @@ def get_open_tabs_info(env, config: Dict[str, str]):
                    time.sleep(5)
                    try:
                        browser = p.chromium.connect_over_cdp(remote_debugging_url)
                        logger.info(f"[OPEN_TABS_INFO] Successfully connected to new Chrome instance")
                    except Exception as e:
                        logger.error(f"[OPEN_TABS_INFO] Failed to connect to new Chrome instance: {e}")
                        return []
                tabs_info = []
                for context in browser.contexts:
                    for page in context.pages:
                        try:
                            # Set timeout for each page
                            page.set_default_timeout(timeout_ms)
                            # Wait for the page to finish loading, this prevents the "execution context was destroyed" issue
-                    page.wait_for_load_state('networkidle')  # Wait for the 'load' event to complete
+                            page.wait_for_load_state('networkidle', timeout=timeout_ms)  # Wait for the 'load' event to complete
                            title = page.title()
                            url = page.url
                            tabs_info.append({'title': title, 'url': url})
                            logger.info(f"[OPEN_TABS_INFO] Tab info: '{title}' -> {url}")
                        except TimeoutError:
                            # If page loading times out, catch the exception and store the current information in the list
                            logger.warning(f"[OPEN_TABS_INFO] Tab load timeout for URL: {page.url}")
                            tabs_info.append({'title': 'Load timeout', 'url': page.url})
                        except Exception as e:
                            # Catch other potential exceptions that might occur while reading the page title
-                    print(f'Error: {e}')
+                            logger.error(f'[OPEN_TABS_INFO] Error reading tab info: {e}')
                            tabs_info.append({'title': 'Error encountered', 'url': page.url})
                browser.close()
                logger.info(f"[OPEN_TABS_INFO] Successfully retrieved info for {len(tabs_info)} tabs")
                return tabs_info
        except Exception as e:
            logger.error(f"[OPEN_TABS_INFO] Attempt {attempt + 1} failed: {str(e)}")
            logger.error(f"[OPEN_TABS_INFO] Exception type: {type(e).__name__}")
            if attempt < max_retries - 1:
                logger.info(f"[OPEN_TABS_INFO] Retrying in 3 seconds...")
                time.sleep(3)
            else:
                logger.error(f"[OPEN_TABS_INFO] All {max_retries} attempts failed. Returning empty list.")
                return []
    # This should never be reached, but just in case
    return []
 def get_active_url_from_accessTree(env, config):
    """
@@ -727,38 +793,80 @@ def get_active_tab_info(env, config: Dict[str, str]):
    if active_tab_url is None:
        logger.error("Failed to get the url of active tab")
        return None
    logger.info(f"[ACTIVE_TAB_INFO] Active tab URL: {active_tab_url}")
    host = env.vm_ip
    port = env.chromium_port  # fixme: this port is hard-coded, need to be changed from config file
    remote_debugging_url = f"http://{host}:{port}"
    # Configuration for retry and timeout
    max_retries = 2
    timeout_ms = 60000  # 60 seconds for active tab
    for attempt in range(max_retries):
        try:
            logger.info(f"[ACTIVE_TAB_INFO] Attempt {attempt + 1}/{max_retries}")
            with sync_playwright() as p:
                # connect to remote Chrome instance, since it is supposed to be the active one, we won't start a new one if failed
                try:
                    browser = p.chromium.connect_over_cdp(remote_debugging_url)
                    logger.info(f"[ACTIVE_TAB_INFO] Successfully connected to Chrome instance")
                except Exception as e:
                    logger.error(f"[ACTIVE_TAB_INFO] Failed to connect to Chrome instance: {e}")
                    return None
                active_tab_info = {}
                # go to the target URL page
                page = browser.new_page()
                # Set longer timeout for navigation
                page.set_default_timeout(timeout_ms)
                try:
-            page.goto(active_tab_url)
+                    logger.info(f"[ACTIVE_TAB_INFO] Navigating to URL: {active_tab_url}")
-        except:
+                    page.goto(active_tab_url, wait_until='networkidle', timeout=timeout_ms)
-            logger.error("Failed to go to the target URL page")
+                    page.wait_for_load_state('networkidle', timeout=timeout_ms)  # Wait for the 'load' event to complete
-            return None
+                    
        page.wait_for_load_state('load')  # Wait for the 'load' event to complete
                    active_tab_info = {
                        'title': page.title(),
                        'url': page.url,
                        'content': page.content()  # get the HTML content of the page
                    }
                    logger.info(f"[ACTIVE_TAB_INFO] Successfully loaded page. Title: '{active_tab_info['title']}'")
                    logger.info(f"[ACTIVE_TAB_INFO] Current URL: '{active_tab_info['url']}'")
                except TimeoutError:
                    logger.warning(f"[ACTIVE_TAB_INFO] Page load timeout for URL: {active_tab_url}")
                    active_tab_info = {
                        'title': 'Load timeout',
                        'url': page.url,
                        'content': page.content()
                    }
                except Exception as e:
                    logger.error(f"[ACTIVE_TAB_INFO] Failed to go to the target URL page: {e}")
                    return None
                browser.close()
        # print("active_tab_title: {}".format(active_tab_info.get('title', 'None')))
        # print("active_tab_url: {}".format(active_tab_info.get('url', 'None')))
        # print("active_tab_content: {}".format(active_tab_info.get('content', 'None')))
                return active_tab_info
        except Exception as e:
            logger.error(f"[ACTIVE_TAB_INFO] Attempt {attempt + 1} failed: {str(e)}")
            logger.error(f"[ACTIVE_TAB_INFO] Exception type: {type(e).__name__}")
            if attempt < max_retries - 1:
                logger.info(f"[ACTIVE_TAB_INFO] Retrying in 3 seconds...")
                time.sleep(3)
            else:
                logger.error(f"[ACTIVE_TAB_INFO] All {max_retries} attempts failed.")
                return None
    # This should never be reached, but just in case
    return None
 def get_pdf_from_url(env, config: Dict[str, str]) -> str:
    """
@@ -767,16 +875,32 @@ def get_pdf_from_url(env, config: Dict[str, str]) -> str:
    _url = config["path"]
    _path = os.path.join(env.cache_dir, config["dest"])
    # Add logging for debugging
    logger.info(f"[PDF_FROM_URL] Starting PDF download from URL: {_url}")
    logger.info(f"[PDF_FROM_URL] Target path: {_path}")
    host = env.vm_ip
    port = env.chromium_port  # fixme: this port is hard-coded, need to be changed from config file
    server_port = env.server_port
    remote_debugging_url = f"http://{host}:{port}"
    # Configuration for retry and timeout
    max_retries = 3
    timeout_ms = 60000  # Increase timeout to 60 seconds
    for attempt in range(max_retries):
        try:
            logger.info(f"[PDF_FROM_URL] Attempt {attempt + 1}/{max_retries}")
            with sync_playwright() as p:
                try:
                    browser = p.chromium.connect_over_cdp(remote_debugging_url)
                    logger.info(f"[PDF_FROM_URL] Successfully connected to existing Chrome instance")
                except Exception as e:
                    logger.warning(f"[PDF_FROM_URL] Failed to connect to existing Chrome instance: {e}")
                    logger.info(f"[PDF_FROM_URL] Starting new Chrome instance...")
                    # If the connection fails, start a new browser instance
                    platform.machine()
                    if "arm" in platform.machine():
@@ -795,12 +919,65 @@ def get_pdf_from_url(env, config: Dict[str, str]) -> str:
                    requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload)
                    time.sleep(5)
                    browser = p.chromium.connect_over_cdp(remote_debugging_url)
                    logger.info(f"[PDF_FROM_URL] Successfully connected to new Chrome instance")
                page = browser.new_page()
-        page.goto(_url)
+                
                # Set longer timeout for navigation
                page.set_default_timeout(timeout_ms)
                logger.info(f"[PDF_FROM_URL] Navigating to URL: {_url}")
                page.goto(_url, wait_until='networkidle', timeout=timeout_ms)
                # Wait for page to be fully loaded
                logger.info(f"[PDF_FROM_URL] Waiting for page to be fully loaded...")
                page.wait_for_load_state('networkidle', timeout=timeout_ms)
                # Additional wait to ensure all content is rendered
                time.sleep(3)
                logger.info(f"[PDF_FROM_URL] Page loaded successfully. Title: '{page.title()}'")
                logger.info(f"[PDF_FROM_URL] Current URL: '{page.url}'")
                # Generate PDF
                logger.info(f"[PDF_FROM_URL] Generating PDF...")
                page.pdf(path=_path)
                logger.info(f"[PDF_FROM_URL] PDF generated successfully at: {_path}")
                browser.close()
                # Verify PDF file was created
                if os.path.exists(_path):
                    file_size = os.path.getsize(_path)
                    logger.info(f"[PDF_FROM_URL] PDF file created successfully. Size: {file_size} bytes")
                    return _path
                else:
                    logger.error(f"[PDF_FROM_URL] PDF file was not created at expected path: {_path}")
                    raise FileNotFoundError(f"PDF file was not created at {_path}")
        except Exception as e:
            logger.error(f"[PDF_FROM_URL] Attempt {attempt + 1} failed: {str(e)}")
            logger.error(f"[PDF_FROM_URL] Exception type: {type(e).__name__}")
            if attempt < max_retries - 1:
                logger.info(f"[PDF_FROM_URL] Retrying in 5 seconds...")
                time.sleep(5)
            else:
                logger.error(f"[PDF_FROM_URL] All {max_retries} attempts failed. Giving up.")
                # Create a placeholder file or return a default path
                try:
                    # Create an empty PDF file as fallback
                    with open(_path, 'w') as f:
                        f.write("%PDF-1.4\n%EOF\n")
                    logger.warning(f"[PDF_FROM_URL] Created empty PDF file as fallback: {_path}")
                    return _path
                except Exception as fallback_error:
                    logger.error(f"[PDF_FROM_URL] Failed to create fallback file: {fallback_error}")
                    # Return the path anyway, even if file creation failed
                    return _path
    # This should never be reached, but just in case
    return _path
@@ -811,11 +988,22 @@ def get_chrome_saved_address(env, config: Dict[str, str]):
    server_port = env.server_port
    remote_debugging_url = f"http://{host}:{port}"
    # Configuration for retry and timeout
    max_retries = 2
    timeout_ms = 30000  # 30 seconds for settings page
    for attempt in range(max_retries):
        try:
            logger.info(f"[CHROME_SAVED_ADDRESS] Attempt {attempt + 1}/{max_retries}")
            with sync_playwright() as p:
                # connect to remote Chrome instance
                try:
                    browser = p.chromium.connect_over_cdp(remote_debugging_url)
                    logger.info(f"[CHROME_SAVED_ADDRESS] Successfully connected to existing Chrome instance")
                except Exception as e:
                    logger.warning(f"[CHROME_SAVED_ADDRESS] Failed to connect to existing Chrome instance: {e}")
                    # If the connection fails, start a new browser instance
                    platform.machine()
                    if "arm" in platform.machine():
@@ -834,19 +1022,42 @@ def get_chrome_saved_address(env, config: Dict[str, str]):
                    requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload)
                    time.sleep(5)
                    browser = p.chromium.connect_over_cdp(remote_debugging_url)
                    logger.info(f"[CHROME_SAVED_ADDRESS] Successfully connected to new Chrome instance")
                page = browser.new_page()
                # Set longer timeout for navigation
                page.set_default_timeout(timeout_ms)
                # Navigate to Chrome's settings page for autofill
-        page.goto("chrome://settings/addresses")
+                logger.info(f"[CHROME_SAVED_ADDRESS] Navigating to Chrome settings page")
                page.goto("chrome://settings/addresses", wait_until='networkidle', timeout=timeout_ms)
                # Wait for page to be fully loaded
                page.wait_for_load_state('networkidle', timeout=timeout_ms)
                # Get the HTML content of the page
                content = page.content()
                logger.info(f"[CHROME_SAVED_ADDRESS] Successfully retrieved settings page content")
                browser.close()
                return content
        except Exception as e:
            logger.error(f"[CHROME_SAVED_ADDRESS] Attempt {attempt + 1} failed: {str(e)}")
            logger.error(f"[CHROME_SAVED_ADDRESS] Exception type: {type(e).__name__}")
            if attempt < max_retries - 1:
                logger.info(f"[CHROME_SAVED_ADDRESS] Retrying in 3 seconds...")
                time.sleep(3)
            else:
                logger.error(f"[CHROME_SAVED_ADDRESS] All {max_retries} attempts failed. Returning empty content.")
                return ""
    # This should never be reached, but just in case
    return ""
 def get_shortcuts_on_desktop(env, config: Dict[str, str]):
    # Find out the operating system
@@ -891,10 +1102,21 @@ def get_number_of_search_results(env, config: Dict[str, str]):
    server_port = env.server_port
    remote_debugging_url = f"http://{host}:{port}"
    # Configuration for retry and timeout
    max_retries = 2
    timeout_ms = 45000  # 45 seconds for search results
    for attempt in range(max_retries):
        try:
            logger.info(f"[SEARCH_RESULTS] Attempt {attempt + 1}/{max_retries} for URL: {url}")
            with sync_playwright() as p:
                try:
                    browser = p.chromium.connect_over_cdp(remote_debugging_url)
                    logger.info(f"[SEARCH_RESULTS] Successfully connected to existing Chrome instance")
                except Exception as e:
                    logger.warning(f"[SEARCH_RESULTS] Failed to connect to existing Chrome instance: {e}")
                    # If the connection fails, start a new browser instance
                    platform.machine()
                    if "arm" in platform.machine():
@@ -913,16 +1135,43 @@ def get_number_of_search_results(env, config: Dict[str, str]):
                    requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload)
                    time.sleep(5)
                    browser = p.chromium.connect_over_cdp(remote_debugging_url)
                    logger.info(f"[SEARCH_RESULTS] Successfully connected to new Chrome instance")
                page = browser.new_page()
-        page.goto(url)
+                
                # Set longer timeout for navigation
                page.set_default_timeout(timeout_ms)
                logger.info(f"[SEARCH_RESULTS] Navigating to URL: {url}")
                page.goto(url, wait_until='networkidle', timeout=timeout_ms)
                # Wait for page to be fully loaded
                page.wait_for_load_state('networkidle', timeout=timeout_ms)
                search_results = page.query_selector_all(result_selector)
                actual_count = len(search_results)
                logger.info(f"[SEARCH_RESULTS] Found {actual_count} search results")
                browser.close()
                return actual_count
        except Exception as e:
            logger.error(f"[SEARCH_RESULTS] Attempt {attempt + 1} failed: {str(e)}")
            logger.error(f"[SEARCH_RESULTS] Exception type: {type(e).__name__}")
-def get_googledrive_file(env, config: Dict[str, Any]) -> str:
+            if attempt < max_retries - 1:
                logger.info(f"[SEARCH_RESULTS] Retrying in 3 seconds...")
                time.sleep(3)
            else:
                logger.error(f"[SEARCH_RESULTS] All {max_retries} attempts failed. Returning 0.")
                return 0
    # This should never be reached, but just in case
    return 0
 def get_googledrive_file(env, config: Dict[str, Any]) -> Any:
    """ Get the desired file from Google Drive based on config, return the downloaded local filepath.
    @args: keys in config dict
        settings_file(str): target filepath to the settings file for Google Drive authentication, default is 'evaluation_examples/settings/googledrive/settings.yml'
@@ -959,18 +1208,21 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
        return _path
    if 'query' in config:
-        return get_single_file(config['query'], os.path.join(env.cache_dir, config['dest']))
+        result = get_single_file(config['query'], os.path.join(env.cache_dir, config['dest']))
        return result
    elif 'path' in config:
        query = [f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(
            config['path']) - 1
                 else f"title = '{fp}' and trashed = false" for idx, fp in enumerate(config['path'])]
-        return get_single_file(query, os.path.join(env.cache_dir, config['dest']))
+        result = get_single_file(query, os.path.join(env.cache_dir, config['dest']))
        return result
    elif 'query_list' in config:
        _path_list = []
        assert len(config['query_list']) == len(config['dest'])
        for idx, query in enumerate(config['query_list']):
            dest = config['dest'][idx]
-            _path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
+            result = get_single_file(query, os.path.join(env.cache_dir, dest))
            _path_list.append(result)
        return _path_list
    else:  # path_list in config
        _path_list = []
@@ -981,7 +1233,8 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
                    path) - 1
                else f"title = '{fp}' and trashed = false" for jdx, fp in enumerate(path)]
            dest = config['dest'][idx]
-            _path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
+            result = get_single_file(query, os.path.join(env.cache_dir, dest))
            _path_list.append(result)
        return _path_list
@@ -1692,16 +1945,31 @@ def get_url_dashPart(env, config: Dict[str, str]):
        return None
    # extract the last dash-separated part of the URL, and delete all the characters after "id"
-    dash_part = active_tab_url.split("/")[config["partIndex"]]
+    # Ensure partIndex is an integer
-    if config["needDeleteId"]:
+    try:
        part_index = int(config["partIndex"])
    except (ValueError, TypeError):
        logger.error(f"[URL_DASH_PART] Invalid partIndex: {config.get('partIndex', 'None')}. Must be an integer.")
        return None
    url_parts = active_tab_url.split("/")
    if part_index >= len(url_parts):
        logger.error(f"[URL_DASH_PART] partIndex {part_index} is out of range for URL with {len(url_parts)} parts")
        return None
    dash_part = url_parts[part_index]
    if config.get("needDeleteId", False):
        dash_part = dash_part.split("?")[0]
-    # print("active_tab_title: {}".format(active_tab_info.get('title', 'None')))
+    
-    # print("active_tab_url: {}".format(active_tab_info.get('url', 'None')))
+    logger.info(f"[URL_DASH_PART] Extracted dash part: '{dash_part}' from URL: {active_tab_url}")
-    # print("active_tab_content: {}".format(active_tab_info.get('content', 'None')))
+    
    if config["returnType"] == "string":
        return dash_part
    elif config["returnType"] == "json":
        return {config["key"]: dash_part}
    else:
        logger.error(f"[URL_DASH_PART] Invalid returnType: {config.get('returnType', 'None')}. Must be 'string' or 'json'.")
        return None
 def get_macys_product_url_parse(env, config: Dict[str, str]):