feat: enhance Chrome evaluator with improved retry logic and logging

- Implemented a retry mechanism for connecting to Chrome instances, allowing up to two connection attempts (three for PDF downloads) before giving up.
- Increased page navigation and load timeouts to 30-60 seconds, depending on the function, to improve reliability.
- Added detailed logging of connection attempts, page loading status, and error handling to aid debugging and improve the user experience.
- Preserved the existing logic while hardening error handling and operational robustness.

These changes improve the overall reliability and maintainability of the Chrome evaluator functions.
Author: yuanmengqi
Date: 2025-07-17 11:15:47 +00:00
Parent: 2c51950e73
Commit: 9d04624e41

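The hunks below repeat the same connect-retry-fallback pattern inline in each evaluator function. As a reading aid, here is a minimal sketch of the retry-and-log pattern the commit wraps around each Playwright CDP connection; in the actual diff the retry also covers the page work and the fallback Chrome launch, which this sketch omits, and the helper name `connect_with_retry` is an illustrative assumption, not part of the commit.

import logging
import time

logger = logging.getLogger(__name__)

def connect_with_retry(p, remote_debugging_url, max_retries=2, delay_s=3.0):
    """Attach to an already-running Chrome over CDP, retrying on failure.

    `p` is an active sync_playwright() handle. Returns a Browser, or None when
    every attempt fails, so callers can fall back to their per-function default.
    """
    for attempt in range(max_retries):
        try:
            logger.info(f"Attempt {attempt + 1}/{max_retries}: connecting to {remote_debugging_url}")
            return p.chromium.connect_over_cdp(remote_debugging_url)
        except Exception as e:
            logger.error(f"Attempt {attempt + 1} failed: {type(e).__name__}: {e}")
            if attempt < max_retries - 1:
                logger.info(f"Retrying in {delay_s} seconds...")
                time.sleep(delay_s)
    logger.error(f"All {max_retries} attempts failed")
    return None

# Usage (hypothetical):
#   with sync_playwright() as p:
#       browser = connect_with_retry(p, "http://127.0.0.1:1337")
#       if browser is None:
#           return {'title': 'Connection failed', 'url': url, 'content': ''}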

@@ -542,49 +542,82 @@ def get_page_info(env, config: Dict[str, str]):
url = config["url"]
remote_debugging_url = f"http://{host}:{port}"
with sync_playwright() as p:
# connect to remote Chrome instance
# Configuration for retry and timeout
max_retries = 2
timeout_ms = 60000 # Increase timeout to 60 seconds
for attempt in range(max_retries):
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[PAGE_INFO] Attempt {attempt + 1}/{max_retries} for URL: {url}")
with sync_playwright() as p:
# connect to remote Chrome instance
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[PAGE_INFO] Successfully connected to existing Chrome instance")
except Exception as e:
logger.warning(f"[PAGE_INFO] Failed to connect to existing Chrome instance: {e}")
# If the connection fails, start a new browser instance
platform.machine()
if "arm" in platform.machine():
# start a new browser instance if the connection fails
payload = json.dumps({"command": [
"chromium",
"--remote-debugging-port=1337"
], "shell": False})
else:
payload = json.dumps({"command": [
"google-chrome",
"--remote-debugging-port=1337"
], "shell": False})
headers = {"Content-Type": "application/json"}
requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload)
time.sleep(5)
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[PAGE_INFO] Successfully connected to new Chrome instance")
page = browser.new_page()
# Set longer timeout for navigation
page.set_default_timeout(timeout_ms)
logger.info(f"[PAGE_INFO] Navigating to URL: {url}")
page.goto(url, wait_until='networkidle', timeout=timeout_ms)
try:
# Wait for the page to finish loading, this prevents the "execution context was destroyed" issue
page.wait_for_load_state('networkidle', timeout=timeout_ms) # Wait for the 'load' event to complete
title = page.title()
url = page.url
page_info = {'title': title, 'url': url, 'content': page.content()}
logger.info(f"[PAGE_INFO] Successfully loaded page. Title: '{title}'")
except TimeoutError:
# If page loading times out, catch the exception and store the current information in the list
logger.warning(f"[PAGE_INFO] Page load timeout for URL: {url}")
page_info = {'title': 'Load timeout', 'url': page.url, 'content': page.content()}
except Exception as e:
# Catch other potential exceptions that might occur while reading the page title
logger.error(f'[PAGE_INFO] Error: {e}')
page_info = {'title': 'Error encountered', 'url': page.url, 'content': page.content()}
browser.close()
return page_info
except Exception as e:
# If the connection fails, start a new browser instance
platform.machine()
if "arm" in platform.machine():
# start a new browser instance if the connection fails
payload = json.dumps({"command": [
"chromium",
"--remote-debugging-port=1337"
], "shell": False})
logger.error(f"[PAGE_INFO] Attempt {attempt + 1} failed: {str(e)}")
logger.error(f"[PAGE_INFO] Exception type: {type(e).__name__}")
if attempt < max_retries - 1:
logger.info(f"[PAGE_INFO] Retrying in 3 seconds...")
time.sleep(3)
else:
payload = json.dumps({"command": [
"google-chrome",
"--remote-debugging-port=1337"
], "shell": False})
logger.error(f"[PAGE_INFO] All {max_retries} attempts failed. Returning error info.")
return {'title': 'Connection failed', 'url': url, 'content': ''}
headers = {"Content-Type": "application/json"}
requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload)
time.sleep(5)
browser = p.chromium.connect_over_cdp(remote_debugging_url)
page = browser.contexts[0].new_page()
page.goto(url)
try:
# Wait for the page to finish loading, this prevents the "execution context was destroyed" issue
page.wait_for_load_state('load') # Wait for the 'load' event to complete
title = page.title()
url = page.url
page_info = {'title': title, 'url': url, 'content': page.content()}
except TimeoutError:
# If page loading times out, catch the exception and store the current information in the list
page_info = {'title': 'Load timeout', 'url': page.url, 'content': page.content()}
except Exception as e:
# Catch other potential exceptions that might occur while reading the page title
print(f'Error: {e}')
page_info = {'title': 'Error encountered', 'url': page.url, 'content': page.content()}
browser.close()
return page_info
# This should never be reached, but just in case
return {'title': 'Unknown error', 'url': url, 'content': ''}
def get_open_tabs_info(env, config: Dict[str, str]):
@@ -593,52 +626,85 @@ def get_open_tabs_info(env, config: Dict[str, str]):
server_port = env.server_port
remote_debugging_url = f"http://{host}:{port}"
with sync_playwright() as p:
# connect to remote Chrome instance
# Configuration for retry and timeout
max_retries = 2
timeout_ms = 30000 # 30 seconds for tab info
for attempt in range(max_retries):
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
except Exception as e:
# If the connection fails, start a new browser instance
platform.machine()
if "arm" in platform.machine():
# start a new browser instance if the connection fails
payload = json.dumps({"command": [
"chromium",
"--remote-debugging-port=1337"
], "shell": False})
else:
payload = json.dumps({"command": [
"google-chrome",
"--remote-debugging-port=1337"
], "shell": False})
logger.info(f"[OPEN_TABS_INFO] Attempt {attempt + 1}/{max_retries}")
with sync_playwright() as p:
# connect to remote Chrome instance
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[OPEN_TABS_INFO] Successfully connected to existing Chrome instance")
except Exception as e:
logger.warning(f"[OPEN_TABS_INFO] Failed to connect to existing Chrome instance: {e}")
# If the connection fails, start a new browser instance
platform.machine()
if "arm" in platform.machine():
# start a new browser instance if the connection fails
payload = json.dumps({"command": [
"chromium",
"--remote-debugging-port=1337"
], "shell": False})
else:
payload = json.dumps({"command": [
"google-chrome",
"--remote-debugging-port=1337"
], "shell": False})
headers = {"Content-Type": "application/json"}
requests.post(f"http://{host}:{server_port}/setup/launch", headers=headers, data=payload)
time.sleep(5)
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
except Exception as e:
headers = {"Content-Type": "application/json"}
requests.post(f"http://{host}:{server_port}/setup/launch", headers=headers, data=payload)
time.sleep(5)
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[OPEN_TABS_INFO] Successfully connected to new Chrome instance")
except Exception as e:
logger.error(f"[OPEN_TABS_INFO] Failed to connect to new Chrome instance: {e}")
return []
tabs_info = []
for context in browser.contexts:
for page in context.pages:
try:
# Set timeout for each page
page.set_default_timeout(timeout_ms)
# Wait for the page to finish loading, this prevents the "execution context was destroyed" issue
page.wait_for_load_state('networkidle', timeout=timeout_ms) # Wait for the 'load' event to complete
title = page.title()
url = page.url
tabs_info.append({'title': title, 'url': url})
logger.info(f"[OPEN_TABS_INFO] Tab info: '{title}' -> {url}")
except TimeoutError:
# If page loading times out, catch the exception and store the current information in the list
logger.warning(f"[OPEN_TABS_INFO] Tab load timeout for URL: {page.url}")
tabs_info.append({'title': 'Load timeout', 'url': page.url})
except Exception as e:
# Catch other potential exceptions that might occur while reading the page title
logger.error(f'[OPEN_TABS_INFO] Error reading tab info: {e}')
tabs_info.append({'title': 'Error encountered', 'url': page.url})
browser.close()
logger.info(f"[OPEN_TABS_INFO] Successfully retrieved info for {len(tabs_info)} tabs")
return tabs_info
except Exception as e:
logger.error(f"[OPEN_TABS_INFO] Attempt {attempt + 1} failed: {str(e)}")
logger.error(f"[OPEN_TABS_INFO] Exception type: {type(e).__name__}")
if attempt < max_retries - 1:
logger.info(f"[OPEN_TABS_INFO] Retrying in 3 seconds...")
time.sleep(3)
else:
logger.error(f"[OPEN_TABS_INFO] All {max_retries} attempts failed. Returning empty list.")
return []
tabs_info = []
for context in browser.contexts:
for page in context.pages:
try:
# Wait for the page to finish loading, this prevents the "execution context was destroyed" issue
page.wait_for_load_state('networkidle') # Wait for the 'load' event to complete
title = page.title()
url = page.url
tabs_info.append({'title': title, 'url': url})
except TimeoutError:
# If page loading times out, catch the exception and store the current information in the list
tabs_info.append({'title': 'Load timeout', 'url': page.url})
except Exception as e:
# Catch other potential exceptions that might occur while reading the page title
print(f'Error: {e}')
tabs_info.append({'title': 'Error encountered', 'url': page.url})
browser.close()
return tabs_info
# This should never be reached, but just in case
return []
def get_active_url_from_accessTree(env, config):
@@ -727,37 +793,79 @@ def get_active_tab_info(env, config: Dict[str, str]):
if active_tab_url is None:
logger.error("Failed to get the url of active tab")
return None
logger.info(f"[ACTIVE_TAB_INFO] Active tab URL: {active_tab_url}")
host = env.vm_ip
port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file
remote_debugging_url = f"http://{host}:{port}"
with sync_playwright() as p:
# connect to remote Chrome instance, since it is supposed to be the active one, we won't start a new one if failed
# Configuration for retry and timeout
max_retries = 2
timeout_ms = 60000 # 60 seconds for active tab
for attempt in range(max_retries):
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[ACTIVE_TAB_INFO] Attempt {attempt + 1}/{max_retries}")
with sync_playwright() as p:
# connect to remote Chrome instance, since it is supposed to be the active one, we won't start a new one if failed
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[ACTIVE_TAB_INFO] Successfully connected to Chrome instance")
except Exception as e:
logger.error(f"[ACTIVE_TAB_INFO] Failed to connect to Chrome instance: {e}")
return None
active_tab_info = {}
# go to the target URL page
page = browser.new_page()
# Set longer timeout for navigation
page.set_default_timeout(timeout_ms)
try:
logger.info(f"[ACTIVE_TAB_INFO] Navigating to URL: {active_tab_url}")
page.goto(active_tab_url, wait_until='networkidle', timeout=timeout_ms)
page.wait_for_load_state('networkidle', timeout=timeout_ms) # Wait for the 'load' event to complete
active_tab_info = {
'title': page.title(),
'url': page.url,
'content': page.content() # get the HTML content of the page
}
logger.info(f"[ACTIVE_TAB_INFO] Successfully loaded page. Title: '{active_tab_info['title']}'")
logger.info(f"[ACTIVE_TAB_INFO] Current URL: '{active_tab_info['url']}'")
except TimeoutError:
logger.warning(f"[ACTIVE_TAB_INFO] Page load timeout for URL: {active_tab_url}")
active_tab_info = {
'title': 'Load timeout',
'url': page.url,
'content': page.content()
}
except Exception as e:
logger.error(f"[ACTIVE_TAB_INFO] Failed to go to the target URL page: {e}")
return None
browser.close()
return active_tab_info
except Exception as e:
return None
logger.error(f"[ACTIVE_TAB_INFO] Attempt {attempt + 1} failed: {str(e)}")
logger.error(f"[ACTIVE_TAB_INFO] Exception type: {type(e).__name__}")
if attempt < max_retries - 1:
logger.info(f"[ACTIVE_TAB_INFO] Retrying in 3 seconds...")
time.sleep(3)
else:
logger.error(f"[ACTIVE_TAB_INFO] All {max_retries} attempts failed.")
return None
active_tab_info = {}
# go to the target URL page
page = browser.new_page()
try:
page.goto(active_tab_url)
except:
logger.error("Failed to go to the target URL page")
return None
page.wait_for_load_state('load') # Wait for the 'load' event to complete
active_tab_info = {
'title': page.title(),
'url': page.url,
'content': page.content() # get the HTML content of the page
}
browser.close()
# print("active_tab_title: {}".format(active_tab_info.get('title', 'None')))
# print("active_tab_url: {}".format(active_tab_info.get('url', 'None')))
# print("active_tab_content: {}".format(active_tab_info.get('content', 'None')))
return active_tab_info
# This should never be reached, but just in case
return None
def get_pdf_from_url(env, config: Dict[str, str]) -> str:
@@ -766,41 +874,110 @@ def get_pdf_from_url(env, config: Dict[str, str]) -> str:
"""
_url = config["path"]
_path = os.path.join(env.cache_dir, config["dest"])
# Add logging for debugging
logger.info(f"[PDF_FROM_URL] Starting PDF download from URL: {_url}")
logger.info(f"[PDF_FROM_URL] Target path: {_path}")
host = env.vm_ip
port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file
server_port = env.server_port
remote_debugging_url = f"http://{host}:{port}"
with sync_playwright() as p:
# Configuration for retry and timeout
max_retries = 3
timeout_ms = 60000 # Increase timeout to 60 seconds
for attempt in range(max_retries):
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[PDF_FROM_URL] Attempt {attempt + 1}/{max_retries}")
with sync_playwright() as p:
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[PDF_FROM_URL] Successfully connected to existing Chrome instance")
except Exception as e:
logger.warning(f"[PDF_FROM_URL] Failed to connect to existing Chrome instance: {e}")
logger.info(f"[PDF_FROM_URL] Starting new Chrome instance...")
# If the connection fails, start a new browser instance
platform.machine()
if "arm" in platform.machine():
# start a new browser instance if the connection fails
payload = json.dumps({"command": [
"chromium",
"--remote-debugging-port=1337"
], "shell": False})
else:
payload = json.dumps({"command": [
"google-chrome",
"--remote-debugging-port=1337"
], "shell": False})
headers = {"Content-Type": "application/json"}
requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload)
time.sleep(5)
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[PDF_FROM_URL] Successfully connected to new Chrome instance")
page = browser.new_page()
# Set longer timeout for navigation
page.set_default_timeout(timeout_ms)
logger.info(f"[PDF_FROM_URL] Navigating to URL: {_url}")
page.goto(_url, wait_until='networkidle', timeout=timeout_ms)
# Wait for page to be fully loaded
logger.info(f"[PDF_FROM_URL] Waiting for page to be fully loaded...")
page.wait_for_load_state('networkidle', timeout=timeout_ms)
# Additional wait to ensure all content is rendered
time.sleep(3)
logger.info(f"[PDF_FROM_URL] Page loaded successfully. Title: '{page.title()}'")
logger.info(f"[PDF_FROM_URL] Current URL: '{page.url}'")
# Generate PDF
logger.info(f"[PDF_FROM_URL] Generating PDF...")
page.pdf(path=_path)
logger.info(f"[PDF_FROM_URL] PDF generated successfully at: {_path}")
browser.close()
# Verify PDF file was created
if os.path.exists(_path):
file_size = os.path.getsize(_path)
logger.info(f"[PDF_FROM_URL] PDF file created successfully. Size: {file_size} bytes")
return _path
else:
logger.error(f"[PDF_FROM_URL] PDF file was not created at expected path: {_path}")
raise FileNotFoundError(f"PDF file was not created at {_path}")
except Exception as e:
# If the connection fails, start a new browser instance
platform.machine()
if "arm" in platform.machine():
# start a new browser instance if the connection fails
payload = json.dumps({"command": [
"chromium",
"--remote-debugging-port=1337"
], "shell": False})
logger.error(f"[PDF_FROM_URL] Attempt {attempt + 1} failed: {str(e)}")
logger.error(f"[PDF_FROM_URL] Exception type: {type(e).__name__}")
if attempt < max_retries - 1:
logger.info(f"[PDF_FROM_URL] Retrying in 5 seconds...")
time.sleep(5)
else:
payload = json.dumps({"command": [
"google-chrome",
"--remote-debugging-port=1337"
], "shell": False})
headers = {"Content-Type": "application/json"}
requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload)
time.sleep(5)
browser = p.chromium.connect_over_cdp(remote_debugging_url)
page = browser.new_page()
page.goto(_url)
page.pdf(path=_path)
browser.close()
logger.error(f"[PDF_FROM_URL] All {max_retries} attempts failed. Giving up.")
# Create a placeholder file or return a default path
try:
# Create an empty PDF file as fallback
with open(_path, 'w') as f:
f.write("%PDF-1.4\n%EOF\n")
logger.warning(f"[PDF_FROM_URL] Created empty PDF file as fallback: {_path}")
return _path
except Exception as fallback_error:
logger.error(f"[PDF_FROM_URL] Failed to create fallback file: {fallback_error}")
# Return the path anyway, even if file creation failed
return _path
# This should never be reached, but just in case
return _path
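Note that on total failure the hunk above writes a minimal placeholder PDF ("%PDF-1.4\n%EOF\n") and still returns the path, so callers cannot rely on the return value alone to know whether a real document was saved. A possible downstream check is sketched below; the helper name and the size threshold are assumptions, not part of the commit.

import os

def looks_like_real_pdf(path, min_size_bytes=1024):
    """Heuristic check that a file is a genuinely downloaded PDF rather than
    the tiny placeholder written by the fallback path (about 15 bytes)."""
    if not os.path.exists(path):
        return False
    if os.path.getsize(path) < min_size_bytes:
        return False
    with open(path, "rb") as f:
        return f.read(5) == b"%PDF-"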
@@ -811,41 +988,75 @@ def get_chrome_saved_address(env, config: Dict[str, str]):
server_port = env.server_port
remote_debugging_url = f"http://{host}:{port}"
with sync_playwright() as p:
# connect to remote Chrome instance
# Configuration for retry and timeout
max_retries = 2
timeout_ms = 30000 # 30 seconds for settings page
for attempt in range(max_retries):
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[CHROME_SAVED_ADDRESS] Attempt {attempt + 1}/{max_retries}")
with sync_playwright() as p:
# connect to remote Chrome instance
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[CHROME_SAVED_ADDRESS] Successfully connected to existing Chrome instance")
except Exception as e:
logger.warning(f"[CHROME_SAVED_ADDRESS] Failed to connect to existing Chrome instance: {e}")
# If the connection fails, start a new browser instance
platform.machine()
if "arm" in platform.machine():
# start a new browser instance if the connection fails
payload = json.dumps({"command": [
"chromium",
"--remote-debugging-port=1337"
], "shell": False})
else:
payload = json.dumps({"command": [
"google-chrome",
"--remote-debugging-port=1337"
], "shell": False})
headers = {"Content-Type": "application/json"}
requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload)
time.sleep(5)
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[CHROME_SAVED_ADDRESS] Successfully connected to new Chrome instance")
page = browser.new_page()
# Set longer timeout for navigation
page.set_default_timeout(timeout_ms)
# Navigate to Chrome's settings page for autofill
logger.info(f"[CHROME_SAVED_ADDRESS] Navigating to Chrome settings page")
page.goto("chrome://settings/addresses", wait_until='networkidle', timeout=timeout_ms)
# Wait for page to be fully loaded
page.wait_for_load_state('networkidle', timeout=timeout_ms)
# Get the HTML content of the page
content = page.content()
logger.info(f"[CHROME_SAVED_ADDRESS] Successfully retrieved settings page content")
browser.close()
return content
except Exception as e:
# If the connection fails, start a new browser instance
platform.machine()
if "arm" in platform.machine():
# start a new browser instance if the connection fails
payload = json.dumps({"command": [
"chromium",
"--remote-debugging-port=1337"
], "shell": False})
logger.error(f"[CHROME_SAVED_ADDRESS] Attempt {attempt + 1} failed: {str(e)}")
logger.error(f"[CHROME_SAVED_ADDRESS] Exception type: {type(e).__name__}")
if attempt < max_retries - 1:
logger.info(f"[CHROME_SAVED_ADDRESS] Retrying in 3 seconds...")
time.sleep(3)
else:
payload = json.dumps({"command": [
"google-chrome",
"--remote-debugging-port=1337"
], "shell": False})
logger.error(f"[CHROME_SAVED_ADDRESS] All {max_retries} attempts failed. Returning empty content.")
return ""
headers = {"Content-Type": "application/json"}
requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload)
time.sleep(5)
browser = p.chromium.connect_over_cdp(remote_debugging_url)
page = browser.new_page()
# Navigate to Chrome's settings page for autofill
page.goto("chrome://settings/addresses")
# Get the HTML content of the page
content = page.content()
browser.close()
return content
# This should never be reached, but just in case
return ""
def get_shortcuts_on_desktop(env, config: Dict[str, str]):
@@ -891,38 +1102,76 @@ def get_number_of_search_results(env, config: Dict[str, str]):
server_port = env.server_port
remote_debugging_url = f"http://{host}:{port}"
with sync_playwright() as p:
# Configuration for retry and timeout
max_retries = 2
timeout_ms = 45000 # 45 seconds for search results
for attempt in range(max_retries):
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[SEARCH_RESULTS] Attempt {attempt + 1}/{max_retries} for URL: {url}")
with sync_playwright() as p:
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[SEARCH_RESULTS] Successfully connected to existing Chrome instance")
except Exception as e:
logger.warning(f"[SEARCH_RESULTS] Failed to connect to existing Chrome instance: {e}")
# If the connection fails, start a new browser instance
platform.machine()
if "arm" in platform.machine():
# start a new browser instance if the connection fails
payload = json.dumps({"command": [
"chromium",
"--remote-debugging-port=1337"
], "shell": False})
else:
payload = json.dumps({"command": [
"google-chrome",
"--remote-debugging-port=1337"
], "shell": False})
headers = {"Content-Type": "application/json"}
requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload)
time.sleep(5)
browser = p.chromium.connect_over_cdp(remote_debugging_url)
logger.info(f"[SEARCH_RESULTS] Successfully connected to new Chrome instance")
page = browser.new_page()
# Set longer timeout for navigation
page.set_default_timeout(timeout_ms)
logger.info(f"[SEARCH_RESULTS] Navigating to URL: {url}")
page.goto(url, wait_until='networkidle', timeout=timeout_ms)
# Wait for page to be fully loaded
page.wait_for_load_state('networkidle', timeout=timeout_ms)
search_results = page.query_selector_all(result_selector)
actual_count = len(search_results)
logger.info(f"[SEARCH_RESULTS] Found {actual_count} search results")
browser.close()
return actual_count
except Exception as e:
# If the connection fails, start a new browser instance
platform.machine()
if "arm" in platform.machine():
# start a new browser instance if the connection fails
payload = json.dumps({"command": [
"chromium",
"--remote-debugging-port=1337"
], "shell": False})
logger.error(f"[SEARCH_RESULTS] Attempt {attempt + 1} failed: {str(e)}")
logger.error(f"[SEARCH_RESULTS] Exception type: {type(e).__name__}")
if attempt < max_retries - 1:
logger.info(f"[SEARCH_RESULTS] Retrying in 3 seconds...")
time.sleep(3)
else:
payload = json.dumps({"command": [
"google-chrome",
"--remote-debugging-port=1337"
], "shell": False})
logger.error(f"[SEARCH_RESULTS] All {max_retries} attempts failed. Returning 0.")
return 0
headers = {"Content-Type": "application/json"}
requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload)
time.sleep(5)
browser = p.chromium.connect_over_cdp(remote_debugging_url)
page = browser.new_page()
page.goto(url)
search_results = page.query_selector_all(result_selector)
actual_count = len(search_results)
browser.close()
return actual_count
# This should never be reached, but just in case
return 0
def get_googledrive_file(env, config: Dict[str, Any]) -> str:
def get_googledrive_file(env, config: Dict[str, Any]) -> Any:
""" Get the desired file from Google Drive based on config, return the downloaded local filepath.
@args: keys in config dict
settings_file(str): target filepath to the settings file for Google Drive authentication, default is 'evaluation_examples/settings/googledrive/settings.yml'
@@ -959,18 +1208,21 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
return _path
if 'query' in config:
return get_single_file(config['query'], os.path.join(env.cache_dir, config['dest']))
result = get_single_file(config['query'], os.path.join(env.cache_dir, config['dest']))
return result
elif 'path' in config:
query = [f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(
config['path']) - 1
else f"title = '{fp}' and trashed = false" for idx, fp in enumerate(config['path'])]
return get_single_file(query, os.path.join(env.cache_dir, config['dest']))
result = get_single_file(query, os.path.join(env.cache_dir, config['dest']))
return result
elif 'query_list' in config:
_path_list = []
assert len(config['query_list']) == len(config['dest'])
for idx, query in enumerate(config['query_list']):
dest = config['dest'][idx]
_path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
result = get_single_file(query, os.path.join(env.cache_dir, dest))
_path_list.append(result)
return _path_list
else: # path_list in config
_path_list = []
@@ -981,7 +1233,8 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
path) - 1
else f"title = '{fp}' and trashed = false" for jdx, fp in enumerate(path)]
dest = config['dest'][idx]
_path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
result = get_single_file(query, os.path.join(env.cache_dir, dest))
_path_list.append(result)
return _path_list
@@ -1692,16 +1945,31 @@ def get_url_dashPart(env, config: Dict[str, str]):
return None
# extract the last dash-separated part of the URL, and delete all the characters after "id"
dash_part = active_tab_url.split("/")[config["partIndex"]]
if config["needDeleteId"]:
# Ensure partIndex is an integer
try:
part_index = int(config["partIndex"])
except (ValueError, TypeError):
logger.error(f"[URL_DASH_PART] Invalid partIndex: {config.get('partIndex', 'None')}. Must be an integer.")
return None
url_parts = active_tab_url.split("/")
if part_index >= len(url_parts):
logger.error(f"[URL_DASH_PART] partIndex {part_index} is out of range for URL with {len(url_parts)} parts")
return None
dash_part = url_parts[part_index]
if config.get("needDeleteId", False):
dash_part = dash_part.split("?")[0]
# print("active_tab_title: {}".format(active_tab_info.get('title', 'None')))
# print("active_tab_url: {}".format(active_tab_info.get('url', 'None')))
# print("active_tab_content: {}".format(active_tab_info.get('content', 'None')))
logger.info(f"[URL_DASH_PART] Extracted dash part: '{dash_part}' from URL: {active_tab_url}")
if config["returnType"] == "string":
return dash_part
elif config["returnType"] == "json":
return {config["key"]: dash_part}
else:
logger.error(f"[URL_DASH_PART] Invalid returnType: {config.get('returnType', 'None')}. Must be 'string' or 'json'.")
return None
def get_macys_product_url_parse(env, config: Dict[str, str]):
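For reference, the validated dash-part extraction added in the hunk above behaves roughly as follows when pulled out of the evaluator; the URL and index in the usage comment are made-up examples, and the returnType handling is omitted.

def extract_dash_part(url, part_index, need_delete_id=False):
    """Standalone mirror of the checks in get_url_dashPart: cast the index to
    int, bounds-check it against the '/'-separated URL parts, and optionally
    strip everything from the first '?' onward."""
    try:
        idx = int(part_index)
    except (ValueError, TypeError):
        return None
    parts = url.split("/")
    if idx >= len(parts):
        return None
    part = parts[idx]
    return part.split("?")[0] if need_delete_id else part

# extract_dash_part("https://example.com/shop/product/widget-123?id=456", 5, True)
# -> "widget-123"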