diff --git a/branch_flag b/branch_flag index 760637d..9daeafb 100644 --- a/branch_flag +++ b/branch_flag @@ -1 +1 @@ -exp_som +test diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index 38646be..a29021b 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -7,6 +7,7 @@ import uuid import tempfile from typing import Any, Union, Optional from typing import Dict, List +import os import requests from pydrive.auth import GoogleAuth @@ -114,6 +115,7 @@ class SetupController: if not os.path.exists(cache_path): max_retries = 3 downloaded = False + e = None for i in range(max_retries): try: response = requests.get(url, stream=True) @@ -128,7 +130,7 @@ class SetupController: break except requests.RequestException as e: - logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)") + logger.error(f"Failed to download {url} caused by {e}. Retrying... ({max_retries - i - 1} attempts left)") if not downloaded: raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}") @@ -344,39 +346,49 @@ class SetupController: port = 9222 # fixme: this port is hard-coded, need to be changed from config file remote_debugging_url = f"http://{host}:{port}" - with sync_playwright() as p: + logger.info("Connect to Chrome @: %s", remote_debugging_url) + logger.debug("PLAYWRIGHT ENV: %s", repr(os.environ)) + for attempt in range(15): + if attempt>0: + time.sleep(5) + browser = None - for attempt in range(15): + with sync_playwright() as p: try: browser = p.chromium.connect_over_cdp(remote_debugging_url) - break + #break except Exception as e: if attempt < 14: logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}") - time.sleep(1) + #time.sleep(10) + continue else: logger.error(f"Failed to connect after multiple attempts: {e}") raise e - if not browser: - return + if not browser: + return - for i, url in enumerate(urls_to_open): - # Use the first context (which should be the only one if using default profile) - if i == 0: - context = browser.contexts[0] + logger.info("Opening %s...", urls_to_open) + for i, url in enumerate(urls_to_open): + # Use the first context (which should be the only one if using default profile) + if i == 0: + context = browser.contexts[0] - page = context.new_page() # Create a new page (tab) within the existing context - page.goto(url, timeout=60000) - logger.info(f"Opened tab {i + 1}: {url}") + page = context.new_page() # Create a new page (tab) within the existing context + try: + page.goto(url, timeout=60000) + except: + logger.warning("Opening %s exceeds time limit", url) # only for human test + logger.info(f"Opened tab {i + 1}: {url}") - if i == 0: - # clear the default tab - default_page = context.pages[0] - default_page.close() + if i == 0: + # clear the default tab + default_page = context.pages[0] + default_page.close() - # Do not close the context or browser; they will remain open after script ends - return browser, context + # Do not close the context or browser; they will remain open after script ends + return browser, context def _chrome_close_tabs_setup(self, urls_to_close: List[str]): time.sleep(5) # Wait for Chrome to finish launching @@ -552,4 +564,4 @@ class SetupController: else: raise NotImplementedError - return browser, context \ No newline at end of file + return browser, context diff --git a/evaluation_examples/examples/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json b/evaluation_examples/examples/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json index 6f2cbf5..06ffe50 100644 --- a/evaluation_examples/examples/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json +++ b/evaluation_examples/examples/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json @@ -12,6 +12,16 @@ "--remote-debugging-port=9222" ] } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } } ], "trajectory": "trajectories/", diff --git a/main.py b/main.py index 627ed5f..471c625 100644 --- a/main.py +++ b/main.py @@ -47,9 +47,9 @@ def human_agent(): Runs the Gym environment with human input. """ - with open("evaluation_examples/examples/thunderbird/c9e7eaf2-b1a1-4efc-a982-721972fa9f02.json", "r") as f: + with open("evaluation_examples/examples/chrome/af630914-714e-4a24-a7bb-f9af687d3b91.json", "r") as f: example = json.load(f) - example["snapshot"] = "Snapshot 30" + example["snapshot"] = "Snapshot 35" #env = DesktopEnv( path_to_vm="~/vmware/Windows 10 x64/Windows 10 x64.vmx" env = DesktopEnv( path_to_vm="/mnt/data1/david/os-images/Ubuntu-1218/Ubuntu.vmx"