ver Feb2nd

Human evaluation as a non-expert on Chrome tasks
This commit is contained in:
David Chang
2024-02-02 05:13:12 +08:00
parent 9df0854469
commit 5ee9621e0d
4 changed files with 46 additions and 24 deletions

View File

@@ -1 +1 @@
exp_som test

View File

@@ -7,6 +7,7 @@ import uuid
import tempfile import tempfile
from typing import Any, Union, Optional from typing import Any, Union, Optional
from typing import Dict, List from typing import Dict, List
import os
import requests import requests
from pydrive.auth import GoogleAuth from pydrive.auth import GoogleAuth
@@ -114,6 +115,7 @@ class SetupController:
if not os.path.exists(cache_path): if not os.path.exists(cache_path):
max_retries = 3 max_retries = 3
downloaded = False downloaded = False
e = None
for i in range(max_retries): for i in range(max_retries):
try: try:
response = requests.get(url, stream=True) response = requests.get(url, stream=True)
@@ -128,7 +130,7 @@ class SetupController:
break break
except requests.RequestException as e: except requests.RequestException as e:
logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)") logger.error(f"Failed to download {url} caused by {e}. Retrying... ({max_retries - i - 1} attempts left)")
if not downloaded: if not downloaded:
raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}") raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}")
@@ -344,39 +346,49 @@ class SetupController:
port = 9222 # FIXME: this port is hard-coded; it should be read from the config file port = 9222 # FIXME: this port is hard-coded; it should be read from the config file
remote_debugging_url = f"http://{host}:{port}" remote_debugging_url = f"http://{host}:{port}"
with sync_playwright() as p: logger.info("Connect to Chrome @: %s", remote_debugging_url)
logger.debug("PLAYWRIGHT ENV: %s", repr(os.environ))
for attempt in range(15):
if attempt>0:
time.sleep(5)
browser = None browser = None
for attempt in range(15): with sync_playwright() as p:
try: try:
browser = p.chromium.connect_over_cdp(remote_debugging_url) browser = p.chromium.connect_over_cdp(remote_debugging_url)
break #break
except Exception as e: except Exception as e:
if attempt < 14: if attempt < 14:
logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}") logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}")
time.sleep(1) #time.sleep(10)
continue
else: else:
logger.error(f"Failed to connect after multiple attempts: {e}") logger.error(f"Failed to connect after multiple attempts: {e}")
raise e raise e
if not browser: if not browser:
return return
for i, url in enumerate(urls_to_open): logger.info("Opening %s...", urls_to_open)
# Use the first context (which should be the only one if using default profile) for i, url in enumerate(urls_to_open):
if i == 0: # Use the first context (which should be the only one if using default profile)
context = browser.contexts[0] if i == 0:
context = browser.contexts[0]
page = context.new_page() # Create a new page (tab) within the existing context page = context.new_page() # Create a new page (tab) within the existing context
page.goto(url, timeout=60000) try:
logger.info(f"Opened tab {i + 1}: {url}") page.goto(url, timeout=60000)
except:
logger.warning("Opening %s exceeds time limit", url) # only for human test
logger.info(f"Opened tab {i + 1}: {url}")
if i == 0: if i == 0:
# clear the default tab # clear the default tab
default_page = context.pages[0] default_page = context.pages[0]
default_page.close() default_page.close()
# Do not close the context or browser; they will remain open after script ends # Do not close the context or browser; they will remain open after script ends
return browser, context return browser, context
def _chrome_close_tabs_setup(self, urls_to_close: List[str]): def _chrome_close_tabs_setup(self, urls_to_close: List[str]):
time.sleep(5) # Wait for Chrome to finish launching time.sleep(5) # Wait for Chrome to finish launching
@@ -552,4 +564,4 @@ class SetupController:
else: else:
raise NotImplementedError raise NotImplementedError
return browser, context return browser, context

View File

@@ -12,6 +12,16 @@
"--remote-debugging-port=9222" "--remote-debugging-port=9222"
] ]
} }
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
} }
], ],
"trajectory": "trajectories/", "trajectory": "trajectories/",

View File

@@ -47,9 +47,9 @@ def human_agent():
Runs the Gym environment with human input. Runs the Gym environment with human input.
""" """
with open("evaluation_examples/examples/thunderbird/c9e7eaf2-b1a1-4efc-a982-721972fa9f02.json", "r") as f: with open("evaluation_examples/examples/chrome/af630914-714e-4a24-a7bb-f9af687d3b91.json", "r") as f:
example = json.load(f) example = json.load(f)
example["snapshot"] = "Snapshot 30" example["snapshot"] = "Snapshot 35"
#env = DesktopEnv( path_to_vm="~/vmware/Windows 10 x64/Windows 10 x64.vmx" #env = DesktopEnv( path_to_vm="~/vmware/Windows 10 x64/Windows 10 x64.vmx"
env = DesktopEnv( path_to_vm="/mnt/data1/david/os-images/Ubuntu-1218/Ubuntu.vmx" env = DesktopEnv( path_to_vm="/mnt/data1/david/os-images/Ubuntu-1218/Ubuntu.vmx"