Version of Feb 2nd

Human evaluation as a non-expert on Chrome tasks
This commit is contained in:
David Chang
2024-02-02 05:13:12 +08:00
parent 9df0854469
commit 5ee9621e0d
4 changed files with 46 additions and 24 deletions

View File

@@ -1 +1 @@
exp_som
test

View File

@@ -7,6 +7,7 @@ import uuid
import tempfile
from typing import Any, Union, Optional
from typing import Dict, List
import os
import requests
from pydrive.auth import GoogleAuth
@@ -114,6 +115,7 @@ class SetupController:
if not os.path.exists(cache_path):
max_retries = 3
downloaded = False
e = None
for i in range(max_retries):
try:
response = requests.get(url, stream=True)
@@ -128,7 +130,7 @@ class SetupController:
break
except requests.RequestException as e:
logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
logger.error(f"Failed to download {url} caused by {e}. Retrying... ({max_retries - i - 1} attempts left)")
if not downloaded:
raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}")
@@ -344,39 +346,49 @@ class SetupController:
port = 9222 # fixme: this port is hard-coded, need to be changed from config file
remote_debugging_url = f"http://{host}:{port}"
with sync_playwright() as p:
logger.info("Connect to Chrome @: %s", remote_debugging_url)
logger.debug("PLAYWRIGHT ENV: %s", repr(os.environ))
for attempt in range(15):
if attempt>0:
time.sleep(5)
browser = None
for attempt in range(15):
with sync_playwright() as p:
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
break
#break
except Exception as e:
if attempt < 14:
logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}")
time.sleep(1)
#time.sleep(10)
continue
else:
logger.error(f"Failed to connect after multiple attempts: {e}")
raise e
if not browser:
return
if not browser:
return
for i, url in enumerate(urls_to_open):
# Use the first context (which should be the only one if using default profile)
if i == 0:
context = browser.contexts[0]
logger.info("Opening %s...", urls_to_open)
for i, url in enumerate(urls_to_open):
# Use the first context (which should be the only one if using default profile)
if i == 0:
context = browser.contexts[0]
page = context.new_page() # Create a new page (tab) within the existing context
page.goto(url, timeout=60000)
logger.info(f"Opened tab {i + 1}: {url}")
page = context.new_page() # Create a new page (tab) within the existing context
try:
page.goto(url, timeout=60000)
except:
logger.warning("Opening %s exceeds time limit", url) # only for human test
logger.info(f"Opened tab {i + 1}: {url}")
if i == 0:
# clear the default tab
default_page = context.pages[0]
default_page.close()
if i == 0:
# clear the default tab
default_page = context.pages[0]
default_page.close()
# Do not close the context or browser; they will remain open after script ends
return browser, context
# Do not close the context or browser; they will remain open after script ends
return browser, context
def _chrome_close_tabs_setup(self, urls_to_close: List[str]):
time.sleep(5) # Wait for Chrome to finish launching
@@ -552,4 +564,4 @@ class SetupController:
else:
raise NotImplementedError
return browser, context
return browser, context

View File

@@ -12,6 +12,16 @@
"--remote-debugging-port=9222"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
}
],
"trajectory": "trajectories/",

View File

@@ -47,9 +47,9 @@ def human_agent():
Runs the Gym environment with human input.
"""
with open("evaluation_examples/examples/thunderbird/c9e7eaf2-b1a1-4efc-a982-721972fa9f02.json", "r") as f:
with open("evaluation_examples/examples/chrome/af630914-714e-4a24-a7bb-f9af687d3b91.json", "r") as f:
example = json.load(f)
example["snapshot"] = "Snapshot 30"
example["snapshot"] = "Snapshot 35"
#env = DesktopEnv( path_to_vm="~/vmware/Windows 10 x64/Windows 10 x64.vmx"
env = DesktopEnv( path_to_vm="/mnt/data1/david/os-images/Ubuntu-1218/Ubuntu.vmx"