[Feature] Initialize and Implement Aguvis Evaluation on OSWorld (#98)

* Initialize Aguvis eval on OSWorld

* Debug

* Debug

* v1, internal version

* Add experiments script

* Fix minor bugs

* Update new endpoint

* Update IP

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Fix model name

* Fix Docker close issues; update prompting

* Fix missed

* Fix the default port to avoid crashing on examples like '_update_browse_history_setup'

* Fix server and chromium ports in setup

* Revert and add missing dependency

* Add VLC port for Docker

* Update

* Clean

---------

Co-authored-by: Tianbao Xie <tianbaoxie@U-492FC39R-0217.local>
Co-authored-by: FredWuCZ <fredwucz@outlook.com>
This commit is contained in:
Tianbao Xie
2024-11-11 12:36:16 +08:00
committed by GitHub
parent b35dc40ff4
commit 20442244fa
7 changed files with 910 additions and 10 deletions

View File

@@ -28,10 +28,11 @@ FILE_PATH = os.path.dirname(os.path.abspath(__file__))
class SetupController:
def __init__(self, vm_ip: str, server_port: int, chromium_port: int, cache_dir: str):
def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 9222, vlc_port: int = 8080, cache_dir: str = "cache"):
self.vm_ip: str = vm_ip
self.server_port: int = server_port
self.chromium_port: int = chromium_port
self.vlc_port: int = vlc_port
self.http_server: str = f"http://{vm_ip}:{server_port}"
self.http_server_setup_root: str = f"http://{vm_ip}:{server_port}/setup"
self.cache_dir: str = cache_dir
@@ -532,7 +533,7 @@ class SetupController:
"""
host = self.vm_ip
port = 9222 # fixme: this port is hard-coded, need to be changed from config file
port = self.chromium_port
remote_debugging_url = f"http://{host}:{port}"
with sync_playwright() as p:
@@ -643,7 +644,7 @@ class SetupController:
logger.info('Fake browsing history added successfully.')
controller = PythonController(self.vm_ip)
controller = PythonController(self.vm_ip, self.server_port)
# get the path of the history file according to the platform
os_type = controller.get_vm_platform()