[Feature] Initialize and Implement Aguvis Evaluation on OSWorld (#98)

* Initialize Aguvis eval on OSWorld

* Debug

* Debug

* v1, internal version

* Add experiments script

* Fix minor bugs

* Update new endpoint

* Update ip

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Fix model name

* Fix docker close issues; update prompting

* Fix missed

* Fix the default port to avoid crashing on examples like '_update_browse_history_setup'

* Fix server and chromium ports in setup

* Revert and add missed dependency

* Add VLC port for docker

* Update

* Clean

---------

Co-authored-by: Tianbao Xie <tianbaoxie@U-492FC39R-0217.local>
Co-authored-by: FredWuCZ <fredwucz@outlook.com>
This commit is contained in:
Tianbao Xie
2024-11-11 12:36:16 +08:00
committed by GitHub
parent b35dc40ff4
commit 20442244fa
7 changed files with 910 additions and 10 deletions

View File

@@ -28,6 +28,8 @@ class DockerProvider(Provider):
self.server_port = None
self.vnc_port = None
self.chromium_port = None
self.vlc_port = None
self.container = None
self.environment = {"DISK_SIZE": "32G", "RAM_SIZE": "4G", "CPU_CORES": "4"} # Modify if needed
temp_dir = Path(os.getenv('TEMP') if platform.system() == 'Windows' else '/tmp')
@@ -92,6 +94,7 @@ class DockerProvider(Provider):
self.vnc_port = self._get_available_port(8006)
self.server_port = self._get_available_port(5000)
self.chromium_port = self._get_available_port(9222)
self.vlc_port = self._get_available_port(8080)
# Start container while still holding the lock
self.container = self.client.containers.run(
@@ -108,13 +111,14 @@ class DockerProvider(Provider):
ports={
8006: self.vnc_port,
5000: self.server_port,
9222: self.chromium_port
9222: self.chromium_port,
8080: self.vlc_port
},
detach=True
)
logger.info(f"Started container with ports - VNC: {self.vnc_port}, "
f"Server: {self.server_port}, Chrome: {self.chromium_port}")
f"Server: {self.server_port}, Chrome: {self.chromium_port}, VLC: {self.vlc_port}")
# Wait for VM to be ready
self._wait_for_vm_ready()
@@ -130,15 +134,15 @@ class DockerProvider(Provider):
raise e
def get_ip_address(self, path_to_vm: str) -> str:
    """Return the address string for the running container's forwarded ports.

    The result has the form
    ``localhost:<server_port>:<chromium_port>:<vnc_port>:<vlc_port>``.

    Args:
        path_to_vm: Not used by this method; accepted for signature
            compatibility.

    Returns:
        The colon-separated host and port string described above.

    Raises:
        RuntimeError: If any of the four ports is unset (falsy), e.g.
            because the emulator has not been started or was stopped.
    """
    # All four ports are checked together so a partially allocated set is
    # never reported as a usable address.
    ports = (self.server_port, self.chromium_port, self.vnc_port, self.vlc_port)
    if not all(ports):
        raise RuntimeError("VM not started - ports not allocated")
    return f"localhost:{self.server_port}:{self.chromium_port}:{self.vnc_port}:{self.vlc_port}"
def save_state(self, path_to_vm: str, snapshot_name: str):
    """Reject snapshot creation, which this provider does not implement.

    Args:
        path_to_vm: Not used.
        snapshot_name: Not used.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("Snapshots not available for Docker provider")
def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str):
    """Handle a revert request by stopping the emulator.

    No snapshot is restored here; the call is delegated to
    ``stop_emulator``.
    NOTE(review): presumably the caller starts a fresh container afterwards
    to get a clean state - confirm against the call site.

    Args:
        path_to_vm: Forwarded unchanged to ``stop_emulator``.
        snapshot_name: Ignored.
    """
    self.stop_emulator(path_to_vm)
def stop_emulator(self, path_to_vm: str):
if self.container:
@@ -154,3 +158,4 @@ class DockerProvider(Provider):
self.server_port = None
self.vnc_port = None
self.chromium_port = None
self.vlc_port = None