From 7c64b6273596f6be94401ba02a088ae5a43e78f4 Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Fri, 27 Sep 2024 11:03:21 +0800 Subject: [PATCH 01/22] Add docker provider framework --- desktop_env/providers/__init__.py | 20 +- desktop_env/providers/docker/manager.py | 331 +++++++++++++++++++++++ desktop_env/providers/docker/provider.py | 64 +++++ 3 files changed, 407 insertions(+), 8 deletions(-) create mode 100644 desktop_env/providers/docker/manager.py create mode 100644 desktop_env/providers/docker/provider.py diff --git a/desktop_env/providers/__init__.py b/desktop_env/providers/__init__.py index 8c12197..7c95382 100644 --- a/desktop_env/providers/__init__.py +++ b/desktop_env/providers/__init__.py @@ -1,12 +1,4 @@ from desktop_env.providers.base import VMManager, Provider -from desktop_env.providers.vmware.manager import VMwareVMManager -from desktop_env.providers.vmware.provider import VMwareProvider -from desktop_env.providers.aws.manager import AWSVMManager -from desktop_env.providers.aws.provider import AWSProvider -from desktop_env.providers.azure.manager import AzureVMManager -from desktop_env.providers.azure.provider import AzureProvider -from desktop_env.providers.virtualbox.manager import VirtualBoxVMManager -from desktop_env.providers.virtualbox.provider import VirtualBoxProvider def create_vm_manager_and_provider(provider_name: str, region: str): """ @@ -14,12 +6,24 @@ def create_vm_manager_and_provider(provider_name: str, region: str): """ provider_name = provider_name.lower().strip() if provider_name == "vmware": + from desktop_env.providers.vmware.manager import VMwareVMManager + from desktop_env.providers.vmware.provider import VMwareProvider return VMwareVMManager(), VMwareProvider(region) elif provider_name == "virtualbox": + from desktop_env.providers.virtualbox.manager import VirtualBoxVMManager + from desktop_env.providers.virtualbox.provider import VirtualBoxProvider return VirtualBoxVMManager(), VirtualBoxProvider(region) elif provider_name 
in ["aws", "amazon web services"]: + from desktop_env.providers.aws.manager import AWSVMManager + from desktop_env.providers.aws.provider import AWSProvider return AWSVMManager(), AWSProvider(region) elif provider_name == "azure": + from desktop_env.providers.azure.manager import AzureVMManager + from desktop_env.providers.azure.provider import AzureProvider return AzureVMManager(), AzureProvider(region) + elif provider_name == "docker": + from desktop_env.providers.docker.manager import DockerVMManager + from desktop_env.providers.docker.provider import DockerProvider + return DockerVMManager(), DockerProvider(region) else: raise NotImplementedError(f"{provider_name} not implemented!") diff --git a/desktop_env/providers/docker/manager.py b/desktop_env/providers/docker/manager.py new file mode 100644 index 0000000..e1ebdb8 --- /dev/null +++ b/desktop_env/providers/docker/manager.py @@ -0,0 +1,331 @@ +import os +import platform +import random +import re + +import threading +from filelock import FileLock +import uuid +import zipfile + +from time import sleep +import shutil +import psutil +import subprocess +import requests +from tqdm import tqdm +import docker + +import logging + +from desktop_env.providers.base import VMManager + +logger = logging.getLogger("desktopenv.providers.vmware.VMwareVMManager") +logger.setLevel(logging.INFO) + +MAX_RETRY_TIMES = 10 +RETRY_INTERVAL = 5 +UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu-x86.zip" + +# Determine the platform and CPU architecture to decide the correct VM image to download +# if platform.system() == 'Darwin': # macOS +# # if os.uname().machine == 'arm64': # Apple Silicon +# URL = UBUNTU_ARM_URL +# # else: +# # url = UBUNTU_X86_URL +# elif platform.machine().lower() in ['amd64', 'x86_64']: +# URL = UBUNTU_X86_URL +# else: +# raise Exception("Unsupported platform or architecture") + +URL = UBUNTU_X86_URL + +DOWNLOADED_FILE_NAME = URL.split('/')[-1] +REGISTRY_PATH = 
'.docker_vms' +LOCK_FILE_NAME = '.docker_lck' +VMS_DIR = "./docker_vm_data" +update_lock = threading.Lock() + +if platform.system() == 'Windows': + docker_path = r"C:\Program Files\Docker\Docker" + os.environ["PATH"] += os.pathsep + docker_path + +def generate_new_vm_name(vms_dir, os_type): + registry_idx = 0 + prefix = os_type + while True: + attempted_new_name = f"{prefix}{registry_idx}" + if os.path.exists( + os.path.join(vms_dir, attempted_new_name, attempted_new_name + ".qcow2")): + registry_idx += 1 + else: + return attempted_new_name + +def _install_vm(vm_name, vms_dir, downloaded_file_name, os_type, original_vm_name="Ubuntu"): + os.makedirs(vms_dir, exist_ok=True) + + def __download_and_unzip_vm(): + # Download the virtual machine image + logger.info("Downloading the virtual machine image...") + downloaded_size = 0 + + if os_type == "Ubuntu": + if platform.system() == 'Darwin': + URL = UBUNTU_X86_URL + elif platform.machine().lower() in ['amd64', 'x86_64']: + URL = UBUNTU_X86_URL + elif os_type == "Windows": + if platform.machine().lower() in ['amd64', 'x86_64']: + URL = WINDOWS_X86_URL + DOWNLOADED_FILE_NAME = URL.split('/')[-1] + downloaded_file_name = DOWNLOADED_FILE_NAME + + while True: + downloaded_file_path = os.path.join(vms_dir, downloaded_file_name) + headers = {} + if os.path.exists(downloaded_file_path): + downloaded_size = os.path.getsize(downloaded_file_path) + headers["Range"] = f"bytes={downloaded_size}-" + + with requests.get(URL, headers=headers, stream=True) as response: + if response.status_code == 416: + # This means the range was not satisfiable, possibly the file was fully downloaded + logger.info("Fully downloaded or the file size changed.") + break + + response.raise_for_status() + total_size = int(response.headers.get('content-length', 0)) + + with open(downloaded_file_path, "ab") as file, tqdm( + desc="Progress", + total=total_size, + unit='iB', + unit_scale=True, + unit_divisor=1024, + initial=downloaded_size, + ascii=True + ) as 
progress_bar: + try: + for data in response.iter_content(chunk_size=1024): + size = file.write(data) + progress_bar.update(size) + except (requests.exceptions.RequestException, IOError) as e: + logger.error(f"Download error: {e}") + sleep(RETRY_INTERVAL) + logger.error("Retrying...") + else: + logger.info("Download succeeds.") + break # Download completed successfully + + # Unzip the downloaded file + logger.info("Unzipping the downloaded file...☕️") + with zipfile.ZipFile(downloaded_file_path, 'r') as zip_ref: + zip_ref.extractall(os.path.join(vms_dir, vm_name)) + logger.info("Files have been successfully extracted to the directory: " + str(os.path.join(vms_dir, vm_name))) + + vm_path = os.path.join(vms_dir, vm_name, vm_name, vm_name + ".vmx") + + # Start the virtual machine + def start_vm(vm_path, max_retries=20): + pass + + if not start_vm(vm_path): + raise ValueError("Error encountered during installation, please rerun the code for retrying.") + + def get_vm_ip_and_port(vm_path, max_retries=20): + pass + + vm_ip, vm_port = get_vm_ip_and_port(vm_path) + if not vm_ip: + raise ValueError("Error encountered during installation, please rerun the code for retrying.") + + # Function used to check whether the virtual machine is ready + def download_screenshot(ip, port): + url = f"http://{ip}:{port}/screenshot" + try: + # max trey times 1, max timeout 1 + response = requests.get(url, timeout=(10, 10)) + if response.status_code == 200: + return True + except Exception as e: + logger.error(f"Error: {e}") + logger.error(f"Type: {type(e).__name__}") + logger.error(f"Error detail: {str(e)}") + sleep(RETRY_INTERVAL) + return False + + # Try downloading the screenshot until successful + while not download_screenshot(vm_ip, vm_port): + logger.info("Check whether the virtual machine is ready...") + + logger.info("Virtual machine is ready. Start to make a snapshot on the virtual machine. 
It would take a while...") + + +class DockerVMManager(VMManager): + def __init__(self, registry_path=REGISTRY_PATH): + self.registry_path = registry_path + self.lock = FileLock(LOCK_FILE_NAME, timeout=60) + self.initialize_registry() + self.client = docker.from_env() + + def initialize_registry(self): + with self.lock: # Locking during initialization + if not os.path.exists(self.registry_path): + with open(self.registry_path, 'w') as file: + file.write('') + + def add_vm(self, vm_path, lock_needed=True): + if lock_needed: + with self.lock: + self._add_vm(vm_path) + else: + self._add_vm(vm_path) + + def _add_vm(self, vm_path, region=None): + assert region in [None, 'local'], "For VMware provider, the region should be neither None or 'local'." + with self.lock: + with open(self.registry_path, 'r') as file: + lines = file.readlines() + new_lines = lines + [f'{vm_path}|free\n'] + with open(self.registry_path, 'w') as file: + file.writelines(new_lines) + + def occupy_vm(self, vm_path, pid, lock_needed=True): + if lock_needed: + with self.lock: + self._occupy_vm(vm_path, pid) + else: + self._occupy_vm(vm_path, pid) + + def _occupy_vm(self, vm_path, pid, region=None): + assert region in [None, 'local'], "For VMware provider, the region should be neither None or 'local'." 
+ with self.lock: + new_lines = [] + with open(self.registry_path, 'r') as file: + lines = file.readlines() + for line in lines: + registered_vm_path, _ = line.strip().split('|') + if registered_vm_path == vm_path: + new_lines.append(f'{registered_vm_path}|{pid}\n') + else: + new_lines.append(line) + with open(self.registry_path, 'w') as file: + file.writelines(new_lines) + + def delete_vm(self, vm_path, lock_needed=True): + if lock_needed: + with self.lock: + self._delete_vm(vm_path) + else: + self._delete_vm(vm_path) + + def _delete_vm(self, vm_path): + raise NotImplementedError + + def check_and_clean(self, vms_dir, lock_needed=True): + if lock_needed: + with self.lock: + self._check_and_clean(vms_dir) + else: + self._check_and_clean(vms_dir) + + def _check_and_clean(self, vms_dir): + with self.lock: # Lock when cleaning up the registry and vms_dir + # Check and clean on the running vms, detect the released ones and mark then as 'free' + active_pids = {p.pid for p in psutil.process_iter()} + new_lines = [] + vm_paths = [] + + with open(self.registry_path, 'r') as file: + lines = file.readlines() + for line in lines: + vm_path, pid_str = line.strip().split('|') + if not os.path.exists(vm_path): + logger.info(f"VM {vm_path} not found, releasing it.") + new_lines.append(f'{vm_path}|free\n') + continue + + vm_paths.append(vm_path) + if pid_str == "free": + new_lines.append(line) + continue + + if int(pid_str) in active_pids: + new_lines.append(line) + else: + new_lines.append(f'{vm_path}|free\n') + with open(self.registry_path, 'w') as file: + file.writelines(new_lines) + + # Check and clean on the files inside vms_dir, delete the unregistered ones + os.makedirs(vms_dir, exist_ok=True) + vm_names = os.listdir(vms_dir) + for vm_name in vm_names: + # skip the downloaded .zip file + if vm_name == DOWNLOADED_FILE_NAME: + continue + # Skip the .DS_Store file on macOS + if vm_name == ".DS_Store": + continue + + flag = True + for vm_path in vm_paths: + if vm_name + 
".qcow2" in vm_path: + flag = False + elif vm_name + ".img" in vm_path: + flag = False + if flag: + shutil.rmtree(os.path.join(vms_dir, vm_name)) + + def list_free_vms(self, lock_needed=True): + if lock_needed: + with self.lock: + return self._list_free_vms() + else: + return self._list_free_vms() + + def _list_free_vms(self): + with self.lock: # Lock when reading the registry + free_vms = [] + with open(self.registry_path, 'r') as file: + lines = file.readlines() + for line in lines: + vm_path, pid_str = line.strip().split('|') + if pid_str == "free": + free_vms.append((vm_path, pid_str)) + return free_vms + + def get_vm_path(self, os_type, region=None): + with self.lock: + if not DockerVMManager.checked_and_cleaned: + DockerVMManager.checked_and_cleaned = True + self._check_and_clean(vms_dir=VMS_DIR) + + allocation_needed = False + with self.lock: + free_vms_paths = self._list_free_vms() + if len(free_vms_paths) == 0: + # No free virtual machine available, generate a new one + allocation_needed = True + else: + # Choose the first free virtual machine + chosen_vm_path = free_vms_paths[0][0] + self._occupy_vm(chosen_vm_path, os.getpid()) + return chosen_vm_path + + if allocation_needed: + logger.info("No free virtual machine available. 
Generating a new one, which would take a while...☕") + new_vm_name = generate_new_vm_name(vms_dir=VMS_DIR, os_type=os_type) + + original_vm_name = None + if os_type == "Ubuntu": + original_vm_name = "Ubuntu" + elif os_type == "Windows": + original_vm_name = "Windows 10 x64" + + new_vm_path = _install_vm(new_vm_name, vms_dir=VMS_DIR, + downloaded_file_name=DOWNLOADED_FILE_NAME, original_vm_name=original_vm_name, os_type=os_type) + with self.lock: + self._add_vm(new_vm_path) + self._occupy_vm(new_vm_path, os.getpid()) + return new_vm_path diff --git a/desktop_env/providers/docker/provider.py b/desktop_env/providers/docker/provider.py new file mode 100644 index 0000000..b60eaca --- /dev/null +++ b/desktop_env/providers/docker/provider.py @@ -0,0 +1,64 @@ +import logging +import os +import platform +import subprocess +import time +import docker + +from desktop_env.providers.base import Provider + +logger = logging.getLogger("desktopenv.providers.vmware.VMwareProvider") +logger.setLevel(logging.INFO) + +WAIT_TIME = 3 + + +def get_vmrun_type(return_list=False): + if platform.system() == 'Windows' or platform.system() == 'Linux': + if return_list: + return ['-T', 'ws'] + else: + return '-T ws' + elif platform.system() == 'Darwin': # Darwin is the system name for macOS + if return_list: + return ['-T', 'fusion'] + else: + return '-T fusion' + else: + raise Exception("Unsupported operating system") + + +class DockerProvider(Provider): + def __init__(self, region: str): + self.client = docker.from_env() + + @staticmethod + def _execute_command(command: list, return_output=False): + process = subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + encoding="utf-8" + ) + + if return_output: + output = process.communicate()[0].strip() + return output + else: + return None + + def start_emulator(self, path_to_vm: str, headless: bool, os_type: str): + pass + + def get_ip_address(self, path_to_vm: str) -> str: + pass + + def save_state(self, 
path_to_vm: str, snapshot_name: str): + pass + + def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str): + pass + + def stop_emulator(self, path_to_vm: str): + pass \ No newline at end of file From 6498b2771b5a977caf9c29d12846498cc4a506ea Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Fri, 27 Sep 2024 11:07:54 +0800 Subject: [PATCH 02/22] Update VM download link --- desktop_env/providers/docker/manager.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/desktop_env/providers/docker/manager.py b/desktop_env/providers/docker/manager.py index e1ebdb8..86fba12 100644 --- a/desktop_env/providers/docker/manager.py +++ b/desktop_env/providers/docker/manager.py @@ -25,7 +25,7 @@ logger.setLevel(logging.INFO) MAX_RETRY_TIMES = 10 RETRY_INTERVAL = 5 -UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu-x86.zip" +UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2" # Determine the platform and CPU architecture to decide the correct VM image to download # if platform.system() == 'Darwin': # macOS @@ -117,13 +117,13 @@ def _install_vm(vm_name, vms_dir, downloaded_file_name, os_type, original_vm_nam logger.info("Download succeeds.") break # Download completed successfully - # Unzip the downloaded file - logger.info("Unzipping the downloaded file...☕️") - with zipfile.ZipFile(downloaded_file_path, 'r') as zip_ref: - zip_ref.extractall(os.path.join(vms_dir, vm_name)) - logger.info("Files have been successfully extracted to the directory: " + str(os.path.join(vms_dir, vm_name))) + vm_path = os.path.join(vms_dir, vm_name, vm_name + ".qcow2") - vm_path = os.path.join(vms_dir, vm_name, vm_name, vm_name + ".vmx") + # Execute the function to download and unzip the VM, and update the vm metadata + if not os.path.exists(vm_path): + __download_and_unzip_vm() + else: + logger.info(f"Virtual machine exists: {vm_path}") # Start the virtual machine def 
start_vm(vm_path, max_retries=20): @@ -272,8 +272,6 @@ class DockerVMManager(VMManager): for vm_path in vm_paths: if vm_name + ".qcow2" in vm_path: flag = False - elif vm_name + ".img" in vm_path: - flag = False if flag: shutil.rmtree(os.path.join(vms_dir, vm_name)) From da18f2f6e9f3cb202fbf3aa790b8b9ea56d73a18 Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Fri, 27 Sep 2024 13:22:57 +0800 Subject: [PATCH 03/22] Add stop container --- desktop_env/providers/docker/provider.py | 30 +++++++++--------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/desktop_env/providers/docker/provider.py b/desktop_env/providers/docker/provider.py index b60eaca..fb3f77b 100644 --- a/desktop_env/providers/docker/provider.py +++ b/desktop_env/providers/docker/provider.py @@ -12,22 +12,6 @@ logger.setLevel(logging.INFO) WAIT_TIME = 3 - -def get_vmrun_type(return_list=False): - if platform.system() == 'Windows' or platform.system() == 'Linux': - if return_list: - return ['-T', 'ws'] - else: - return '-T ws' - elif platform.system() == 'Darwin': # Darwin is the system name for macOS - if return_list: - return ['-T', 'fusion'] - else: - return '-T fusion' - else: - raise Exception("Unsupported operating system") - - class DockerProvider(Provider): def __init__(self, region: str): self.client = docker.from_env() @@ -49,16 +33,24 @@ class DockerProvider(Provider): return None def start_emulator(self, path_to_vm: str, headless: bool, os_type: str): - pass + self.container = self.client.containers.run('qemux/qemu-docker', environment={"DISK_SIZE": "64G", "RAM_SIZE": "6G", "CPU_CORES": "8"}, volumes={"/Users/happysix/Programs/HKUNLP/Qemu/Ubuntu.qcow2": {"bind": "/Ubuntu.qcow2", "mode": "ro"}, "/Users/happysix/Programs/HKUNLP/Qemu/snapshot.qcow2": {"bind": "/boot.qcow2", "mode": "rw"}}, cap_add=["NET_ADMIN"], ports={8006: 8006, 5000: 5001}, detach=True) def get_ip_address(self, path_to_vm: str) -> str: pass def save_state(self, path_to_vm: str, snapshot_name: str): - pass + 
logger.info("Saving VM state...") + DockerProvider._execute_command( + ["qemu-img", "convert", "-O", "qcow2", snapshot_name, "temp.qcow2"]) + DockerProvider._execute_command( + ["mv", "temp.qcow2", path_to_vm] + ) + time.sleep(WAIT_TIME) # Wait for the VM to save def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str): pass def stop_emulator(self, path_to_vm: str): - pass \ No newline at end of file + logger.info("Stopping VMware VM...") + self.container.stop(WAIT_TIME) + self.container.remove() \ No newline at end of file From 5552ac9dcf7ddacf2629ba49afa0d63019c0118c Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Sat, 28 Sep 2024 10:40:27 +0800 Subject: [PATCH 04/22] Update docker manager & provider --- desktop_env/controllers/python.py | 3 +- desktop_env/controllers/setup.py | 6 +- desktop_env/desktop_env.py | 16 +- desktop_env/evaluators/getters/general.py | 2 +- desktop_env/providers/docker/manager.py | 326 ++++------------------ desktop_env/providers/docker/provider.py | 44 ++- 6 files changed, 89 insertions(+), 308 deletions(-) diff --git a/desktop_env/controllers/python.py b/desktop_env/controllers/python.py index a3b19c5..c572083 100644 --- a/desktop_env/controllers/python.py +++ b/desktop_env/controllers/python.py @@ -12,9 +12,10 @@ logger = logging.getLogger("desktopenv.pycontroller") class PythonController: def __init__(self, vm_ip: str, + server_port: int, pkgs_prefix: str = "import pyautogui; import time; pyautogui.FAILSAFE = False; {command}"): self.vm_ip = vm_ip - self.http_server = f"http://{vm_ip}:5000" + self.http_server = f"http://{vm_ip}:{server_port}" self.pkgs_prefix = pkgs_prefix # fixme: this is a hacky way to execute python commands. 
fix it and combine it with installation of packages self.retry_times = 3 self.retry_interval = 5 diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index b4ae4b2..dfac4b3 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -28,10 +28,10 @@ FILE_PATH = os.path.dirname(os.path.abspath(__file__)) class SetupController: - def __init__(self, vm_ip: str, cache_dir: str): + def __init__(self, vm_ip: str, server_port: int, cache_dir: str): self.vm_ip: str = vm_ip - self.http_server: str = f"http://{vm_ip}:5000" - self.http_server_setup_root: str = f"http://{vm_ip}:5000/setup" + self.http_server: str = f"http://{vm_ip}:{server_port}" + self.http_server_setup_root: str = f"http://{vm_ip}:{server_port}/setup" self.cache_dir: str = cache_dir def reset_cache_dir(self, cache_dir: str): diff --git a/desktop_env/desktop_env.py b/desktop_env/desktop_env.py index e02eacf..a03ebb2 100644 --- a/desktop_env/desktop_env.py +++ b/desktop_env/desktop_env.py @@ -26,7 +26,7 @@ class DesktopEnv(gym.Env): def __init__( self, - provider_name: str = "vmware", + provider_name: str = "docker", region: str = None, path_to_vm: str = None, snapshot_name: str = "init_state", @@ -54,6 +54,11 @@ class DesktopEnv(gym.Env): """ # Initialize VM manager and vitualization provider self.region = region + + # Default + self.server_port = 5000 + self.chromium_port = 9222 + self.vnc_port = 8006 self.manager, self.provider = create_vm_manager_and_provider(provider_name, region) self.os_type = os_type @@ -92,9 +97,14 @@ class DesktopEnv(gym.Env): self.provider.start_emulator(self.path_to_vm, self.headless, self.os_type) # Get the ip from the virtual machine, and setup the controller - self.vm_ip = self.provider.get_ip_address(self.path_to_vm) + vm_ip_ports = self.provider.get_ip_address(self.path_to_vm).split(':') + self.vm_ip = vm_ip_ports[0] + if len(vm_ip_ports) > 1: + self.server_port = int(vm_ip_ports[1]) + self.chromium_port = 
int(vm_ip_ports[2]) + self.vnc_port = int(vm_ip_ports[3]) self.controller = PythonController(vm_ip=self.vm_ip) - self.setup_controller = SetupController(vm_ip=self.vm_ip, cache_dir=self.cache_dir_base) + self.setup_controller = SetupController(vm_ip=self.vm_ip, server_port=self.server_port, cache_dir=self.cache_dir_base) def _revert_to_snapshot(self): # Revert to certain snapshot of the virtual machine, and refresh the path to vm and ip of vm diff --git a/desktop_env/evaluators/getters/general.py b/desktop_env/evaluators/getters/general.py index 81ad69b..d5d965c 100644 --- a/desktop_env/evaluators/getters/general.py +++ b/desktop_env/evaluators/getters/general.py @@ -23,7 +23,7 @@ def get_vm_command_line(env, config: Dict[str, str]): def get_vm_command_error(env, config: Dict[str, str]): vm_ip = env.vm_ip - port = 5000 + port = env.server_port command = config["command"] shell = config.get("shell", False) diff --git a/desktop_env/providers/docker/manager.py b/desktop_env/providers/docker/manager.py index 86fba12..36dfbf1 100644 --- a/desktop_env/providers/docker/manager.py +++ b/desktop_env/providers/docker/manager.py @@ -25,7 +25,7 @@ logger.setLevel(logging.INFO) MAX_RETRY_TIMES = 10 RETRY_INTERVAL = 5 -UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2" +UBUNTU_X86_URL = "docker-osworld-x86" # Determine the platform and CPU architecture to decide the correct VM image to download # if platform.system() == 'Darwin': # macOS @@ -41,289 +41,65 @@ UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve URL = UBUNTU_X86_URL DOWNLOADED_FILE_NAME = URL.split('/')[-1] -REGISTRY_PATH = '.docker_vms' -LOCK_FILE_NAME = '.docker_lck' -VMS_DIR = "./docker_vm_data" -update_lock = threading.Lock() if platform.system() == 'Windows': docker_path = r"C:\Program Files\Docker\Docker" os.environ["PATH"] += os.pathsep + docker_path -def generate_new_vm_name(vms_dir, os_type): - registry_idx = 0 - prefix = os_type 
+UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2" +VMS_DIR = "./vmware_vm_data" + +def __download_vm(vms_dir: str): + # Download the virtual machine image + logger.info("Downloading the virtual machine image...") + downloaded_size = 0 + + URL = UBUNTU_X86_URL + DOWNLOADED_FILE_NAME = URL.split('/')[-1] + downloaded_file_name = DOWNLOADED_FILE_NAME + while True: - attempted_new_name = f"{prefix}{registry_idx}" - if os.path.exists( - os.path.join(vms_dir, attempted_new_name, attempted_new_name + ".qcow2")): - registry_idx += 1 - else: - return attempted_new_name + downloaded_file_path = os.path.join(vms_dir, downloaded_file_name) + headers = {} + if os.path.exists(downloaded_file_path): + downloaded_size = os.path.getsize(downloaded_file_path) + headers["Range"] = f"bytes={downloaded_size}-" -def _install_vm(vm_name, vms_dir, downloaded_file_name, os_type, original_vm_name="Ubuntu"): - os.makedirs(vms_dir, exist_ok=True) + with requests.get(URL, headers=headers, stream=True) as response: + if response.status_code == 416: + # This means the range was not satisfiable, possibly the file was fully downloaded + logger.info("Fully downloaded or the file size changed.") + break - def __download_and_unzip_vm(): - # Download the virtual machine image - logger.info("Downloading the virtual machine image...") - downloaded_size = 0 - - if os_type == "Ubuntu": - if platform.system() == 'Darwin': - URL = UBUNTU_X86_URL - elif platform.machine().lower() in ['amd64', 'x86_64']: - URL = UBUNTU_X86_URL - elif os_type == "Windows": - if platform.machine().lower() in ['amd64', 'x86_64']: - URL = WINDOWS_X86_URL - DOWNLOADED_FILE_NAME = URL.split('/')[-1] - downloaded_file_name = DOWNLOADED_FILE_NAME - - while True: - downloaded_file_path = os.path.join(vms_dir, downloaded_file_name) - headers = {} - if os.path.exists(downloaded_file_path): - downloaded_size = os.path.getsize(downloaded_file_path) - headers["Range"] = 
f"bytes={downloaded_size}-" - - with requests.get(URL, headers=headers, stream=True) as response: - if response.status_code == 416: - # This means the range was not satisfiable, possibly the file was fully downloaded - logger.info("Fully downloaded or the file size changed.") - break - - response.raise_for_status() - total_size = int(response.headers.get('content-length', 0)) - - with open(downloaded_file_path, "ab") as file, tqdm( - desc="Progress", - total=total_size, - unit='iB', - unit_scale=True, - unit_divisor=1024, - initial=downloaded_size, - ascii=True - ) as progress_bar: - try: - for data in response.iter_content(chunk_size=1024): - size = file.write(data) - progress_bar.update(size) - except (requests.exceptions.RequestException, IOError) as e: - logger.error(f"Download error: {e}") - sleep(RETRY_INTERVAL) - logger.error("Retrying...") - else: - logger.info("Download succeeds.") - break # Download completed successfully - - vm_path = os.path.join(vms_dir, vm_name, vm_name + ".qcow2") - - # Execute the function to download and unzip the VM, and update the vm metadata - if not os.path.exists(vm_path): - __download_and_unzip_vm() - else: - logger.info(f"Virtual machine exists: {vm_path}") - - # Start the virtual machine - def start_vm(vm_path, max_retries=20): - pass - - if not start_vm(vm_path): - raise ValueError("Error encountered during installation, please rerun the code for retrying.") - - def get_vm_ip_and_port(vm_path, max_retries=20): - pass - - vm_ip, vm_port = get_vm_ip_and_port(vm_path) - if not vm_ip: - raise ValueError("Error encountered during installation, please rerun the code for retrying.") - - # Function used to check whether the virtual machine is ready - def download_screenshot(ip, port): - url = f"http://{ip}:{port}/screenshot" - try: - # max trey times 1, max timeout 1 - response = requests.get(url, timeout=(10, 10)) - if response.status_code == 200: - return True - except Exception as e: - logger.error(f"Error: {e}") - 
logger.error(f"Type: {type(e).__name__}") - logger.error(f"Error detail: {str(e)}") - sleep(RETRY_INTERVAL) - return False - - # Try downloading the screenshot until successful - while not download_screenshot(vm_ip, vm_port): - logger.info("Check whether the virtual machine is ready...") - - logger.info("Virtual machine is ready. Start to make a snapshot on the virtual machine. It would take a while...") + response.raise_for_status() + total_size = int(response.headers.get('content-length', 0)) + with open(downloaded_file_path, "ab") as file, tqdm( + desc="Progress", + total=total_size, + unit='iB', + unit_scale=True, + unit_divisor=1024, + initial=downloaded_size, + ascii=True + ) as progress_bar: + try: + for data in response.iter_content(chunk_size=1024): + size = file.write(data) + progress_bar.update(size) + except (requests.exceptions.RequestException, IOError) as e: + logger.error(f"Download error: {e}") + sleep(RETRY_INTERVAL) + logger.error("Retrying...") + else: + logger.info("Download succeeds.") + break # Download completed successfully class DockerVMManager(VMManager): - def __init__(self, registry_path=REGISTRY_PATH): - self.registry_path = registry_path - self.lock = FileLock(LOCK_FILE_NAME, timeout=60) - self.initialize_registry() - self.client = docker.from_env() + def __init__(self, registry_path=""): + pass - def initialize_registry(self): - with self.lock: # Locking during initialization - if not os.path.exists(self.registry_path): - with open(self.registry_path, 'w') as file: - file.write('') - - def add_vm(self, vm_path, lock_needed=True): - if lock_needed: - with self.lock: - self._add_vm(vm_path) - else: - self._add_vm(vm_path) - - def _add_vm(self, vm_path, region=None): - assert region in [None, 'local'], "For VMware provider, the region should be neither None or 'local'." 
- with self.lock: - with open(self.registry_path, 'r') as file: - lines = file.readlines() - new_lines = lines + [f'{vm_path}|free\n'] - with open(self.registry_path, 'w') as file: - file.writelines(new_lines) - - def occupy_vm(self, vm_path, pid, lock_needed=True): - if lock_needed: - with self.lock: - self._occupy_vm(vm_path, pid) - else: - self._occupy_vm(vm_path, pid) - - def _occupy_vm(self, vm_path, pid, region=None): - assert region in [None, 'local'], "For VMware provider, the region should be neither None or 'local'." - with self.lock: - new_lines = [] - with open(self.registry_path, 'r') as file: - lines = file.readlines() - for line in lines: - registered_vm_path, _ = line.strip().split('|') - if registered_vm_path == vm_path: - new_lines.append(f'{registered_vm_path}|{pid}\n') - else: - new_lines.append(line) - with open(self.registry_path, 'w') as file: - file.writelines(new_lines) - - def delete_vm(self, vm_path, lock_needed=True): - if lock_needed: - with self.lock: - self._delete_vm(vm_path) - else: - self._delete_vm(vm_path) - - def _delete_vm(self, vm_path): - raise NotImplementedError - - def check_and_clean(self, vms_dir, lock_needed=True): - if lock_needed: - with self.lock: - self._check_and_clean(vms_dir) - else: - self._check_and_clean(vms_dir) - - def _check_and_clean(self, vms_dir): - with self.lock: # Lock when cleaning up the registry and vms_dir - # Check and clean on the running vms, detect the released ones and mark then as 'free' - active_pids = {p.pid for p in psutil.process_iter()} - new_lines = [] - vm_paths = [] - - with open(self.registry_path, 'r') as file: - lines = file.readlines() - for line in lines: - vm_path, pid_str = line.strip().split('|') - if not os.path.exists(vm_path): - logger.info(f"VM {vm_path} not found, releasing it.") - new_lines.append(f'{vm_path}|free\n') - continue - - vm_paths.append(vm_path) - if pid_str == "free": - new_lines.append(line) - continue - - if int(pid_str) in active_pids: - 
new_lines.append(line) - else: - new_lines.append(f'{vm_path}|free\n') - with open(self.registry_path, 'w') as file: - file.writelines(new_lines) - - # Check and clean on the files inside vms_dir, delete the unregistered ones - os.makedirs(vms_dir, exist_ok=True) - vm_names = os.listdir(vms_dir) - for vm_name in vm_names: - # skip the downloaded .zip file - if vm_name == DOWNLOADED_FILE_NAME: - continue - # Skip the .DS_Store file on macOS - if vm_name == ".DS_Store": - continue - - flag = True - for vm_path in vm_paths: - if vm_name + ".qcow2" in vm_path: - flag = False - if flag: - shutil.rmtree(os.path.join(vms_dir, vm_name)) - - def list_free_vms(self, lock_needed=True): - if lock_needed: - with self.lock: - return self._list_free_vms() - else: - return self._list_free_vms() - - def _list_free_vms(self): - with self.lock: # Lock when reading the registry - free_vms = [] - with open(self.registry_path, 'r') as file: - lines = file.readlines() - for line in lines: - vm_path, pid_str = line.strip().split('|') - if pid_str == "free": - free_vms.append((vm_path, pid_str)) - return free_vms - - def get_vm_path(self, os_type, region=None): - with self.lock: - if not DockerVMManager.checked_and_cleaned: - DockerVMManager.checked_and_cleaned = True - self._check_and_clean(vms_dir=VMS_DIR) - - allocation_needed = False - with self.lock: - free_vms_paths = self._list_free_vms() - if len(free_vms_paths) == 0: - # No free virtual machine available, generate a new one - allocation_needed = True - else: - # Choose the first free virtual machine - chosen_vm_path = free_vms_paths[0][0] - self._occupy_vm(chosen_vm_path, os.getpid()) - return chosen_vm_path - - if allocation_needed: - logger.info("No free virtual machine available. 
Generating a new one, which would take a while...☕") - new_vm_name = generate_new_vm_name(vms_dir=VMS_DIR, os_type=os_type) - - original_vm_name = None - if os_type == "Ubuntu": - original_vm_name = "Ubuntu" - elif os_type == "Windows": - original_vm_name = "Windows 10 x64" - - new_vm_path = _install_vm(new_vm_name, vms_dir=VMS_DIR, - downloaded_file_name=DOWNLOADED_FILE_NAME, original_vm_name=original_vm_name, os_type=os_type) - with self.lock: - self._add_vm(new_vm_path) - self._occupy_vm(new_vm_path, os.getpid()) - return new_vm_path + def get_vm_path(self, region): + if not os.path.exists(os.path.join(VMS_DIR, DOWNLOADED_FILE_NAME)): + __download_vm(VMS_DIR) + return os.path.join(VMS_DIR, DOWNLOADED_FILE_NAME) \ No newline at end of file diff --git a/desktop_env/providers/docker/provider.py b/desktop_env/providers/docker/provider.py index fb3f77b..6920a9e 100644 --- a/desktop_env/providers/docker/provider.py +++ b/desktop_env/providers/docker/provider.py @@ -4,6 +4,7 @@ import platform import subprocess import time import docker +import psutil from desktop_env.providers.base import Provider @@ -15,42 +16,35 @@ WAIT_TIME = 3 class DockerProvider(Provider): def __init__(self, region: str): self.client = docker.from_env() + self.vnc_port = self._get_available_port(8006) + self.server_port = self._get_available_port(5000) + # self.remote_debugging_port = self._get_available_port(1337) + self.chromium_port = self._get_available_port(9222) + self.environment = {"DISK_SIZE": "64G", "RAM_SIZE": "4G", "CPU_CORES": "2"} # Modify if needed @staticmethod - def _execute_command(command: list, return_output=False): - process = subprocess.Popen( - command, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - encoding="utf-8" - ) - - if return_output: - output = process.communicate()[0].strip() - return output - else: - return None + def _get_available_port(port: int): + while port < 65354: + if port not in [conn.laddr.port for conn in psutil.net_connections()]: 
+ return port + port += 1 def start_emulator(self, path_to_vm: str, headless: bool, os_type: str): - self.container = self.client.containers.run('qemux/qemu-docker', environment={"DISK_SIZE": "64G", "RAM_SIZE": "6G", "CPU_CORES": "8"}, volumes={"/Users/happysix/Programs/HKUNLP/Qemu/Ubuntu.qcow2": {"bind": "/Ubuntu.qcow2", "mode": "ro"}, "/Users/happysix/Programs/HKUNLP/Qemu/snapshot.qcow2": {"bind": "/boot.qcow2", "mode": "rw"}}, cap_add=["NET_ADMIN"], ports={8006: 8006, 5000: 5001}, detach=True) + # self.container = self.client.containers.run('qemux/qemu-docker', environment={"DISK_SIZE": "64G", "RAM_SIZE": "6G", "CPU_CORES": "8"}, volumes={"/Users/happysix/Programs/HKUNLP/Qemu/Ubuntu.qcow2": {"bind": "/Ubuntu.qcow2", "mode": "ro"}, "/Users/happysix/Programs/HKUNLP/Qemu/snapshot.qcow2": {"bind": "/boot.qcow2", "mode": "rw"}}, cap_add=["NET_ADMIN"], ports={8006: self.vnc_port, 5000: self.server_port}, detach=True) + self.container = self.client.containers.run(path_to_vm, environment=self.environment, cap_add=["NET_ADMIN"], volumes={"/Users/happysix/Programs/HKUNLP/Qemu/Ubuntu.qcow2": {"bind": "/Ubuntu.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port}, detach=True) def get_ip_address(self, path_to_vm: str) -> str: - pass + return f"localhost:{self.server_port}:{self.chromium_port}:{self.vnc_port}" def save_state(self, path_to_vm: str, snapshot_name: str): - logger.info("Saving VM state...") - DockerProvider._execute_command( - ["qemu-img", "convert", "-O", "qcow2", snapshot_name, "temp.qcow2"]) - DockerProvider._execute_command( - ["mv", "temp.qcow2", path_to_vm] - ) - time.sleep(WAIT_TIME) # Wait for the VM to save + raise NotImplementedError("Not available for Docker.") def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str): pass def stop_emulator(self, path_to_vm: str): - logger.info("Stopping VMware VM...") + logger.info("Stopping VM...") self.container.stop(WAIT_TIME) - self.container.remove() \ No newline at end of file + 
self.container.remove() + +# docker run -it --rm -e "DISK_SIZE=64G" -e "RAM_SIZE=8G" -e "CPU_CORES=8" --volume C:\Users\admin\Documents\Ubuntu.qcow2:/boot.qcow2 --cap-add NET_ADMIN --device /dev/kvm -p 8006:8006 -p 5000:5000 qemux/qemu-docker \ No newline at end of file From d0b37f0831e8a55b3718d490069c06735955f9fa Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Sat, 28 Sep 2024 12:49:29 +0800 Subject: [PATCH 05/22] Update --- desktop_env/controllers/setup.py | 8 +++-- desktop_env/desktop_env.py | 9 ++--- desktop_env/evaluators/getters/chrome.py | 42 ++++++++++++++--------- desktop_env/evaluators/getters/general.py | 2 +- desktop_env/providers/docker/manager.py | 34 +++++++++++++----- desktop_env/providers/docker/provider.py | 7 ++-- main.py | 2 +- requirements.txt | 4 --- 8 files changed, 67 insertions(+), 41 deletions(-) diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index dfac4b3..f03e9cd 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -28,8 +28,10 @@ FILE_PATH = os.path.dirname(os.path.abspath(__file__)) class SetupController: - def __init__(self, vm_ip: str, server_port: int, cache_dir: str): + def __init__(self, vm_ip: str, server_port: int, chromium_port: int, cache_dir: str): self.vm_ip: str = vm_ip + self.server_port: int = server_port + self.chromium_port: int = chromium_port self.http_server: str = f"http://{vm_ip}:{server_port}" self.http_server_setup_root: str = f"http://{vm_ip}:{server_port}/setup" self.cache_dir: str = cache_dir @@ -348,7 +350,7 @@ class SetupController: # Chrome setup def _chrome_open_tabs_setup(self, urls_to_open: List[str]): host = self.vm_ip - port = 9222 # fixme: this port is hard-coded, need to be changed from config file + port = self.chromium_port # fixme: this port is hard-coded, need to be changed from config file remote_debugging_url = f"http://{host}:{port}" logger.info("Connect to Chrome @: %s", remote_debugging_url) @@ -399,7 +401,7 @@ class 
SetupController: time.sleep(5) # Wait for Chrome to finish launching host = self.vm_ip - port = 9222 # fixme: this port is hard-coded, need to be changed from config file + port = self.server_port # fixme: this port is hard-coded, need to be changed from config file remote_debugging_url = f"http://{host}:{port}" with sync_playwright() as p: diff --git a/desktop_env/desktop_env.py b/desktop_env/desktop_env.py index a03ebb2..9b24d05 100644 --- a/desktop_env/desktop_env.py +++ b/desktop_env/desktop_env.py @@ -78,8 +78,9 @@ class DesktopEnv(gym.Env): self.require_terminal = require_terminal # Initialize emulator and controller - logger.info("Initializing...") - self._start_emulator() + if provider_name != "docker": # Check if this is applicable to other VM providers + logger.info("Initializing...") + self._start_emulator() # mode: human or machine self.instruction = None @@ -103,8 +104,8 @@ class DesktopEnv(gym.Env): self.server_port = int(vm_ip_ports[1]) self.chromium_port = int(vm_ip_ports[2]) self.vnc_port = int(vm_ip_ports[3]) - self.controller = PythonController(vm_ip=self.vm_ip) - self.setup_controller = SetupController(vm_ip=self.vm_ip, server_port=self.server_port, cache_dir=self.cache_dir_base) + self.controller = PythonController(vm_ip=self.vm_ip, server_port=self.server_port) + self.setup_controller = SetupController(vm_ip=self.vm_ip, server_port=self.server_port, chromium_port=self.chromium_port, cache_dir=self.cache_dir_base) def _revert_to_snapshot(self): # Revert to certain snapshot of the virtual machine, and refresh the path to vm and ip of vm diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py index 94c0dda..ba384de 100644 --- a/desktop_env/evaluators/getters/chrome.py +++ b/desktop_env/evaluators/getters/chrome.py @@ -54,7 +54,8 @@ def get_info_from_website(env, config: Dict[Any, Any]) -> Any: """ try: host = env.vm_ip - port = 9222 # fixme: this port is hard-coded, need to be changed from config file + 
port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file + server_port = env.server_port remote_debugging_url = f"http://{host}:{port}" with sync_playwright() as p: # connect to remote Chrome instance @@ -68,7 +69,7 @@ def get_info_from_website(env, config: Dict[Any, Any]) -> Any: "--remote-debugging-port=1337" ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":5000/setup" + "/launch", headers=headers, data=payload) + requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) time.sleep(5) browser = p.chromium.connect_over_cdp(remote_debugging_url) @@ -454,7 +455,8 @@ def get_extensions_installed_from_shop(env, config: Dict[str, str]): def get_page_info(env, config: Dict[str, str]): host = env.vm_ip - port = 9222 # fixme: this port is hard-coded, need to be changed from config file + port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file + server_port = env.server_port url = config["url"] remote_debugging_url = f"http://{host}:{port}" @@ -478,7 +480,7 @@ def get_page_info(env, config: Dict[str, str]): ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":5000/setup" + "/launch", headers=headers, data=payload) + requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) time.sleep(5) browser = p.chromium.connect_over_cdp(remote_debugging_url) @@ -505,7 +507,8 @@ def get_page_info(env, config: Dict[str, str]): def get_open_tabs_info(env, config: Dict[str, str]): host = env.vm_ip - port = 9222 # fixme: this port is hard-coded, need to be changed from config file + port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file + server_port = env.server_port remote_debugging_url = f"http://{host}:{port}" with sync_playwright() as p: @@ -528,7 +531,7 @@ def 
get_open_tabs_info(env, config: Dict[str, str]): ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":5000/setup" + "/launch", headers=headers, data=payload) + requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) time.sleep(5) try: browser = p.chromium.connect_over_cdp(remote_debugging_url) @@ -643,7 +646,7 @@ def get_active_tab_info(env, config: Dict[str, str]): logger.error("Failed to get the url of active tab") return None host = env.vm_ip - port = 9222 # fixme: this port is hard-coded, need to be changed from config file + port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file remote_debugging_url = f"http://{host}:{port}" with sync_playwright() as p: @@ -683,7 +686,8 @@ def get_pdf_from_url(env, config: Dict[str, str]) -> str: _path = os.path.join(env.cache_dir, config["dest"]) host = env.vm_ip - port = 9222 # fixme: this port is hard-coded, need to be changed from config file + port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file + server_port = env.server_port remote_debugging_url = f"http://{host}:{port}" @@ -706,7 +710,7 @@ def get_pdf_from_url(env, config: Dict[str, str]) -> str: ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":5000/setup" + "/launch", headers=headers, data=payload) + requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) time.sleep(5) browser = p.chromium.connect_over_cdp(remote_debugging_url) @@ -721,7 +725,8 @@ def get_pdf_from_url(env, config: Dict[str, str]) -> str: # fixme: needs to be changed (maybe through post-processing) since it's not working def get_chrome_saved_address(env, config: Dict[str, str]): host = env.vm_ip - port = 9222 # fixme: this port is hard-coded, need to be changed from config file + port = env.chromium_port # fixme: this 
port is hard-coded, need to be changed from config file + server_port = env.server_port remote_debugging_url = f"http://{host}:{port}" with sync_playwright() as p: @@ -744,7 +749,7 @@ def get_chrome_saved_address(env, config: Dict[str, str]): ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":5000/setup" + "/launch", headers=headers, data=payload) + requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) time.sleep(5) browser = p.chromium.connect_over_cdp(remote_debugging_url) @@ -800,7 +805,8 @@ def get_number_of_search_results(env, config: Dict[str, str]): # todo: move into the config file url, result_selector = "https://google.com/search?q=query", '.search-result' host = env.vm_ip - port = 9222 # fixme: this port is hard-coded, need to be changed from config file + port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file + server_port = env.server_port remote_debugging_url = f"http://{host}:{port}" with sync_playwright() as p: @@ -822,7 +828,7 @@ def get_number_of_search_results(env, config: Dict[str, str]): ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":5000/setup" + "/launch", headers=headers, data=payload) + requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) time.sleep(5) browser = p.chromium.connect_over_cdp(remote_debugging_url) page = browser.new_page() @@ -1145,7 +1151,8 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]): logger.error("active_tab_url is not a string") return None host = env.vm_ip - port = 9222 # fixme: this port is hard-coded, need to be changed from config file + port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file + server_port = env.server_port remote_debugging_url = f"http://{host}:{port}" with sync_playwright() as p: @@ -1168,7 
+1175,7 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]): ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":5000/setup" + "/launch", headers=headers, data=payload) + requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) time.sleep(5) browser = p.chromium.connect_over_cdp(remote_debugging_url) target_page = None @@ -1237,7 +1244,8 @@ def get_gotoRecreationPage_and_get_html_content(env, config: Dict[str, Any]): especially used for www.recreation.gov examples """ host = env.vm_ip - port = 9222 # fixme: this port is hard-coded, need to be changed from config file + port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file + server_port = env.server_port remote_debugging_url = f"http://{host}:{port}" with sync_playwright() as p: @@ -1259,7 +1267,7 @@ def get_gotoRecreationPage_and_get_html_content(env, config: Dict[str, Any]): ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":5000/setup" + "/launch", headers=headers, data=payload) + requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) time.sleep(5) browser = p.chromium.connect_over_cdp(remote_debugging_url) page = browser.new_page() diff --git a/desktop_env/evaluators/getters/general.py b/desktop_env/evaluators/getters/general.py index d5d965c..2f5ed32 100644 --- a/desktop_env/evaluators/getters/general.py +++ b/desktop_env/evaluators/getters/general.py @@ -7,7 +7,7 @@ logger = logging.getLogger("desktopenv.getters.general") def get_vm_command_line(env, config: Dict[str, str]): vm_ip = env.vm_ip - port = 5000 + port = env.server_port command = config["command"] shell = config.get("shell", False) diff --git a/desktop_env/providers/docker/manager.py b/desktop_env/providers/docker/manager.py index 36dfbf1..e4b09a1 100644 --- 
a/desktop_env/providers/docker/manager.py +++ b/desktop_env/providers/docker/manager.py @@ -25,7 +25,9 @@ logger.setLevel(logging.INFO) MAX_RETRY_TIMES = 10 RETRY_INTERVAL = 5 -UBUNTU_X86_URL = "docker-osworld-x86" + +UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2" +VMS_DIR = "./docker_vm_data" # Determine the platform and CPU architecture to decide the correct VM image to download # if platform.system() == 'Darwin': # macOS @@ -37,7 +39,6 @@ UBUNTU_X86_URL = "docker-osworld-x86" # URL = UBUNTU_X86_URL # else: # raise Exception("Unsupported platform or architecture") - URL = UBUNTU_X86_URL DOWNLOADED_FILE_NAME = URL.split('/')[-1] @@ -46,10 +47,7 @@ if platform.system() == 'Windows': docker_path = r"C:\Program Files\Docker\Docker" os.environ["PATH"] += os.pathsep + docker_path -UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2" -VMS_DIR = "./vmware_vm_data" - -def __download_vm(vms_dir: str): +def _download_vm(vms_dir: str): # Download the virtual machine image logger.info("Downloading the virtual machine image...") downloaded_size = 0 @@ -58,6 +56,8 @@ def __download_vm(vms_dir: str): DOWNLOADED_FILE_NAME = URL.split('/')[-1] downloaded_file_name = DOWNLOADED_FILE_NAME + os.makedirs(vms_dir, exist_ok=True) + while True: downloaded_file_path = os.path.join(vms_dir, downloaded_file_name) headers = {} @@ -99,7 +99,25 @@ class DockerVMManager(VMManager): def __init__(self, registry_path=""): pass - def get_vm_path(self, region): + def add_vm(self, vm_path): + pass + + def check_and_clean(self): + pass + + def delete_vm(self, vm_path): + pass + + def initialize_registry(self): + pass + + def list_free_vms(self): + return os.path.join(VMS_DIR, DOWNLOADED_FILE_NAME) + + def occupy_vm(self, vm_path): + pass + + def get_vm_path(self, os_type, region): if not os.path.exists(os.path.join(VMS_DIR, DOWNLOADED_FILE_NAME)): - __download_vm(VMS_DIR) + _download_vm(VMS_DIR) return 
os.path.join(VMS_DIR, DOWNLOADED_FILE_NAME) \ No newline at end of file diff --git a/desktop_env/providers/docker/provider.py b/desktop_env/providers/docker/provider.py index 6920a9e..9d09d69 100644 --- a/desktop_env/providers/docker/provider.py +++ b/desktop_env/providers/docker/provider.py @@ -20,7 +20,7 @@ class DockerProvider(Provider): self.server_port = self._get_available_port(5000) # self.remote_debugging_port = self._get_available_port(1337) self.chromium_port = self._get_available_port(9222) - self.environment = {"DISK_SIZE": "64G", "RAM_SIZE": "4G", "CPU_CORES": "2"} # Modify if needed + self.environment = {"DISK_SIZE": "64G", "RAM_SIZE": "8G", "CPU_CORES": "8"} # Modify if needed @staticmethod def _get_available_port(port: int): @@ -30,8 +30,9 @@ class DockerProvider(Provider): port += 1 def start_emulator(self, path_to_vm: str, headless: bool, os_type: str): + logger.info(f"Occupying ports: {self.vnc_port}, {self.server_port}, {self.chromium_port}") # self.container = self.client.containers.run('qemux/qemu-docker', environment={"DISK_SIZE": "64G", "RAM_SIZE": "6G", "CPU_CORES": "8"}, volumes={"/Users/happysix/Programs/HKUNLP/Qemu/Ubuntu.qcow2": {"bind": "/Ubuntu.qcow2", "mode": "ro"}, "/Users/happysix/Programs/HKUNLP/Qemu/snapshot.qcow2": {"bind": "/boot.qcow2", "mode": "rw"}}, cap_add=["NET_ADMIN"], ports={8006: self.vnc_port, 5000: self.server_port}, detach=True) - self.container = self.client.containers.run(path_to_vm, environment=self.environment, cap_add=["NET_ADMIN"], volumes={"/Users/happysix/Programs/HKUNLP/Qemu/Ubuntu.qcow2": {"bind": "/Ubuntu.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port}, detach=True) + self.container = self.client.containers.run("osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], devices=["/dev/kvm"], volumes={os.path.abspath(path_to_vm): {"bind": "/Ubuntu.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port}, detach=True) def get_ip_address(self, 
path_to_vm: str) -> str: return f"localhost:{self.server_port}:{self.chromium_port}:{self.vnc_port}" @@ -47,4 +48,4 @@ class DockerProvider(Provider): self.container.stop(WAIT_TIME) self.container.remove() -# docker run -it --rm -e "DISK_SIZE=64G" -e "RAM_SIZE=8G" -e "CPU_CORES=8" --volume C:\Users\admin\Documents\Ubuntu.qcow2:/boot.qcow2 --cap-add NET_ADMIN --device /dev/kvm -p 8006:8006 -p 5000:5000 qemux/qemu-docker \ No newline at end of file +# docker run -it --rm -e "DISK_SIZE=64G" -e "RAM_SIZE=8G" -e "CPU_CORES=8" --volume C:\Users\admin\Documents\OSWorld\docker_vm_data\Ubuntu.qcow2:/Ubuntu.qcow2:ro --cap-add NET_ADMIN --device /dev/kvm -p 8006:8006 -p 5000:5000 osworld-docker \ No newline at end of file diff --git a/main.py b/main.py index 4c6817f..6f03227 100644 --- a/main.py +++ b/main.py @@ -78,7 +78,7 @@ def human_agent(): result = env.evaluate() logger.info("Result: %.2f", result) - # env.close() + env.close() logger.info("Environment closed.") diff --git a/requirements.txt b/requirements.txt index 439f85b..2cc96ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -53,7 +53,3 @@ wrapt_timeout_decorator gdown tiktoken groq -boto3 -azure-identity -azure-mgmt-compute -azure-mgmt-network From 2b4b6d8fdf8b3177ce533cf31db8fe84dcde371e Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Sat, 28 Sep 2024 15:07:56 +0800 Subject: [PATCH 06/22] Update --- desktop_env/providers/docker/provider.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/desktop_env/providers/docker/provider.py b/desktop_env/providers/docker/provider.py index 9d09d69..6dcca46 100644 --- a/desktop_env/providers/docker/provider.py +++ b/desktop_env/providers/docker/provider.py @@ -32,7 +32,8 @@ class DockerProvider(Provider): def start_emulator(self, path_to_vm: str, headless: bool, os_type: str): logger.info(f"Occupying ports: {self.vnc_port}, {self.server_port}, {self.chromium_port}") # self.container = self.client.containers.run('qemux/qemu-docker', 
environment={"DISK_SIZE": "64G", "RAM_SIZE": "6G", "CPU_CORES": "8"}, volumes={"/Users/happysix/Programs/HKUNLP/Qemu/Ubuntu.qcow2": {"bind": "/Ubuntu.qcow2", "mode": "ro"}, "/Users/happysix/Programs/HKUNLP/Qemu/snapshot.qcow2": {"bind": "/boot.qcow2", "mode": "rw"}}, cap_add=["NET_ADMIN"], ports={8006: self.vnc_port, 5000: self.server_port}, detach=True) - self.container = self.client.containers.run("osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], devices=["/dev/kvm"], volumes={os.path.abspath(path_to_vm): {"bind": "/Ubuntu.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port}, detach=True) + self.container = self.client.containers.run("happysixd/osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], devices=["/dev/kvm"], volumes={os.path.abspath(path_to_vm): {"bind": "/Ubuntu.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port}, detach=True) + time.sleep(20) def get_ip_address(self, path_to_vm: str) -> str: return f"localhost:{self.server_port}:{self.chromium_port}:{self.vnc_port}" From 0a21e24a81411414651d58740b817b2c8fe600a1 Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Sat, 28 Sep 2024 16:57:58 +0800 Subject: [PATCH 07/22] Update provider --- desktop_env/providers/docker/provider.py | 49 ++++++++++++++++-------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/desktop_env/providers/docker/provider.py b/desktop_env/providers/docker/provider.py index 6dcca46..9ceab84 100644 --- a/desktop_env/providers/docker/provider.py +++ b/desktop_env/providers/docker/provider.py @@ -5,14 +5,16 @@ import subprocess import time import docker import psutil - +import requests + from desktop_env.providers.base import Provider - + logger = logging.getLogger("desktopenv.providers.vmware.VMwareProvider") logger.setLevel(logging.INFO) - + WAIT_TIME = 3 - +RETRY_INTERVAL = 1 + class DockerProvider(Provider): def __init__(self, region: str): self.client = docker.from_env() @@ -20,33 +22,46 @@ 
class DockerProvider(Provider): self.server_port = self._get_available_port(5000) # self.remote_debugging_port = self._get_available_port(1337) self.chromium_port = self._get_available_port(9222) - self.environment = {"DISK_SIZE": "64G", "RAM_SIZE": "8G", "CPU_CORES": "8"} # Modify if needed - + self.environment = {"DISK_SIZE": "32G", "RAM_SIZE": "4G", "CPU_CORES": "4"} # Modify if needed + @staticmethod def _get_available_port(port: int): while port < 65354: if port not in [conn.laddr.port for conn in psutil.net_connections()]: return port port += 1 - + def start_emulator(self, path_to_vm: str, headless: bool, os_type: str): logger.info(f"Occupying ports: {self.vnc_port}, {self.server_port}, {self.chromium_port}") - # self.container = self.client.containers.run('qemux/qemu-docker', environment={"DISK_SIZE": "64G", "RAM_SIZE": "6G", "CPU_CORES": "8"}, volumes={"/Users/happysix/Programs/HKUNLP/Qemu/Ubuntu.qcow2": {"bind": "/Ubuntu.qcow2", "mode": "ro"}, "/Users/happysix/Programs/HKUNLP/Qemu/snapshot.qcow2": {"bind": "/boot.qcow2", "mode": "rw"}}, cap_add=["NET_ADMIN"], ports={8006: self.vnc_port, 5000: self.server_port}, detach=True) - self.container = self.client.containers.run("happysixd/osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], devices=["/dev/kvm"], volumes={os.path.abspath(path_to_vm): {"bind": "/Ubuntu.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port}, detach=True) - time.sleep(20) - + self.container = self.client.containers.run("happysixd/osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], devices=["/dev/kvm"], volumes={os.path.abspath(path_to_vm): {"bind": "/Ubuntu.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port, 9222: self.chromium_port}, detach=True) + def download_screenshot(ip, port): + url = f"http://{ip}:{port}/screenshot" + try: + # max trey times 1, max timeout 1 + response = requests.get(url, timeout=(10, 10)) + if response.status_code == 200: + return 
True + except Exception as e: + time.sleep(RETRY_INTERVAL) + return False + + # Try downloading the screenshot until successful + while not download_screenshot("localhost", self.server_port): + logger.info("Check whether the virtual machine is ready...") + def get_ip_address(self, path_to_vm: str) -> str: return f"localhost:{self.server_port}:{self.chromium_port}:{self.vnc_port}" - + def save_state(self, path_to_vm: str, snapshot_name: str): raise NotImplementedError("Not available for Docker.") - + def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str): pass - + def stop_emulator(self, path_to_vm: str): logger.info("Stopping VM...") - self.container.stop(WAIT_TIME) + self.container.stop() self.container.remove() - -# docker run -it --rm -e "DISK_SIZE=64G" -e "RAM_SIZE=8G" -e "CPU_CORES=8" --volume C:\Users\admin\Documents\OSWorld\docker_vm_data\Ubuntu.qcow2:/Ubuntu.qcow2:ro --cap-add NET_ADMIN --device /dev/kvm -p 8006:8006 -p 5000:5000 osworld-docker \ No newline at end of file + time.sleep(WAIT_TIME) + +# docker run -it --rm -e "DISK_SIZE=64G" -e "RAM_SIZE=8G" -e "CPU_CORES=8" --volume /home/$USER/osworld/docker_vm_data/Ubuntu.qcow2:/Ubuntu.qcow2:ro --cap-add NET_ADMIN --device /dev/kvm -p 8008:8006 -p 5002:5000 happysixd/osworld-docker \ No newline at end of file From fd65cf47f63b4041ec5e15f03729bc95677c97c3 Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Wed, 2 Oct 2024 12:19:01 +0800 Subject: [PATCH 08/22] Update Windows URL --- desktop_env/desktop_env.py | 2 +- desktop_env/providers/docker/manager.py | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/desktop_env/desktop_env.py b/desktop_env/desktop_env.py index 9b24d05..7d7b409 100644 --- a/desktop_env/desktop_env.py +++ b/desktop_env/desktop_env.py @@ -36,7 +36,7 @@ class DesktopEnv(gym.Env): headless: bool = False, require_a11y_tree: bool = True, require_terminal: bool = False, - os_type: str = "Ubuntu", + os_type: str = "Windows", ): """ Args: diff --git 
a/desktop_env/providers/docker/manager.py b/desktop_env/providers/docker/manager.py index e4b09a1..f3cb2d3 100644 --- a/desktop_env/providers/docker/manager.py +++ b/desktop_env/providers/docker/manager.py @@ -27,6 +27,7 @@ MAX_RETRY_TIMES = 10 RETRY_INTERVAL = 5 UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2" +WINDOWS_X86_URL = "https://huggingface.co/datasets/xlangai/windows_osworld/resolve/main/Windows%2010%20x64%20-%20copy.qcow2" VMS_DIR = "./docker_vm_data" # Determine the platform and CPU architecture to decide the correct VM image to download @@ -39,21 +40,20 @@ VMS_DIR = "./docker_vm_data" # URL = UBUNTU_X86_URL # else: # raise Exception("Unsupported platform or architecture") -URL = UBUNTU_X86_URL +URL = UBUNTU_X86_URL DOWNLOADED_FILE_NAME = URL.split('/')[-1] if platform.system() == 'Windows': docker_path = r"C:\Program Files\Docker\Docker" os.environ["PATH"] += os.pathsep + docker_path -def _download_vm(vms_dir: str): +def _download_vm(vms_dir: str, os_type: str): + global URL, DOWNLOADED_FILE_NAME # Download the virtual machine image logger.info("Downloading the virtual machine image...") downloaded_size = 0 - - URL = UBUNTU_X86_URL - DOWNLOADED_FILE_NAME = URL.split('/')[-1] + downloaded_file_name = DOWNLOADED_FILE_NAME os.makedirs(vms_dir, exist_ok=True) @@ -118,6 +118,11 @@ class DockerVMManager(VMManager): pass def get_vm_path(self, os_type, region): + if os_type == "Ubuntu": + URL = WINDOWS_X86_URL + elif os_type == "Windows": + URL = WINDOWS_X86_URL + DOWNLOADED_FILE_NAME = URL.split('/')[-1] if not os.path.exists(os.path.join(VMS_DIR, DOWNLOADED_FILE_NAME)): _download_vm(VMS_DIR) return os.path.join(VMS_DIR, DOWNLOADED_FILE_NAME) \ No newline at end of file From b9339217efd090fb4b85cd09d61cb27bf9d0a5db Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Thu, 3 Oct 2024 16:09:12 +0800 Subject: [PATCH 09/22] Update --- desktop_env/providers/docker/manager.py | 5 +++-- 
desktop_env/providers/docker/provider.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/desktop_env/providers/docker/manager.py b/desktop_env/providers/docker/manager.py index f3cb2d3..f22b03c 100644 --- a/desktop_env/providers/docker/manager.py +++ b/desktop_env/providers/docker/manager.py @@ -27,7 +27,7 @@ MAX_RETRY_TIMES = 10 RETRY_INTERVAL = 5 UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2" -WINDOWS_X86_URL = "https://huggingface.co/datasets/xlangai/windows_osworld/resolve/main/Windows%2010%20x64%20-%20copy.qcow2" +WINDOWS_X86_URL = r"https://huggingface.co/datasets/xlangai/windows_osworld/resolve/main/Windows%2010%20x64.qcow2" VMS_DIR = "./docker_vm_data" # Determine the platform and CPU architecture to decide the correct VM image to download @@ -48,7 +48,7 @@ if platform.system() == 'Windows': docker_path = r"C:\Program Files\Docker\Docker" os.environ["PATH"] += os.pathsep + docker_path -def _download_vm(vms_dir: str, os_type: str): +def _download_vm(vms_dir: str): global URL, DOWNLOADED_FILE_NAME # Download the virtual machine image logger.info("Downloading the virtual machine image...") @@ -118,6 +118,7 @@ class DockerVMManager(VMManager): pass def get_vm_path(self, os_type, region): + global URL, DOWNLOADED_FILE_NAME if os_type == "Ubuntu": URL = WINDOWS_X86_URL elif os_type == "Windows": diff --git a/desktop_env/providers/docker/provider.py b/desktop_env/providers/docker/provider.py index 9ceab84..b3804dc 100644 --- a/desktop_env/providers/docker/provider.py +++ b/desktop_env/providers/docker/provider.py @@ -33,7 +33,8 @@ class DockerProvider(Provider): def start_emulator(self, path_to_vm: str, headless: bool, os_type: str): logger.info(f"Occupying ports: {self.vnc_port}, {self.server_port}, {self.chromium_port}") - self.container = self.client.containers.run("happysixd/osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], devices=["/dev/kvm"], 
volumes={os.path.abspath(path_to_vm): {"bind": "/Ubuntu.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port, 9222: self.chromium_port}, detach=True) + print(path_to_vm) + self.container = self.client.containers.run("happysixd/osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], devices=["/dev/kvm"], volumes={os.path.abspath(path_to_vm): {"bind": "/System.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port, 9222: self.chromium_port}, detach=True) def download_screenshot(ip, port): url = f"http://{ip}:{port}/screenshot" try: From 3cba868ff39c9360c4f2473c6698b2ab33cec99b Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Tue, 8 Oct 2024 17:57:48 +0800 Subject: [PATCH 10/22] Update --- desktop_env/providers/docker/manager.py | 4 ++-- desktop_env/providers/docker/provider.py | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/desktop_env/providers/docker/manager.py b/desktop_env/providers/docker/manager.py index f22b03c..04ab5a1 100644 --- a/desktop_env/providers/docker/manager.py +++ b/desktop_env/providers/docker/manager.py @@ -27,7 +27,7 @@ MAX_RETRY_TIMES = 10 RETRY_INTERVAL = 5 UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2" -WINDOWS_X86_URL = r"https://huggingface.co/datasets/xlangai/windows_osworld/resolve/main/Windows%2010%20x64.qcow2" +WINDOWS_X86_URL = r"https://huggingface.co/datasets/xlangai/windows_osworld/resolve/main/Windows%2010%20x64%20-%20copy.qcow2" VMS_DIR = "./docker_vm_data" # Determine the platform and CPU architecture to decide the correct VM image to download @@ -53,7 +53,7 @@ def _download_vm(vms_dir: str): # Download the virtual machine image logger.info("Downloading the virtual machine image...") downloaded_size = 0 - + downloaded_file_name = DOWNLOADED_FILE_NAME os.makedirs(vms_dir, exist_ok=True) diff --git a/desktop_env/providers/docker/provider.py b/desktop_env/providers/docker/provider.py index 
b3804dc..443e736 100644 --- a/desktop_env/providers/docker/provider.py +++ b/desktop_env/providers/docker/provider.py @@ -33,8 +33,8 @@ class DockerProvider(Provider): def start_emulator(self, path_to_vm: str, headless: bool, os_type: str): logger.info(f"Occupying ports: {self.vnc_port}, {self.server_port}, {self.chromium_port}") - print(path_to_vm) - self.container = self.client.containers.run("happysixd/osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], devices=["/dev/kvm"], volumes={os.path.abspath(path_to_vm): {"bind": "/System.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port, 9222: self.chromium_port}, detach=True) + uefi_rom_file = os.path.join(os.path.dirname(path_to_vm), "edk2-x86_64-code.fd") + self.container = self.client.containers.run("osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], devices=["/dev/kvm"], volumes={os.path.abspath(path_to_vm): {"bind": "/System.qcow2", "mode": "ro"}, os.path.abspath(uefi_rom_file): {"bind": "/storage/edk2-x86_64-code.fd", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port, 9222: self.chromium_port}, detach=True) def download_screenshot(ip, port): url = f"http://{ip}:{port}/screenshot" try: @@ -63,6 +63,4 @@ class DockerProvider(Provider): logger.info("Stopping VM...") self.container.stop() self.container.remove() - time.sleep(WAIT_TIME) - -# docker run -it --rm -e "DISK_SIZE=64G" -e "RAM_SIZE=8G" -e "CPU_CORES=8" --volume /home/$USER/osworld/docker_vm_data/Ubuntu.qcow2:/Ubuntu.qcow2:ro --cap-add NET_ADMIN --device /dev/kvm -p 8008:8006 -p 5002:5000 happysixd/osworld-docker \ No newline at end of file + time.sleep(WAIT_TIME) \ No newline at end of file From 6e75e37eb0760e7fb27ea35909b3e738467e9107 Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Thu, 17 Oct 2024 13:05:29 +0800 Subject: [PATCH 11/22] Enable Windows VM in Docker --- desktop_env/providers/docker/manager.py | 4 ++-- desktop_env/providers/docker/provider.py | 3 +-- 2 files changed, 
3 insertions(+), 4 deletions(-) diff --git a/desktop_env/providers/docker/manager.py b/desktop_env/providers/docker/manager.py index 04ab5a1..58a52aa 100644 --- a/desktop_env/providers/docker/manager.py +++ b/desktop_env/providers/docker/manager.py @@ -27,7 +27,7 @@ MAX_RETRY_TIMES = 10 RETRY_INTERVAL = 5 UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2" -WINDOWS_X86_URL = r"https://huggingface.co/datasets/xlangai/windows_osworld/resolve/main/Windows%2010%20x64%20-%20copy.qcow2" +WINDOWS_X86_URL = r"https://huggingface.co/datasets/xlangai/windows_osworld/resolve/main/Windows-10-x64.qcow2" VMS_DIR = "./docker_vm_data" # Determine the platform and CPU architecture to decide the correct VM image to download @@ -120,7 +120,7 @@ class DockerVMManager(VMManager): def get_vm_path(self, os_type, region): global URL, DOWNLOADED_FILE_NAME if os_type == "Ubuntu": - URL = WINDOWS_X86_URL + URL = UBUNTU_X86_URL elif os_type == "Windows": URL = WINDOWS_X86_URL DOWNLOADED_FILE_NAME = URL.split('/')[-1] diff --git a/desktop_env/providers/docker/provider.py b/desktop_env/providers/docker/provider.py index 443e736..bc382e7 100644 --- a/desktop_env/providers/docker/provider.py +++ b/desktop_env/providers/docker/provider.py @@ -33,8 +33,7 @@ class DockerProvider(Provider): def start_emulator(self, path_to_vm: str, headless: bool, os_type: str): logger.info(f"Occupying ports: {self.vnc_port}, {self.server_port}, {self.chromium_port}") - uefi_rom_file = os.path.join(os.path.dirname(path_to_vm), "edk2-x86_64-code.fd") - self.container = self.client.containers.run("osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], devices=["/dev/kvm"], volumes={os.path.abspath(path_to_vm): {"bind": "/System.qcow2", "mode": "ro"}, os.path.abspath(uefi_rom_file): {"bind": "/storage/edk2-x86_64-code.fd", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port, 9222: self.chromium_port}, detach=True) + self.container = 
self.client.containers.run("happysixd/osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], devices=["/dev/kvm"], volumes={os.path.abspath(path_to_vm): {"bind": "/System.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port, 9222: self.chromium_port}, detach=True) def download_screenshot(ip, port): url = f"http://{ip}:{port}/screenshot" try: From ec3671ae014fff6bddedf4fc2b462a43ac59aecc Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Thu, 17 Oct 2024 14:55:20 +0800 Subject: [PATCH 12/22] Update Docker image link --- desktop_env/providers/docker/manager.py | 10 +++++++++- desktop_env/providers/docker/provider.py | 11 +++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/desktop_env/providers/docker/manager.py b/desktop_env/providers/docker/manager.py index 58a52aa..c06402a 100644 --- a/desktop_env/providers/docker/manager.py +++ b/desktop_env/providers/docker/manager.py @@ -27,7 +27,7 @@ MAX_RETRY_TIMES = 10 RETRY_INTERVAL = 5 UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2" -WINDOWS_X86_URL = r"https://huggingface.co/datasets/xlangai/windows_osworld/resolve/main/Windows-10-x64.qcow2" +WINDOWS_X86_URL = "https://huggingface.co/datasets/xlangai/windows_osworld/resolve/main/Windows-10-x64.qcow2.zip" VMS_DIR = "./docker_vm_data" # Determine the platform and CPU architecture to decide the correct VM image to download @@ -95,6 +95,12 @@ def _download_vm(vms_dir: str): logger.info("Download succeeds.") break # Download completed successfully + # Unzip the downloaded file + logger.info("Unzipping the downloaded file...☕️") + with zipfile.ZipFile(downloaded_file_path, 'r') as zip_ref: + zip_ref.extractall(vms_dir) + logger.info("Files have been successfully extracted to the directory: " + str(vms_dir)) + class DockerVMManager(VMManager): def __init__(self, registry_path=""): pass @@ -124,6 +130,8 @@ class DockerVMManager(VMManager): elif os_type == "Windows": URL = 
WINDOWS_X86_URL DOWNLOADED_FILE_NAME = URL.split('/')[-1] + if DOWNLOADED_FILE_NAME.endswith(".zip"): + DOWNLOADED_FILE_NAME = DOWNLOADED_FILE_NAME[:-4] if not os.path.exists(os.path.join(VMS_DIR, DOWNLOADED_FILE_NAME)): _download_vm(VMS_DIR) return os.path.join(VMS_DIR, DOWNLOADED_FILE_NAME) \ No newline at end of file diff --git a/desktop_env/providers/docker/provider.py b/desktop_env/providers/docker/provider.py index bc382e7..f47df09 100644 --- a/desktop_env/providers/docker/provider.py +++ b/desktop_env/providers/docker/provider.py @@ -18,10 +18,9 @@ RETRY_INTERVAL = 1 class DockerProvider(Provider): def __init__(self, region: str): self.client = docker.from_env() - self.vnc_port = self._get_available_port(8006) - self.server_port = self._get_available_port(5000) - # self.remote_debugging_port = self._get_available_port(1337) - self.chromium_port = self._get_available_port(9222) + self.server_port = None + self.vnc_port = None + self.chromium_port = None self.environment = {"DISK_SIZE": "32G", "RAM_SIZE": "4G", "CPU_CORES": "4"} # Modify if needed @staticmethod @@ -32,6 +31,10 @@ class DockerProvider(Provider): port += 1 def start_emulator(self, path_to_vm: str, headless: bool, os_type: str): + self.vnc_port = self._get_available_port(8006) + self.server_port = self._get_available_port(5000) + # self.remote_debugging_port = self._get_available_port(1337) + self.chromium_port = self._get_available_port(9222) logger.info(f"Occupying ports: {self.vnc_port}, {self.server_port}, {self.chromium_port}") self.container = self.client.containers.run("happysixd/osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], devices=["/dev/kvm"], volumes={os.path.abspath(path_to_vm): {"bind": "/System.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port, 9222: self.chromium_port}, detach=True) def download_screenshot(ip, port): From 7eaa4189ae86285ba07795f51b6fd0a2e9c868b4 Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Thu, 17 Oct 2024 19:15:37 
+0800 Subject: [PATCH 13/22] Fix unzip --- desktop_env/providers/docker/manager.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/desktop_env/providers/docker/manager.py b/desktop_env/providers/docker/manager.py index c06402a..4a7abde 100644 --- a/desktop_env/providers/docker/manager.py +++ b/desktop_env/providers/docker/manager.py @@ -95,11 +95,12 @@ def _download_vm(vms_dir: str): logger.info("Download succeeds.") break # Download completed successfully - # Unzip the downloaded file - logger.info("Unzipping the downloaded file...☕️") - with zipfile.ZipFile(downloaded_file_path, 'r') as zip_ref: - zip_ref.extractall(vms_dir) - logger.info("Files have been successfully extracted to the directory: " + str(vms_dir)) + if downloaded_file_name.endswith(".zip"): + # Unzip the downloaded file + logger.info("Unzipping the downloaded file...☕️") + with zipfile.ZipFile(downloaded_file_path, 'r') as zip_ref: + zip_ref.extractall(vms_dir) + logger.info("Files have been successfully extracted to the directory: " + str(vms_dir)) class DockerVMManager(VMManager): def __init__(self, registry_path=""): @@ -130,8 +131,12 @@ class DockerVMManager(VMManager): elif os_type == "Windows": URL = WINDOWS_X86_URL DOWNLOADED_FILE_NAME = URL.split('/')[-1] + if DOWNLOADED_FILE_NAME.endswith(".zip"): - DOWNLOADED_FILE_NAME = DOWNLOADED_FILE_NAME[:-4] - if not os.path.exists(os.path.join(VMS_DIR, DOWNLOADED_FILE_NAME)): + vm_name = DOWNLOADED_FILE_NAME[:-4] + else: + vm_name = DOWNLOADED_FILE_NAME + + if not os.path.exists(os.path.join(VMS_DIR, vm_name)): _download_vm(VMS_DIR) - return os.path.join(VMS_DIR, DOWNLOADED_FILE_NAME) \ No newline at end of file + return os.path.join(VMS_DIR, vm_name) \ No newline at end of file From 9e86f160e7d88660d6a8a81efdb56f98e751dbf4 Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Fri, 18 Oct 2024 18:44:53 +0800 Subject: [PATCH 14/22] Capture cursor on Windows --- desktop_env/server/main.py | 62 
+++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index 7ba8800..f2bf187 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -14,7 +14,7 @@ import lxml.etree import pyautogui import requests import re -from PIL import Image +from PIL import Image, ImageGrab from Xlib import display, X from flask import Flask, request, jsonify, send_file, abort # , send_from_directory from lxml.etree import _Element @@ -35,6 +35,7 @@ elif platform_name == "Windows": from pywinauto import Desktop from pywinauto.base_wrapper import BaseWrapper import pywinauto.application + import win32ui, win32gui Accessible = Any @@ -88,7 +89,7 @@ def execute_command(): # Execute the command without any safety checks. try: result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True, - timeout=120) + timeout=120, creationflags=subprocess.CREATE_NO_WINDOW) return jsonify({ 'status': 'success', 'output': result.stdout, @@ -150,22 +151,47 @@ def capture_screen_with_cursor(): # fixme: This is a temporary fix for the cursor not being captured on Windows and Linux if user_platform == "Windows": - def _download_image(url, path): - response = requests.get(url) - with open(path, 'wb') as file: - file.write(response.content) - - cursor_path = os.path.join("screenshots", "cursor.png") - if not os.path.exists(cursor_path): - cursor_url = "https://vip.helloimg.com/images/2023/12/02/oQPzmt.png" - _download_image(cursor_url, cursor_path) - screenshot = pyautogui.screenshot() - cursor_x, cursor_y = pyautogui.position() - cursor = Image.open(cursor_path) - # make the cursor smaller - cursor = cursor.resize((int(cursor.width / 1.5), int(cursor.height / 1.5))) - screenshot.paste(cursor, (cursor_x, cursor_y), cursor) - screenshot.save(file_path) + def get_cursor(): + hcursor = win32gui.GetCursorInfo()[1] + hdc = 
win32ui.CreateDCFromHandle(win32gui.GetDC(0)) + hbmp = win32ui.CreateBitmap() + hbmp.CreateCompatibleBitmap(hdc, 36, 36) + hdc = hdc.CreateCompatibleDC() + hdc.SelectObject(hbmp) + hdc.DrawIcon((0,0), hcursor) + + bmpinfo = hbmp.GetInfo() + bmpstr = hbmp.GetBitmapBits(True) + cursor = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1).convert("RGBA") + + win32gui.DestroyIcon(hcursor) + win32gui.DeleteObject(hbmp.GetHandle()) + hdc.DeleteDC() + + pixdata = cursor.load() + + width, height = cursor.size + for y in range(height): + for x in range(width): + if pixdata[x, y] == (0, 0, 0, 255): + pixdata[x, y] = (0, 0, 0, 0) + + hotspot = win32gui.GetIconInfo(hcursor)[1:3] + + return (cursor, hotspot) + + cursor, (hotspotx, hotspoty) = get_cursor() + + ratio = ctypes.windll.shcore.GetScaleFactorForDevice(0) / 100 + + img = ImageGrab.grab(bbox=None, include_layered_windows=True) + + pos_win = win32gui.GetCursorPos() + pos = (round(pos_win[0]*ratio - hotspotx), round(pos_win[1]*ratio - hotspoty)) + + img.paste(cursor, pos, cursor) + + img.save(file_path) elif user_platform == "Linux": cursor_obj = Xcursor() imgarray = cursor_obj.getCursorImageArrayFast() From b46b6f0649f9f7e1613069c01ddbb2fe58a2cb92 Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Fri, 18 Oct 2024 18:47:10 +0800 Subject: [PATCH 15/22] Clean up --- desktop_env/server/main.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index f2bf187..07bffad 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -159,38 +159,38 @@ def capture_screen_with_cursor(): hdc = hdc.CreateCompatibleDC() hdc.SelectObject(hbmp) hdc.DrawIcon((0,0), hcursor) - + bmpinfo = hbmp.GetInfo() bmpstr = hbmp.GetBitmapBits(True) cursor = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1).convert("RGBA") - - win32gui.DestroyIcon(hcursor) 
+ + win32gui.DestroyIcon(hcursor) win32gui.DeleteObject(hbmp.GetHandle()) hdc.DeleteDC() - + pixdata = cursor.load() - + width, height = cursor.size for y in range(height): for x in range(width): if pixdata[x, y] == (0, 0, 0, 255): pixdata[x, y] = (0, 0, 0, 0) - + hotspot = win32gui.GetIconInfo(hcursor)[1:3] - + return (cursor, hotspot) - + cursor, (hotspotx, hotspoty) = get_cursor() - + ratio = ctypes.windll.shcore.GetScaleFactorForDevice(0) / 100 - + img = ImageGrab.grab(bbox=None, include_layered_windows=True) - + pos_win = win32gui.GetCursorPos() pos = (round(pos_win[0]*ratio - hotspotx), round(pos_win[1]*ratio - hotspoty)) - + img.paste(cursor, pos, cursor) - + img.save(file_path) elif user_platform == "Linux": cursor_obj = Xcursor() From 82878c885cc7950c96748e2af85c058b6ed09788 Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Fri, 18 Oct 2024 20:17:49 +0800 Subject: [PATCH 16/22] Update Ubuntu qcow2 link --- desktop_env/providers/docker/manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desktop_env/providers/docker/manager.py b/desktop_env/providers/docker/manager.py index 4a7abde..1172167 100644 --- a/desktop_env/providers/docker/manager.py +++ b/desktop_env/providers/docker/manager.py @@ -26,7 +26,7 @@ logger.setLevel(logging.INFO) MAX_RETRY_TIMES = 10 RETRY_INTERVAL = 5 -UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2" +UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2.zip" WINDOWS_X86_URL = "https://huggingface.co/datasets/xlangai/windows_osworld/resolve/main/Windows-10-x64.qcow2.zip" VMS_DIR = "./docker_vm_data" From e9dbc3c37466da111db5ce9a98806321088c49ad Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Tue, 22 Oct 2024 20:42:27 +0800 Subject: [PATCH 17/22] Update docs --- README.md | 4 +++ .../providers/docker/DOCKER_GUIDELINE.md | 27 +++++++++++++++++++ desktop_env/server/main.py | 13 +++++---- 3 files changed, 39 insertions(+), 5 
deletions(-) create mode 100644 desktop_env/providers/docker/DOCKER_GUIDELINE.md diff --git a/README.md b/README.md index b4b566f..d991aa0 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ ## 📢 Updates +- 2024-10-22: We supported Docker for hosting virtual machines on virtualized platforms. Check below for detailed instructions! - 2024-06-15: We refactor the code of environment part to decompose VMware Integration, and start to support other platforms such as VitualBox, AWS, Azure, etc. Hold tight! - 2024-04-11: We released our [paper](https://arxiv.org/abs/2404.07972), [environment and benchmark](https://github.com/xlang-ai/OSWorld), and [project page](https://os-world.github.io/). Check it out! @@ -76,6 +77,9 @@ See [AWS_GUIDELINE](https://github.com/xlang-ai/OSWorld/blob/main/desktop_env/pr #### On your Azure We have finished the support for Azure but not yet fully tested. +### On Docker +See [DOCKER_GUIDELINE](https://github.com/xlang-ai/OSWorld/blob/main/desktop_env/providers/docker/DOCKER_GUIDELINE.md) for using Docker to host virtual machines. + #### Others We are working on supporting more 👷. Please hold tight! diff --git a/desktop_env/providers/docker/DOCKER_GUIDELINE.md b/desktop_env/providers/docker/DOCKER_GUIDELINE.md new file mode 100644 index 0000000..4e2b5d1 --- /dev/null +++ b/desktop_env/providers/docker/DOCKER_GUIDELINE.md @@ -0,0 +1,27 @@ +# Configuration of Docker + +--- + +Welcome to the Docker VM Management documentation. + +## Prerequisite: Check if your machine supports KVM + +We recommend running the VM with KVM support. To check if your hosting platform supports KVM, run + +``` +egrep -c '(vmx|svm)' /proc/cpuinfo +``` + +on Linux. If the return value is greater than zero, the processor should be able to support KVM. + +> **Note**: macOS hosts generally do not support KVM. 
+ +## Install Docker + +If your hosting platform supports graphical user interface (GUI), you may refer to [Install Docker Desktop on Linux](https://docs.docker.com/desktop/install/linux/) or [Install Docker Desktop on Windows](https://docs.docker.com/desktop/install/windows-install/) based on your OS. Otherwise, you may [Install Docker Engine](https://docs.docker.com/engine/install/). + +## Running Experiments + +Add the following arguments when initializing `DesktopEnv`: +- `provider`: `docker` +- `os_type`: `Ubuntu` or `Windows`, depending on the OS of the VM diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index 07bffad..cafe06a 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -180,16 +180,19 @@ def capture_screen_with_cursor(): return (cursor, hotspot) - cursor, (hotspotx, hotspoty) = get_cursor() - ratio = ctypes.windll.shcore.GetScaleFactorForDevice(0) / 100 img = ImageGrab.grab(bbox=None, include_layered_windows=True) - pos_win = win32gui.GetCursorPos() - pos = (round(pos_win[0]*ratio - hotspotx), round(pos_win[1]*ratio - hotspoty)) + try: + cursor, (hotspotx, hotspoty) = get_cursor() - img.paste(cursor, pos, cursor) + pos_win = win32gui.GetCursorPos() + pos = (round(pos_win[0]*ratio - hotspotx), round(pos_win[1]*ratio - hotspoty)) + + img.paste(cursor, pos, cursor) + except: + pass img.save(file_path) elif user_platform == "Linux": From 6635e8f3fdfdaf199e6f97632243f0beccdab8cb Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Tue, 22 Oct 2024 20:47:39 +0800 Subject: [PATCH 18/22] Minor update on docs --- desktop_env/providers/docker/DOCKER_GUIDELINE.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/desktop_env/providers/docker/DOCKER_GUIDELINE.md b/desktop_env/providers/docker/DOCKER_GUIDELINE.md index 4e2b5d1..eb0cbde 100644 --- a/desktop_env/providers/docker/DOCKER_GUIDELINE.md +++ b/desktop_env/providers/docker/DOCKER_GUIDELINE.md @@ -25,3 +25,5 @@ If your hosting platform supports graphical user interface 
(GUI), you may refer Add the following arguments when initializing `DesktopEnv`: - `provider`: `docker` - `os_type`: `Ubuntu` or `Windows`, depending on the OS of the VM + +Please allow for some time to download the virtual machine snapshot on your first run. From a895757450dbab817e648a95de8b80948a8f6009 Mon Sep 17 00:00:00 2001 From: Tianbao Xie <47296835+Timothyxxx@users.noreply.github.com> Date: Tue, 22 Oct 2024 22:12:50 +0800 Subject: [PATCH 19/22] Update README.md --- README.md | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index d991aa0..b764281 100644 --- a/README.md +++ b/README.md @@ -32,13 +32,14 @@ ## 📢 Updates -- 2024-10-22: We supported Docker for hosting virtual machines on virtualized platforms. Check below for detailed instructions! +- 2024-10-22: We supported Docker🐳 for hosting virtual machines on virtualized platforms. Check below for detailed instructions! - 2024-06-15: We refactor the code of environment part to decompose VMware Integration, and start to support other platforms such as VitualBox, AWS, Azure, etc. Hold tight! - 2024-04-11: We released our [paper](https://arxiv.org/abs/2404.07972), [environment and benchmark](https://github.com/xlang-ai/OSWorld), and [project page](https://os-world.github.io/). Check it out! ## 💾 Installation -### On Your Desktop or Server (Non-Virtualized Platform) -Suppose you are operating on a system that has not been virtualized, meaning you are not utilizing a virtualized environment like AWS, Azure, or k8s. If this is the case, proceed with the instructions below. However, if you are on a virtualized platform, please refer to the [virtualized platform](https://github.com/xlang-ai/OSWorld?tab=readme-ov-file#virtualized-platform) section. +### VMware/VirtualBox (Desktop, Laptop, Bare Metal Machine) +Suppose you are operating on a system that has not been virtualized (e.g. 
your desktop, laptop, bare metal machine), meaning you are not utilizing a virtualized environment like AWS, Azure, or k8s. +If this is the case, proceed with the instructions below. However, if you are on a virtualized platform, please refer to the [virtualized platform](https://github.com/xlang-ai/OSWorld?tab=readme-ov-file#virtualized-platform) section. 1. First, clone this repository and `cd` into it. Then, install the dependencies listed in `requirements.txt`. It is recommended that you use the latest version of Conda to manage the environment, but you can also choose to manually install the dependencies. Please ensure that the version of Python is >= 3.9. ```bash @@ -70,17 +71,29 @@ If the installation along with the environment variable set is successful, you w All set! Our setup script will automatically download the necessary virtual machines and configure the environment for you. -### On AWS or Azure (Virtualized platform) -#### On your AWS -See [AWS_GUIDELINE](https://github.com/xlang-ai/OSWorld/blob/main/desktop_env/providers/aws/AWS_GUIDELINE.md) for using AWS as the virtualized platform. Please carefully go through the guideline and choose the proper instance type and region. +### Docker (Server (with KVM Support for the better)) +If you are running on a non-bare metal server, or prefer not to use VMware and VirtualBox platforms, we recommend using our Docker support. -#### On your Azure -We have finished the support for Azure but not yet fully tested. +#### Prerequisite: Check if your machine supports KVM +We recommend running the VM with KVM support. To check if your hosting platform supports KVM, run +``` +egrep -c '(vmx|svm)' /proc/cpuinfo +``` +on Linux. If the return value is greater than zero, the processor should be able to support KVM. +> **Note**: macOS hosts generally do not support KVM. 
-### On Docker -See [DOCKER_GUIDELINE](https://github.com/xlang-ai/OSWorld/blob/main/desktop_env/providers/docker/DOCKER_GUIDELINE.md) for using Docker to host virtual machines. +#### Install Docker +If your hosting platform supports a graphical user interface (GUI), you may refer to [Install Docker Desktop on Linux](https://docs.docker.com/desktop/install/linux/) or [Install Docker Desktop on Windows](https://docs.docker.com/desktop/install/windows-install/) based on your OS. Otherwise, you may [Install Docker Engine](https://docs.docker.com/engine/install/). -#### Others +#### Running Experiments +Add the following arguments when initializing `DesktopEnv`: +- `provider`: `docker` +- `os_type`: `Ubuntu` or `Windows`, depending on the OS of the VM + +See [DOCKER_GUIDELINE](https://github.com/xlang-ai/OSWorld/blob/main/desktop_env/providers/docker/DOCKER_GUIDELINE.md) for more details of using Docker to host virtual machines. + + +### Others We are working on supporting more 👷. Please hold tight! 
From 275de550b41d0daed808f370f165134eb99aa017 Mon Sep 17 00:00:00 2001 From: Tianbao Xie <47296835+Timothyxxx@users.noreply.github.com> Date: Tue, 22 Oct 2024 22:31:42 +0800 Subject: [PATCH 20/22] Set the default setting back to vmware and Ubuntu, since people may would try from desktop first --- desktop_env/desktop_env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/desktop_env/desktop_env.py b/desktop_env/desktop_env.py index 7d7b409..cf05d03 100644 --- a/desktop_env/desktop_env.py +++ b/desktop_env/desktop_env.py @@ -26,7 +26,7 @@ class DesktopEnv(gym.Env): def __init__( self, - provider_name: str = "docker", + provider_name: str = "vmware", region: str = None, path_to_vm: str = None, snapshot_name: str = "init_state", @@ -36,7 +36,7 @@ class DesktopEnv(gym.Env): headless: bool = False, require_a11y_tree: bool = True, require_terminal: bool = False, - os_type: str = "Windows", + os_type: str = "Ubuntu", ): """ Args: From 278fe6b7c9dea2f232bc78b132ac372400561206 Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Tue, 22 Oct 2024 22:34:22 +0800 Subject: [PATCH 21/22] Merge Docker guidelines into Readme --- README.md | 3 -- .../providers/docker/DOCKER_GUIDELINE.md | 29 ------------------- 2 files changed, 32 deletions(-) delete mode 100644 desktop_env/providers/docker/DOCKER_GUIDELINE.md diff --git a/README.md b/README.md index b764281..b535821 100644 --- a/README.md +++ b/README.md @@ -90,9 +90,6 @@ Add the following arguments when initializing `DesktopEnv`: - `provider`: `docker` - `os_type`: `Ubuntu` or `Windows`, depending on the OS of the VM -See [DOCKER_GUIDELINE](https://github.com/xlang-ai/OSWorld/blob/main/desktop_env/providers/docker/DOCKER_GUIDELINE.md) for more details of using Docker to host virtual machines. - - ### Others We are working on supporting more 👷. Please hold tight! 
diff --git a/desktop_env/providers/docker/DOCKER_GUIDELINE.md b/desktop_env/providers/docker/DOCKER_GUIDELINE.md deleted file mode 100644 index eb0cbde..0000000 --- a/desktop_env/providers/docker/DOCKER_GUIDELINE.md +++ /dev/null @@ -1,29 +0,0 @@ -# Configuration of Docker - ---- - -Welcome to the Docker VM Management documentation. - -## Prerequisite: Check if your machine supports KVM - -We recommend running the VM with KVM support. To check if your hosting platform supports KVM, run - -``` -egrep -c '(vmx|svm)' /proc/cpuinfo -``` - -on Linux. If the return value is greater than zero, the processor should be able to support KVM. - -> **Note**: macOS hosts generally do not support KVM. - -## Install Docker - -If your hosting platform supports graphical user interface (GUI), you may refer to [Install Docker Desktop on Linux](https://docs.docker.com/desktop/install/linux/) or [Install Docker Desktop on Windows](https://docs.docker.com/desktop/install/windows-install/) based on your OS. Otherwise, you may [Install Docker Engine](https://docs.docker.com/engine/install/). - -## Running Experiments - -Add the following arguments when initializing `DesktopEnv`: -- `provider`: `docker` -- `os_type`: `Ubuntu` or `Windows`, depending on the OS of the VM - -Please allow for some time to download the virtual machine snapshot on your first run. 
From 954a78be363326e23a1ff0840609258ff9db6785 Mon Sep 17 00:00:00 2001 From: FredWuCZ Date: Tue, 22 Oct 2024 22:37:46 +0800 Subject: [PATCH 22/22] Update Docker guidelines --- .../providers/docker/DOCKER_GUIDELINE.md | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 desktop_env/providers/docker/DOCKER_GUIDELINE.md diff --git a/desktop_env/providers/docker/DOCKER_GUIDELINE.md b/desktop_env/providers/docker/DOCKER_GUIDELINE.md new file mode 100644 index 0000000..eb0cbde --- /dev/null +++ b/desktop_env/providers/docker/DOCKER_GUIDELINE.md @@ -0,0 +1,29 @@ +# Configuration of Docker + +--- + +Welcome to the Docker VM Management documentation. + +## Prerequisite: Check if your machine supports KVM + +We recommend running the VM with KVM support. To check if your hosting platform supports KVM, run + +``` +egrep -c '(vmx|svm)' /proc/cpuinfo +``` + +on Linux. If the return value is greater than zero, the processor should be able to support KVM. + +> **Note**: macOS hosts generally do not support KVM. + +## Install Docker + +If your hosting platform supports a graphical user interface (GUI), you may refer to [Install Docker Desktop on Linux](https://docs.docker.com/desktop/install/linux/) or [Install Docker Desktop on Windows](https://docs.docker.com/desktop/install/windows-install/) based on your OS. Otherwise, you may [Install Docker Engine](https://docs.docker.com/engine/install/). + +## Running Experiments + +Add the following arguments when initializing `DesktopEnv`: +- `provider`: `docker` +- `os_type`: `Ubuntu` or `Windows`, depending on the OS of the VM + +Please allow for some time to download the virtual machine snapshot on your first run.