diff --git a/README.md b/README.md index b4b566f..b535821 100644 --- a/README.md +++ b/README.md @@ -32,12 +32,14 @@ ## 📢 Updates +- 2024-10-22: We now support Docker🐳 for hosting virtual machines on virtualized platforms. Check below for detailed instructions! - 2024-06-15: We refactor the code of environment part to decompose VMware Integration, and start to support other platforms such as VitualBox, AWS, Azure, etc. Hold tight! - 2024-04-11: We released our [paper](https://arxiv.org/abs/2404.07972), [environment and benchmark](https://github.com/xlang-ai/OSWorld), and [project page](https://os-world.github.io/). Check it out! ## 💾 Installation -### On Your Desktop or Server (Non-Virtualized Platform) -Suppose you are operating on a system that has not been virtualized, meaning you are not utilizing a virtualized environment like AWS, Azure, or k8s. If this is the case, proceed with the instructions below. However, if you are on a virtualized platform, please refer to the [virtualized platform](https://github.com/xlang-ai/OSWorld?tab=readme-ov-file#virtualized-platform) section. +### VMware/VirtualBox (Desktop, Laptop, Bare Metal Machine) +Suppose you are operating on a system that has not been virtualized (e.g. your desktop, laptop, bare metal machine), meaning you are not utilizing a virtualized environment like AWS, Azure, or k8s. +If this is the case, proceed with the instructions below. However, if you are on a virtualized platform, please refer to the [Docker](https://github.com/xlang-ai/OSWorld?tab=readme-ov-file#docker-server-with-kvm-support-for-the-better) section. 1. First, clone this repository and `cd` into it. Then, install the dependencies listed in `requirements.txt`. It is recommended that you use the latest version of Conda to manage the environment, but you can also choose to manually install the dependencies. Please ensure that the version of Python is >= 3.9. 
```bash @@ -69,14 +71,26 @@ If the installation along with the environment variable set is successful, you w All set! Our setup script will automatically download the necessary virtual machines and configure the environment for you. -### On AWS or Azure (Virtualized platform) -#### On your AWS -See [AWS_GUIDELINE](https://github.com/xlang-ai/OSWorld/blob/main/desktop_env/providers/aws/AWS_GUIDELINE.md) for using AWS as the virtualized platform. Please carefully go through the guideline and choose the proper instance type and region. +### Docker (Server (with KVM Support for the better)) +If you are running on a non-bare-metal server, or prefer not to use VMware and VirtualBox platforms, we recommend using our Docker support. -#### On your Azure -We have finished the support for Azure but not yet fully tested. +#### Prerequisite: Check if your machine supports KVM +We recommend running the VM with KVM support. To check if your hosting platform supports KVM, run +``` +egrep -c '(vmx|svm)' /proc/cpuinfo +``` +on Linux. If the return value is greater than zero, the processor should be able to support KVM. +> **Note**: macOS hosts generally do not support KVM. -#### Others +#### Install Docker +If your hosting platform supports a graphical user interface (GUI), you may refer to [Install Docker Desktop on Linux](https://docs.docker.com/desktop/install/linux/) or [Install Docker Desktop on Windows](https://docs.docker.com/desktop/install/windows-install/) based on your OS. Otherwise, you may [Install Docker Engine](https://docs.docker.com/engine/install/). + +#### Running Experiments +Add the following arguments when initializing `DesktopEnv`: +- `provider`: `docker` +- `os_type`: `Ubuntu` or `Windows`, depending on the OS of the VM + +### Others We are working on supporting more 👷. Please hold tight! 
diff --git a/desktop_env/providers/docker/DOCKER_GUIDELINE.md b/desktop_env/providers/docker/DOCKER_GUIDELINE.md new file mode 100644 index 0000000..eb0cbde --- /dev/null +++ b/desktop_env/providers/docker/DOCKER_GUIDELINE.md @@ -0,0 +1,29 @@ +# Configuration of Docker + +--- + +Welcome to the Docker VM Management documentation. + +## Prerequisite: Check if your machine supports KVM + +We recommend running the VM with KVM support. To check if your hosting platform supports KVM, run + +``` +egrep -c '(vmx|svm)' /proc/cpuinfo +``` + +on Linux. If the return value is greater than zero, the processor should be able to support KVM. + +> **Note**: macOS hosts generally do not support KVM. + +## Install Docker + +If your hosting platform supports a graphical user interface (GUI), you may refer to [Install Docker Desktop on Linux](https://docs.docker.com/desktop/install/linux/) or [Install Docker Desktop on Windows](https://docs.docker.com/desktop/install/windows-install/) based on your OS. Otherwise, you may [Install Docker Engine](https://docs.docker.com/engine/install/). + +## Running Experiments + +Add the following arguments when initializing `DesktopEnv`: +- `provider`: `docker` +- `os_type`: `Ubuntu` or `Windows`, depending on the OS of the VM + +Please allow for some time to download the virtual machine snapshot on your first run. 
diff --git a/desktop_env/providers/docker/manager.py b/desktop_env/providers/docker/manager.py index e4b09a1..1172167 100644 --- a/desktop_env/providers/docker/manager.py +++ b/desktop_env/providers/docker/manager.py @@ -26,7 +26,8 @@ logger.setLevel(logging.INFO) MAX_RETRY_TIMES = 10 RETRY_INTERVAL = 5 -UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2" +UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_osworld/resolve/main/Ubuntu.qcow2.zip" +WINDOWS_X86_URL = "https://huggingface.co/datasets/xlangai/windows_osworld/resolve/main/Windows-10-x64.qcow2.zip" VMS_DIR = "./docker_vm_data" # Determine the platform and CPU architecture to decide the correct VM image to download @@ -39,8 +40,8 @@ VMS_DIR = "./docker_vm_data" # URL = UBUNTU_X86_URL # else: # raise Exception("Unsupported platform or architecture") -URL = UBUNTU_X86_URL +URL = UBUNTU_X86_URL DOWNLOADED_FILE_NAME = URL.split('/')[-1] if platform.system() == 'Windows': @@ -48,12 +49,11 @@ if platform.system() == 'Windows': os.environ["PATH"] += os.pathsep + docker_path def _download_vm(vms_dir: str): + global URL, DOWNLOADED_FILE_NAME # Download the virtual machine image logger.info("Downloading the virtual machine image...") downloaded_size = 0 - URL = UBUNTU_X86_URL - DOWNLOADED_FILE_NAME = URL.split('/')[-1] downloaded_file_name = DOWNLOADED_FILE_NAME os.makedirs(vms_dir, exist_ok=True) @@ -95,6 +95,13 @@ def _download_vm(vms_dir: str): logger.info("Download succeeds.") break # Download completed successfully + if downloaded_file_name.endswith(".zip"): + # Unzip the downloaded file + logger.info("Unzipping the downloaded file...☕️") + with zipfile.ZipFile(downloaded_file_path, 'r') as zip_ref: + zip_ref.extractall(vms_dir) + logger.info("Files have been successfully extracted to the directory: " + str(vms_dir)) + class DockerVMManager(VMManager): def __init__(self, registry_path=""): pass @@ -118,6 +125,18 @@ class DockerVMManager(VMManager): 
pass def get_vm_path(self, os_type, region): - if not os.path.exists(os.path.join(VMS_DIR, DOWNLOADED_FILE_NAME)): + global URL, DOWNLOADED_FILE_NAME + if os_type == "Ubuntu": + URL = UBUNTU_X86_URL + elif os_type == "Windows": + URL = WINDOWS_X86_URL + DOWNLOADED_FILE_NAME = URL.split('/')[-1] + + if DOWNLOADED_FILE_NAME.endswith(".zip"): + vm_name = DOWNLOADED_FILE_NAME[:-4] + else: + vm_name = DOWNLOADED_FILE_NAME + + if not os.path.exists(os.path.join(VMS_DIR, vm_name)): _download_vm(VMS_DIR) - return os.path.join(VMS_DIR, DOWNLOADED_FILE_NAME) \ No newline at end of file + return os.path.join(VMS_DIR, vm_name) \ No newline at end of file diff --git a/desktop_env/providers/docker/provider.py b/desktop_env/providers/docker/provider.py index 9ceab84..f47df09 100644 --- a/desktop_env/providers/docker/provider.py +++ b/desktop_env/providers/docker/provider.py @@ -18,10 +18,9 @@ RETRY_INTERVAL = 1 class DockerProvider(Provider): def __init__(self, region: str): self.client = docker.from_env() - self.vnc_port = self._get_available_port(8006) - self.server_port = self._get_available_port(5000) - # self.remote_debugging_port = self._get_available_port(1337) - self.chromium_port = self._get_available_port(9222) + self.server_port = None + self.vnc_port = None + self.chromium_port = None self.environment = {"DISK_SIZE": "32G", "RAM_SIZE": "4G", "CPU_CORES": "4"} # Modify if needed @staticmethod @@ -32,8 +31,12 @@ class DockerProvider(Provider): port += 1 def start_emulator(self, path_to_vm: str, headless: bool, os_type: str): + self.vnc_port = self._get_available_port(8006) + self.server_port = self._get_available_port(5000) + # self.remote_debugging_port = self._get_available_port(1337) + self.chromium_port = self._get_available_port(9222) logger.info(f"Occupying ports: {self.vnc_port}, {self.server_port}, {self.chromium_port}") - self.container = self.client.containers.run("happysixd/osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], 
devices=["/dev/kvm"], volumes={os.path.abspath(path_to_vm): {"bind": "/Ubuntu.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port, 9222: self.chromium_port}, detach=True) + self.container = self.client.containers.run("happysixd/osworld-docker", environment=self.environment, cap_add=["NET_ADMIN"], devices=["/dev/kvm"], volumes={os.path.abspath(path_to_vm): {"bind": "/System.qcow2", "mode": "ro"}}, ports={8006: self.vnc_port, 5000: self.server_port, 9222: self.chromium_port}, detach=True) def download_screenshot(ip, port): url = f"http://{ip}:{port}/screenshot" try: @@ -62,6 +65,4 @@ class DockerProvider(Provider): logger.info("Stopping VM...") self.container.stop() self.container.remove() - time.sleep(WAIT_TIME) - -# docker run -it --rm -e "DISK_SIZE=64G" -e "RAM_SIZE=8G" -e "CPU_CORES=8" --volume /home/$USER/osworld/docker_vm_data/Ubuntu.qcow2:/Ubuntu.qcow2:ro --cap-add NET_ADMIN --device /dev/kvm -p 8008:8006 -p 5002:5000 happysixd/osworld-docker \ No newline at end of file + time.sleep(WAIT_TIME) \ No newline at end of file diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index 7ba8800..cafe06a 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -14,7 +14,7 @@ import lxml.etree import pyautogui import requests import re -from PIL import Image +from PIL import Image, ImageGrab from Xlib import display, X from flask import Flask, request, jsonify, send_file, abort # , send_from_directory from lxml.etree import _Element @@ -35,6 +35,7 @@ elif platform_name == "Windows": from pywinauto import Desktop from pywinauto.base_wrapper import BaseWrapper import pywinauto.application + import win32ui, win32gui Accessible = Any @@ -88,7 +89,7 @@ def execute_command(): # Execute the command without any safety checks. 
try: result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True, - timeout=120) + timeout=120, creationflags=subprocess.CREATE_NO_WINDOW) return jsonify({ 'status': 'success', 'output': result.stdout, @@ -150,22 +151,50 @@ def capture_screen_with_cursor(): # fixme: This is a temporary fix for the cursor not being captured on Windows and Linux if user_platform == "Windows": - def _download_image(url, path): - response = requests.get(url) - with open(path, 'wb') as file: - file.write(response.content) + def get_cursor(): + hcursor = win32gui.GetCursorInfo()[1] + hdc = win32ui.CreateDCFromHandle(win32gui.GetDC(0)) + hbmp = win32ui.CreateBitmap() + hbmp.CreateCompatibleBitmap(hdc, 36, 36) + hdc = hdc.CreateCompatibleDC() + hdc.SelectObject(hbmp) + hdc.DrawIcon((0,0), hcursor) - cursor_path = os.path.join("screenshots", "cursor.png") - if not os.path.exists(cursor_path): - cursor_url = "https://vip.helloimg.com/images/2023/12/02/oQPzmt.png" - _download_image(cursor_url, cursor_path) - screenshot = pyautogui.screenshot() - cursor_x, cursor_y = pyautogui.position() - cursor = Image.open(cursor_path) - # make the cursor smaller - cursor = cursor.resize((int(cursor.width / 1.5), int(cursor.height / 1.5))) - screenshot.paste(cursor, (cursor_x, cursor_y), cursor) - screenshot.save(file_path) + bmpinfo = hbmp.GetInfo() + bmpstr = hbmp.GetBitmapBits(True) + cursor = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1).convert("RGBA") + + win32gui.DestroyIcon(hcursor) + win32gui.DeleteObject(hbmp.GetHandle()) + hdc.DeleteDC() + + pixdata = cursor.load() + + width, height = cursor.size + for y in range(height): + for x in range(width): + if pixdata[x, y] == (0, 0, 0, 255): + pixdata[x, y] = (0, 0, 0, 0) + + hotspot = win32gui.GetIconInfo(hcursor)[1:3] + + return (cursor, hotspot) + + ratio = ctypes.windll.shcore.GetScaleFactorForDevice(0) / 100 + + img = ImageGrab.grab(bbox=None, 
include_layered_windows=True) + + try: + cursor, (hotspotx, hotspoty) = get_cursor() + + pos_win = win32gui.GetCursorPos() + pos = (round(pos_win[0]*ratio - hotspotx), round(pos_win[1]*ratio - hotspoty)) + + img.paste(cursor, pos, cursor) + except: + pass + + img.save(file_path) elif user_platform == "Linux": cursor_obj = Xcursor() imgarray = cursor_obj.getCursorImageArrayFast() diff --git a/requirements.txt b/requirements.txt index 72ca6fc..0dcdca5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -57,4 +57,4 @@ boto3 azure-identity azure-mgmt-compute azure-mgmt-network -docker \ No newline at end of file +docker