diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index 8bc1323..245054e 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -38,13 +38,17 @@ def execute_command(): @app.route('/screenshot', methods=['GET']) def capture_screen_with_cursor(): + # fixme: when running on virtual machines, the cursor is not captured, don't know why + file_path = os.path.join("screenshots", "screenshot.png") user_platform = platform.system() # Ensure the screenshots directory exists os.makedirs(os.path.dirname(file_path), exist_ok=True) - if user_platform == "Windows": + + # fixme: This is a temporary fix for the cursor not being captured on Windows and Linux + if user_platform == "Windows" or user_platform == "Linux": def _download_image(url, path): response = requests.get(url) with open(path, 'wb') as file: @@ -57,14 +61,16 @@ def capture_screen_with_cursor(): screenshot = pyautogui.screenshot() cursor_x, cursor_y = pyautogui.position() cursor = Image.open(cursor_path) + # make the cursor smaller + cursor = cursor.resize((int(cursor.width / 1.5), int(cursor.height / 1.5))) screenshot.paste(cursor, (cursor_x, cursor_y), cursor) screenshot.save(file_path) - elif user_platform == "Linux": - # Use xlib to prevent scrot dependency for Linux - screen = Xlib.display.Display().screen() - size = screen.width_in_pixels, screen.height_in_pixels - screenshot = ImageGrab.grab(bbox=(0, 0, size[0], size[1])) - screenshot.save(file_path) + # elif user_platform == "Linux": + # # Use xlib to prevent scrot dependency for Linux + # screen = Xlib.display.Display().screen() + # size = screen.width_in_pixels, screen.height_in_pixels + # screenshot = ImageGrab.grab(bbox=(0, 0, size[0], size[1])) + # screenshot.save(file_path) elif user_platform == "Darwin": # (Mac OS) # Use the screencapture utility to capture the screen with the cursor subprocess.run(["screencapture", "-C", file_path]) diff --git a/desktop_env/server/requirements.txt b/desktop_env/server/requirements.txt index f4cb1ab..8b96205 100644 --- a/desktop_env/server/requirements.txt +++ b/desktop_env/server/requirements.txt @@ -3,3 +3,4 @@ PyAutoGUI==0.9.54 Pillow==10.1.0 git+https://github.com/moses-palmer/pynput.git@refs/pull/541/head # to make sure that it works on Apple Silicon requests +flask diff --git a/main.py b/main.py index d7c3073..e8658a7 100644 --- a/main.py +++ b/main.py @@ -12,13 +12,13 @@ def human_agent(): env = DesktopEnv( # path_to_vm=r"""C:\Users\tianbaox\Downloads\Windows 10 x64\Windows 10 x64.vmx""", - path_to_vm=r"""C:\Users\tianbaox\Documents\Virtual Machines\Win10\Win10.vmx""", + path_to_vm=r"""C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx""", # path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx", action_space="computer_13", - snapshot_path="base_setup3", + snapshot_path="base_setup", instruction=example["instruction"], - config=example["config"], - evaluator=example["evaluator"] + # config=example["config"], + # evaluator=example["evaluator"] ) # reset the environment to certain snapshot