From 487fb8005b8af873e844993d2a13024b8e7ef6fc Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Sat, 2 Dec 2023 22:14:50 +0800 Subject: [PATCH] Improve: fix bugs; add back the cursor in screenshot; add pause in env.step --- desktop_env/controllers/python.py | 2 +- desktop_env/envs/desktop_env.py | 5 ++++- desktop_env/server/main.py | 26 +++++++++++++++++++++++++- desktop_env/server/requirements.txt | 1 + main.py | 8 +++++--- 5 files changed, 36 insertions(+), 6 deletions(-) diff --git a/desktop_env/controllers/python.py b/desktop_env/controllers/python.py index 39d1196..f2021c0 100644 --- a/desktop_env/controllers/python.py +++ b/desktop_env/controllers/python.py @@ -4,7 +4,7 @@ import requests class PythonController: - def __init__(self, http_server: str, pkgs_prefix: str = "py -c \"import pyautogui; {command}\""): + def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""): self.http_server = http_server self.pkgs_prefix = pkgs_prefix # fixme: this is a hacky way to execute python commands. fix it and combine it with installation of packages diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py index 3fbb92f..16b73cf 100644 --- a/desktop_env/envs/desktop_env.py +++ b/desktop_env/envs/desktop_env.py @@ -85,10 +85,13 @@ class DesktopEnv(gym.Env): observation = self._get_obs() return observation - def step(self, action): + def step(self, action, pause=0.5): # todo: support both the action space of our-designed space and the executable code space in pyautogui # Our action space is the set of all possible python commands insides `pyautogui` self.controller.execute_python_command(action) + + # todo: maybe for the better here we need to add a logic to wait until the rendering is done + time.sleep(pause) observation = self._get_obs() reward = 0 # todo: Define reward calculation for each example done = False # todo: Define episode termination condition for each example diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index 228a08d..f9e8dcd 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -1,12 +1,14 @@ import os import platform import subprocess +import requests import Xlib.display import pyautogui -from PIL import ImageGrab +from PIL import ImageGrab, Image from flask import Flask, request, jsonify, send_file + app = Flask(__name__) pyautogui.PAUSE = 0 @@ -43,7 +45,19 @@ def capture_screen_with_cursor(): os.makedirs(os.path.dirname(file_path), exist_ok=True) if user_platform == "Windows": + def _download_image(url, path): + response = requests.get(url) + with open(path, 'wb') as file: + file.write(response.content) + + cursor_path = os.path.join("screenshots", "cursor.png") + if not os.path.exists(cursor_path): + cursor_url = "https://vip.helloimg.com/images/2023/12/02/oQPzmt.png" + _download_image(cursor_url, cursor_path) screenshot = pyautogui.screenshot() + cursor_x, cursor_y = pyautogui.position() + cursor = Image.open(cursor_path) + screenshot.paste(cursor, (cursor_x, cursor_y), cursor) screenshot.save(file_path) elif user_platform == "Linux": # Use xlib to prevent scrot dependency for Linux @@ -60,5 +74,15 @@ def capture_screen_with_cursor(): return send_file(file_path, mimetype='image/png') +@app.route('/platform', methods=['GET']) +def get_platform(): + return platform.system() + + +@app.route('/cursor_position', methods=['GET']) +def get_cursor_position(): + return pyautogui.position().x, pyautogui.position().y + + if __name__ == '__main__': app.run(debug=True, host="0.0.0.0") diff --git a/desktop_env/server/requirements.txt b/desktop_env/server/requirements.txt index 32c0e96..f4cb1ab 100644 --- a/desktop_env/server/requirements.txt +++ b/desktop_env/server/requirements.txt @@ -2,3 +2,4 @@ python3-xlib==0.15 PyAutoGUI==0.9.54 Pillow==10.1.0 git+https://github.com/moses-palmer/pynput.git@refs/pull/541/head # to make sure that it works on Apple Silicon +requests diff --git a/main.py b/main.py index 57c1926..9ef3982 100644 --- a/main.py +++ b/main.py @@ -10,13 +10,15 @@ def human_agent(): # path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx", # host="192.168.7.128", host="http://192.168.13.128:5000", + snapshot_path="base3", ) # reset the environment to certain snapshot - # observation = env.reset() + observation = env.reset() done = False - while not done: + + for i in range(2): # action = get_human_action() # action = { @@ -24,7 +26,7 @@ def human_agent(): # "click_type": 3, # } - action = "pyautogui.dragTo(100, 200, button='left')" + action = "pyautogui.moveTo(10, 100)" if i == 0 else "pyautogui.click(button='right')" observation, reward, done, info = env.step(action) print("Observation:", observation)