Improve: fix bugs; add the cursor back into screenshots; add a pause in env.step

This commit is contained in:
Timothyxxx
2023-12-02 22:14:50 +08:00
parent e51ef4b91d
commit 487fb8005b
5 changed files with 36 additions and 6 deletions

View File

@@ -4,7 +4,7 @@ import requests
class PythonController: class PythonController:
def __init__(self, http_server: str, pkgs_prefix: str = "py -c \"import pyautogui; {command}\""): def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""):
self.http_server = http_server self.http_server = http_server
self.pkgs_prefix = pkgs_prefix # fixme: this is a hacky way to execute python commands. fix it and combine it with installation of packages self.pkgs_prefix = pkgs_prefix # fixme: this is a hacky way to execute python commands. fix it and combine it with installation of packages

View File

@@ -85,10 +85,13 @@ class DesktopEnv(gym.Env):
observation = self._get_obs() observation = self._get_obs()
return observation return observation
def step(self, action): def step(self, action, pause=0.5):
# todo: support both the action space of our-designed space and the executable code space in pyautogui # todo: support both the action space of our-designed space and the executable code space in pyautogui
# Our action space is the set of all possible python commands inside `pyautogui` # Our action space is the set of all possible python commands inside `pyautogui`
self.controller.execute_python_command(action) self.controller.execute_python_command(action)
# todo: it may be better to add logic here that waits until rendering is done
time.sleep(pause)
observation = self._get_obs() observation = self._get_obs()
reward = 0 # todo: Define reward calculation for each example reward = 0 # todo: Define reward calculation for each example
done = False # todo: Define episode termination condition for each example done = False # todo: Define episode termination condition for each example

View File

@@ -1,12 +1,14 @@
import os import os
import platform import platform
import subprocess import subprocess
import requests
import Xlib.display import Xlib.display
import pyautogui import pyautogui
from PIL import ImageGrab from PIL import ImageGrab, Image
from flask import Flask, request, jsonify, send_file from flask import Flask, request, jsonify, send_file
app = Flask(__name__) app = Flask(__name__)
pyautogui.PAUSE = 0 pyautogui.PAUSE = 0
@@ -43,7 +45,19 @@ def capture_screen_with_cursor():
os.makedirs(os.path.dirname(file_path), exist_ok=True) os.makedirs(os.path.dirname(file_path), exist_ok=True)
if user_platform == "Windows": if user_platform == "Windows":
def _download_image(url, path):
    """Download the resource at ``url`` and write its bytes to ``path``.

    Raises a ``requests`` exception (timeout, connection error, or
    ``HTTPError`` for a non-success status) instead of writing a file
    when the download fails.
    """
    # Bound the wait so a stalled remote host cannot hang the caller forever.
    response = requests.get(url, timeout=10)
    # Fail before touching the disk: otherwise an HTTP error page would be
    # written to ``path`` as if it were the requested image.
    response.raise_for_status()
    with open(path, 'wb') as file:
        file.write(response.content)
cursor_path = os.path.join("screenshots", "cursor.png")
if not os.path.exists(cursor_path):
cursor_url = "https://vip.helloimg.com/images/2023/12/02/oQPzmt.png"
_download_image(cursor_url, cursor_path)
screenshot = pyautogui.screenshot() screenshot = pyautogui.screenshot()
cursor_x, cursor_y = pyautogui.position()
cursor = Image.open(cursor_path)
screenshot.paste(cursor, (cursor_x, cursor_y), cursor)
screenshot.save(file_path) screenshot.save(file_path)
elif user_platform == "Linux": elif user_platform == "Linux":
# Use xlib to prevent scrot dependency for Linux # Use xlib to prevent scrot dependency for Linux
@@ -60,5 +74,15 @@ def capture_screen_with_cursor():
return send_file(file_path, mimetype='image/png') return send_file(file_path, mimetype='image/png')
@app.route('/platform', methods=['GET'])
def get_platform():
    """Respond with the value of ``platform.system()`` for the machine running this server."""
    system_name = platform.system()
    return system_name
@app.route('/cursor_position', methods=['GET'])
def get_cursor_position():
    """Respond with the current cursor position as JSON: ``{"x": ..., "y": ...}``."""
    # Sample the position once so x and y belong to the same reading.
    position = pyautogui.position()
    # A bare ``(x, y)`` tuple would be interpreted by Flask as (body, status),
    # and an int body is not a valid response; return a JSON object instead.
    return jsonify({"x": position.x, "y": position.y})
if __name__ == '__main__': if __name__ == '__main__':
app.run(debug=True, host="0.0.0.0") app.run(debug=True, host="0.0.0.0")

View File

@@ -2,3 +2,4 @@ python3-xlib==0.15
PyAutoGUI==0.9.54 PyAutoGUI==0.9.54
Pillow==10.1.0 Pillow==10.1.0
git+https://github.com/moses-palmer/pynput.git@refs/pull/541/head # to make sure that it works on Apple Silicon git+https://github.com/moses-palmer/pynput.git@refs/pull/541/head # to make sure that it works on Apple Silicon
requests

View File

@@ -10,13 +10,15 @@ def human_agent():
# path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx", # path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
# host="192.168.7.128", # host="192.168.7.128",
host="http://192.168.13.128:5000", host="http://192.168.13.128:5000",
snapshot_path="base3",
) )
# reset the environment to certain snapshot # reset the environment to certain snapshot
# observation = env.reset() observation = env.reset()
done = False done = False
while not done:
for i in range(2):
# action = get_human_action() # action = get_human_action()
# action = { # action = {
@@ -24,7 +26,7 @@ def human_agent():
# "click_type": 3, # "click_type": 3,
# } # }
action = "pyautogui.dragTo(100, 200, button='left')" action = "pyautogui.moveTo(10, 100)" if i == 0 else "pyautogui.click(button='right')"
observation, reward, done, info = env.step(action) observation, reward, done, info = env.step(action)
print("Observation:", observation) print("Observation:", observation)