Improve: fix bugs; add back the cursor in screenshot; add pause in env.step

This commit is contained in:
Timothyxxx
2023-12-02 22:14:50 +08:00
parent e51ef4b91d
commit 487fb8005b
5 changed files with 36 additions and 6 deletions

View File

@@ -4,7 +4,7 @@ import requests
class PythonController:
def __init__(self, http_server: str, pkgs_prefix: str = "py -c \"import pyautogui; {command}\""):
def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""):
self.http_server = http_server
self.pkgs_prefix = pkgs_prefix # fixme: this is a hacky way to execute python commands. fix it and combine it with installation of packages

View File

@@ -85,10 +85,13 @@ class DesktopEnv(gym.Env):
observation = self._get_obs()
return observation
def step(self, action):
def step(self, action, pause=0.5):
# todo: support both the action space of our-designed space and the executable code space in pyautogui
# Our action space is the set of all possible Python commands inside `pyautogui`
self.controller.execute_python_command(action)
# todo: it would probably be better to add logic here that waits until rendering is done
time.sleep(pause)
observation = self._get_obs()
reward = 0 # todo: Define reward calculation for each example
done = False # todo: Define episode termination condition for each example

View File

@@ -1,12 +1,14 @@
import os
import platform
import subprocess
import requests
import Xlib.display
import pyautogui
from PIL import ImageGrab
from PIL import ImageGrab, Image
from flask import Flask, request, jsonify, send_file
app = Flask(__name__)
pyautogui.PAUSE = 0
@@ -43,7 +45,19 @@ def capture_screen_with_cursor():
os.makedirs(os.path.dirname(file_path), exist_ok=True)
if user_platform == "Windows":
def _download_image(url, path):
    """Download the resource at `url` and write its raw bytes to `path`.

    Raises:
        requests.RequestException: if the request times out, the connection
            fails, or the server answers with a non-success status code.
    """
    # Bound the wait so an unreachable host cannot hang the request forever.
    response = requests.get(url, timeout=10)
    # Fail before touching the disk: otherwise an HTTP error page would be
    # saved under `path` as if it were the image, and a caller that only
    # downloads when the file is missing would never replace it.
    response.raise_for_status()
    with open(path, 'wb') as file:
        file.write(response.content)
cursor_path = os.path.join("screenshots", "cursor.png")
if not os.path.exists(cursor_path):
cursor_url = "https://vip.helloimg.com/images/2023/12/02/oQPzmt.png"
_download_image(cursor_url, cursor_path)
screenshot = pyautogui.screenshot()
cursor_x, cursor_y = pyautogui.position()
cursor = Image.open(cursor_path)
screenshot.paste(cursor, (cursor_x, cursor_y), cursor)
screenshot.save(file_path)
elif user_platform == "Linux":
# Use xlib to prevent scrot dependency for Linux
@@ -60,5 +74,15 @@ def capture_screen_with_cursor():
return send_file(file_path, mimetype='image/png')
@app.route('/platform', methods=['GET'])
def get_platform():
    """Return the value of `platform.system()` for the machine running this server."""
    system_name = platform.system()
    return system_name
@app.route('/cursor_position', methods=['GET'])
def get_cursor_position():
    """Return the current mouse cursor position as a JSON array ``[x, y]``."""
    # Sample the position once so x and y belong to the same instant; two
    # separate calls could straddle a cursor move.
    position = pyautogui.position()
    # Flask interprets a bare 2-tuple as (body, status), and an int body is
    # not a valid response, so the coordinates are serialized explicitly.
    return jsonify([position.x, position.y])
if __name__ == '__main__':
app.run(debug=True, host="0.0.0.0")

View File

@@ -2,3 +2,4 @@ python3-xlib==0.15
PyAutoGUI==0.9.54
Pillow==10.1.0
git+https://github.com/moses-palmer/pynput.git@refs/pull/541/head # to make sure that it works on Apple Silicon
requests

View File

@@ -10,13 +10,15 @@ def human_agent():
# path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
# host="192.168.7.128",
host="http://192.168.13.128:5000",
snapshot_path="base3",
)
# reset the environment to certain snapshot
# observation = env.reset()
observation = env.reset()
done = False
while not done:
for i in range(2):
# action = get_human_action()
# action = {
@@ -24,7 +26,7 @@ def human_agent():
# "click_type": 3,
# }
action = "pyautogui.dragTo(100, 200, button='left')"
action = "pyautogui.moveTo(10, 100)" if i == 0 else "pyautogui.click(button='right')"
observation, reward, done, info = env.step(action)
print("Observation:", observation)