Improve: fix bugs; add back the cursor in screenshot; add pause in env.step
This commit is contained in:
@@ -4,7 +4,7 @@ import requests
|
||||
|
||||
|
||||
class PythonController:
|
||||
def __init__(self, http_server: str, pkgs_prefix: str = "py -c \"import pyautogui; {command}\""):
|
||||
def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""):
|
||||
self.http_server = http_server
|
||||
self.pkgs_prefix = pkgs_prefix # fixme: this is a hacky way to execute python commands. fix it and combine it with installation of packages
|
||||
|
||||
|
||||
@@ -85,10 +85,13 @@ class DesktopEnv(gym.Env):
|
||||
observation = self._get_obs()
|
||||
return observation
|
||||
|
||||
def step(self, action):
|
||||
def step(self, action, pause=0.5):
|
||||
# todo: support both our custom-designed action space and the executable-code action space of pyautogui
|
||||
# Our action space is the set of all possible python commands inside `pyautogui`
|
||||
self.controller.execute_python_command(action)
|
||||
|
||||
# todo: ideally, add logic here to wait until rendering has finished
|
||||
time.sleep(pause)
|
||||
observation = self._get_obs()
|
||||
reward = 0 # todo: Define reward calculation for each example
|
||||
done = False # todo: Define episode termination condition for each example
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
import requests
|
||||
|
||||
import Xlib.display
|
||||
import pyautogui
|
||||
from PIL import ImageGrab
|
||||
from PIL import ImageGrab, Image
|
||||
from flask import Flask, request, jsonify, send_file
|
||||
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
pyautogui.PAUSE = 0
|
||||
@@ -43,7 +45,19 @@ def capture_screen_with_cursor():
|
||||
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
||||
|
||||
if user_platform == "Windows":
|
||||
def _download_image(url, path):
    """Download ``url`` and write the response body to ``path``.

    Args:
        url: HTTP(S) address of the file to fetch.
        path: Destination file path; it is opened in binary write mode.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Bound the wait so a stalled server cannot hang the request forever.
    response = requests.get(url, timeout=10)
    # Fail before writing anything: the caller only downloads when the file
    # is missing, so a saved error page would never be replaced.
    response.raise_for_status()
    with open(path, 'wb') as file:
        file.write(response.content)
|
||||
|
||||
cursor_path = os.path.join("screenshots", "cursor.png")
|
||||
if not os.path.exists(cursor_path):
|
||||
cursor_url = "https://vip.helloimg.com/images/2023/12/02/oQPzmt.png"
|
||||
_download_image(cursor_url, cursor_path)
|
||||
screenshot = pyautogui.screenshot()
|
||||
cursor_x, cursor_y = pyautogui.position()
|
||||
cursor = Image.open(cursor_path)
|
||||
screenshot.paste(cursor, (cursor_x, cursor_y), cursor)
|
||||
screenshot.save(file_path)
|
||||
elif user_platform == "Linux":
|
||||
# Use xlib to prevent scrot dependency for Linux
|
||||
@@ -60,5 +74,15 @@ def capture_screen_with_cursor():
|
||||
return send_file(file_path, mimetype='image/png')
|
||||
|
||||
|
||||
@app.route('/platform', methods=['GET'])
def get_platform():
    """Return the value of ``platform.system()`` for the machine running this server."""
    system_name = platform.system()
    return system_name
|
||||
|
||||
|
||||
@app.route('/cursor_position', methods=['GET'])
def get_cursor_position():
    """Return the current mouse cursor position as JSON.

    Returns:
        A JSON response of the form ``{"x": <x>, "y": <y>}``.
    """
    # Read the position once so x and y come from the same sample.
    position = pyautogui.position()
    # Flask reads a bare (x, y) tuple as (body, status); an int body is not a
    # valid response, so the coordinates are serialized explicitly instead.
    return jsonify({"x": position.x, "y": position.y})
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run(debug=True, host="0.0.0.0")
|
||||
|
||||
@@ -2,3 +2,4 @@ python3-xlib==0.15
|
||||
PyAutoGUI==0.9.54
|
||||
Pillow==10.1.0
|
||||
git+https://github.com/moses-palmer/pynput.git@refs/pull/541/head # to make sure that it works on Apple Silicon
|
||||
requests
|
||||
|
||||
8
main.py
8
main.py
@@ -10,13 +10,15 @@ def human_agent():
|
||||
# path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
|
||||
# host="192.168.7.128",
|
||||
host="http://192.168.13.128:5000",
|
||||
snapshot_path="base3",
|
||||
)
|
||||
|
||||
# reset the environment to certain snapshot
|
||||
# observation = env.reset()
|
||||
observation = env.reset()
|
||||
done = False
|
||||
|
||||
while not done:
|
||||
|
||||
for i in range(2):
|
||||
# action = get_human_action()
|
||||
|
||||
# action = {
|
||||
@@ -24,7 +26,7 @@ def human_agent():
|
||||
# "click_type": 3,
|
||||
# }
|
||||
|
||||
action = "pyautogui.dragTo(100, 200, button='left')"
|
||||
action = "pyautogui.moveTo(10, 100)" if i == 0 else "pyautogui.click(button='right')"
|
||||
|
||||
observation, reward, done, info = env.step(action)
|
||||
print("Observation:", observation)
|
||||
|
||||
Reference in New Issue
Block a user