Merge branch 'main' into zdy
This commit is contained in:
BIN
desktop_env/assets/cursor.png
Normal file
BIN
desktop_env/assets/cursor.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 4.7 KiB |
@@ -1,35 +0,0 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from fabric import Connection
|
||||
|
||||
from .xdotool import XDoToolController
|
||||
from .python import PythonController
|
||||
|
||||
class AbstractKeyboardController(ABC):
    """Interface shared by the keyboard backends in this module.

    Concrete subclasses must provide both ``type`` and ``key``; instantiating
    a class that leaves either abstract raises ``TypeError``.
    """

    @abstractmethod
    def type(self, text: str):
        """Send ``text`` to the controlled machine as typed input."""
        raise NotImplementedError

    @abstractmethod
    def key(self, key: str):
        """Send the single key (or key name) ``key``."""
        raise NotImplementedError
|
||||
|
||||
class XDoToolKeyboardController(AbstractKeyboardController, XDoToolController):
    """Keyboard backend that forwards input to ``xdotool`` via the inherited SSH helper."""

    def __init__(self, ssh_connection: Connection):
        """Store the SSH connection on the ``XDoToolController`` base."""
        super().__init__(ssh_connection=ssh_connection)

    def type(self, text: str):
        """Type ``text`` with ``xdotool type``.

        The text is shell-quoted because the command string is executed by a
        remote shell: unquoted, whitespace would split it into several
        arguments and characters such as ``;`` or ``$`` would be interpreted
        by the shell instead of being typed.
        """
        import shlex  # local import keeps this fix independent of the file header

        self._execute_xdotool_command(f"type {shlex.quote(text)}")

    def key(self, key: str):
        """Press and release ``key`` with ``xdotool key`` (shell-quoted, see ``type``)."""
        import shlex

        self._execute_xdotool_command(f"key {shlex.quote(key)}")
|
||||
|
||||
class PythonKeyboardController(AbstractKeyboardController, PythonController):
    """Keyboard backend that runs ``keyboard`` library snippets through the HTTP server."""

    def __init__(self, http_server: str):
        """Store the server URL on the base and prepare the ``python -c`` template."""
        super().__init__(http_server=http_server)
        self.command = "python -c \"import keyboard; {command}\""

    @staticmethod
    def _literal(value) -> str:
        """Return ``value`` as a single-quoted Python string literal.

        Only ASCII letters, digits and a few harmless punctuation characters
        are emitted verbatim; everything else becomes a ``\\xNN`` / ``\\uNNNN``
        / ``\\UNNNNNNNN`` escape.  The result therefore never contains a raw
        quote, so it cannot terminate the literal it is embedded in nor the
        double-quoted ``-c`` argument of ``self.command``.
        """
        pieces = []
        for ch in str(value):
            if ch.isascii() and (ch.isalnum() or ch in " _-.,:/+"):
                pieces.append(ch)
                continue
            code = ord(ch)
            if code <= 0xFF:
                pieces.append(f"\\x{code:02x}")
            elif code <= 0xFFFF:
                pieces.append(f"\\u{code:04x}")
            else:
                pieces.append(f"\\U{code:08x}")
        return "'" + "".join(pieces) + "'"

    def type(self, text: str):
        """Type ``text`` via ``keyboard.write``."""
        snippet = f"keyboard.write({self._literal(text)})"
        self._execute_python_command(self.command.format(command=snippet))

    def key(self, key: str):
        """Press and release ``key`` via ``keyboard.press_and_release``."""
        snippet = f"keyboard.press_and_release({self._literal(key)})"
        self._execute_python_command(self.command.format(command=snippet))
|
||||
@@ -1,144 +0,0 @@
|
||||
from enum import Enum
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from fabric import Connection
|
||||
|
||||
from .xdotool import XDoToolController
|
||||
from .python import PythonController
|
||||
class MouseClick(Enum):
    """Mouse buttons and wheel directions.

    The values are the button numbers the xdotool-based code in this file
    passes to ``click`` / ``mousedown`` / ``mouseup``.
    """

    # Physical buttons.
    LEFT = 1
    MIDDLE = 2
    RIGHT = 3

    # Wheel movement, reported as buttons 4 and 5.
    WHEEL_UP = 4
    WHEEL_DOWN = 5
|
||||
|
||||
class AbstractMouseController(ABC):
    """Interface shared by the mouse backends in this module.

    Every method is abstract; a concrete backend must implement all of them
    before it can be instantiated.
    """

    # --- pointer position -------------------------------------------------

    @abstractmethod
    def mouse_move(self, x: int, y: int):
        """Move the pointer to ``(x, y)``."""
        raise NotImplementedError

    # --- left button ------------------------------------------------------

    @abstractmethod
    def left_down(self):
        """Press the left button."""
        raise NotImplementedError

    @abstractmethod
    def left_up(self):
        """Release the left button."""
        raise NotImplementedError

    @abstractmethod
    def left_click(self):
        """Click the left button."""
        raise NotImplementedError

    # --- middle button ----------------------------------------------------

    @abstractmethod
    def middle_down(self):
        """Press the middle button."""
        raise NotImplementedError

    @abstractmethod
    def middle_up(self):
        """Release the middle button."""
        raise NotImplementedError

    @abstractmethod
    def middle_click(self):
        """Click the middle button."""
        raise NotImplementedError

    # --- right button -----------------------------------------------------

    @abstractmethod
    def right_down(self):
        """Press the right button."""
        raise NotImplementedError

    @abstractmethod
    def right_up(self):
        """Release the right button."""
        raise NotImplementedError

    @abstractmethod
    def right_click(self):
        """Click the right button."""
        raise NotImplementedError

    # --- wheel ------------------------------------------------------------

    @abstractmethod
    def scroll_up(self):
        """Scroll the wheel up."""
        raise NotImplementedError

    @abstractmethod
    def scroll_down(self):
        """Scroll the wheel down."""
        raise NotImplementedError
|
||||
|
||||
|
||||
class XDoToolMouseController(AbstractMouseController, XDoToolController):
    """Mouse backend that forwards every action to ``xdotool`` via the inherited SSH helper.

    Button numbers used below: 1 = left, 2 = middle, 3 = right,
    4 = wheel up, 5 = wheel down.
    """

    def __init__(self, ssh_connection: Connection):
        """Store the SSH connection on the ``XDoToolController`` base."""
        super().__init__(ssh_connection=ssh_connection)

    def _button(self, verb: str, number: int):
        """Run ``xdotool <verb> <number>`` (verb is mousedown, mouseup or click)."""
        self._execute_xdotool_command(f"{verb} {number}")

    def mouse_move(self, x: int, y: int):
        """Move the pointer to ``(x, y)``."""
        self._execute_xdotool_command(f"mousemove {x} {y}")

    def left_down(self):
        """Press button 1."""
        self._button("mousedown", 1)

    def left_up(self):
        """Release button 1."""
        self._button("mouseup", 1)

    def left_click(self):
        """Click button 1."""
        self._button("click", 1)

    def middle_down(self):
        """Press button 2."""
        self._button("mousedown", 2)

    def middle_up(self):
        """Release button 2."""
        self._button("mouseup", 2)

    def middle_click(self):
        """Click button 2."""
        self._button("click", 2)

    def right_down(self):
        """Press button 3."""
        self._button("mousedown", 3)

    def right_up(self):
        """Release button 3."""
        self._button("mouseup", 3)

    def right_click(self):
        """Click button 3."""
        self._button("click", 3)

    def scroll_up(self):
        """Scroll up one step (a click of button 4)."""
        self._button("click", 4)

    def scroll_down(self):
        """Scroll down one step (a click of button 5)."""
        self._button("click", 5)
|
||||
|
||||
class PythonMouseController(AbstractMouseController, PythonController):
    """Mouse backend that runs ``mouse`` library snippets through the HTTP server."""

    def __init__(self, http_server: str):
        """Store the server URL on the base and prepare the ``python -c`` template."""
        super().__init__(http_server=http_server)
        self.command = "python -c \"import mouse; {command}\""

    def _run(self, snippet: str):
        """Wrap ``snippet`` in the ``python -c`` template and send it for execution."""
        self._execute_python_command(self.command.format(command=snippet))

    def _press(self, button: str):
        """Hold ``button`` down."""
        self._run(f"mouse.press(button='{button}')")

    def _release(self, button: str):
        """Let ``button`` go."""
        self._run(f"mouse.release(button='{button}')")

    def _click(self, button: str):
        """Press and release ``button``."""
        self._run(f"mouse.click(button='{button}')")

    def mouse_move(self, x: int, y: int):
        """Move the pointer to ``(x, y)``."""
        self._run(f"mouse.move({x}, {y})")

    def left_down(self):
        """Press the left button."""
        self._press("left")

    def left_up(self):
        """Release the left button."""
        self._release("left")

    def left_click(self):
        """Click the left button."""
        self._click("left")

    def middle_down(self):
        """Press the middle button."""
        self._press("middle")

    def middle_up(self):
        """Release the middle button."""
        self._release("middle")

    def middle_click(self):
        """Click the middle button."""
        self._click("middle")

    def right_down(self):
        """Press the right button."""
        self._press("right")

    def right_up(self):
        """Release the right button."""
        self._release("right")

    def right_click(self):
        """Click the right button."""
        self._click("right")

    def scroll_up(self):
        """Scroll with ``mouse.wheel(10)``."""
        self._run("mouse.wheel(10)")

    def scroll_down(self):
        """Scroll with ``mouse.wheel(-10)``."""
        self._run("mouse.wheel(-10)")
|
||||
@@ -1,34 +1,208 @@
|
||||
import requests
|
||||
import json
|
||||
from typing import Any, Dict
|
||||
import requests
|
||||
from desktop_env.envs.actions import KEYBOARD_KEYS
|
||||
|
||||
|
||||
class PythonController:
    """HTTP client for the automation server reachable at ``http_server``.

    It fetches screenshots and files, and turns action dictionaries into
    ``pyautogui`` snippets that the server executes.
    """

    def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""):
        """Remember the server URL and the template that wraps each snippet.

        Args:
            http_server: base URL of the server, e.g. ``http://host:5000``.
            pkgs_prefix: format string with a ``{command}`` placeholder that
                turns a snippet into the command line sent to ``/execute``.
        """
        self.http_server = http_server
        # fixme: this is a hacky way to execute python commands. fix it and combine it with installation of packages
        self.pkgs_prefix = pkgs_prefix

    def get_screenshot(self):
        """
        Gets a screenshot from the server. With the cursor.

        Returns the raw response body on HTTP 200, otherwise ``None``.
        """
        response = requests.get(self.http_server + "/screenshot")
        if response.status_code == 200:
            return response.content
        print("Failed to get screenshot. Status code:", response.status_code)
        return None

    def get_file(self, file_path: str):
        """
        Gets a file from the server.

        Returns the raw response body on HTTP 200, otherwise ``None``.
        """
        response = requests.post(self.http_server + "/file", data={"file_path": file_path})
        if response.status_code == 200:
            print("File downloaded successfully")
            return response.content
        print("Failed to get file. Status code:", response.status_code)
        return None

    def execute_python_command(self, command: str) -> Any:
        """
        Executes a python command on the server.
        It can be used to execute the pyautogui commands, or... any other python command. who knows?

        Returns the decoded JSON body of the response (for any status code),
        or ``None`` when the request itself fails.
        """
        command = self.pkgs_prefix.format(command=command)
        payload = json.dumps({"command": command})
        headers = {
            'Content-Type': 'application/json'
        }

        try:
            response = requests.post(self.http_server + "/execute", headers=headers, data=payload)
            if response.status_code == 200:
                print("Command executed successfully:", response.text)
            else:
                print("Failed to execute command. Status code:", response.status_code)
            return response.json()
        except requests.exceptions.RequestException as e:
            print("An error occurred while trying to execute the command:", e)
            return None

    @staticmethod
    def _literal(value) -> str:
        """Return ``value`` as a single-quoted Python string literal.

        Only ASCII letters, digits and a few harmless punctuation characters
        are emitted verbatim; everything else becomes a ``\\xNN`` / ``\\uNNNN``
        / ``\\UNNNNNNNN`` escape.  The result never contains a raw quote, so
        it can neither end the literal early nor close the double-quoted
        ``-c`` argument of the default ``pkgs_prefix``.  Plain text such as
        ``hello world`` is rendered exactly as before: ``'hello world'``.
        """
        pieces = []
        for ch in str(value):
            if ch.isascii() and (ch.isalnum() or ch in " _-.,:/+"):
                pieces.append(ch)
                continue
            code = ord(ch)
            if code <= 0xFF:
                pieces.append(f"\\x{code:02x}")
            elif code <= 0xFFFF:
                pieces.append(f"\\u{code:04x}")
            else:
                pieces.append(f"\\U{code:08x}")
        return "'" + "".join(pieces) + "'"

    def _key_literal(self, key) -> str:
        """Validate ``key`` against ``KEYBOARD_KEYS`` and return it as a literal."""
        if key.lower() not in KEYBOARD_KEYS:
            raise Exception(f"Key must be one of {KEYBOARD_KEYS}")
        return self._literal(key)

    def _click(self, parameters):
        """Run ``pyautogui.click`` with whichever optional arguments were given.

        ``button``, the ``x``/``y`` pair and ``num_clicks`` are independent,
        but ``x`` and ``y`` must be given together.
        """
        has_x = "x" in parameters
        if has_x != ("y" in parameters):
            raise Exception(f"Unknown parameters: {parameters}")

        args = []
        if "button" in parameters:
            args.append(f"button={self._literal(parameters['button'])}")
        if has_x:
            args.append(f"x={parameters['x']}")
            args.append(f"y={parameters['y']}")
        if "num_clicks" in parameters:
            args.append(f"clicks={parameters['num_clicks']}")

        # Non-empty parameters that contain none of the known keys are rejected.
        if parameters and not args:
            raise Exception(f"Unknown parameters: {parameters}")
        self.execute_python_command(f"pyautogui.click({', '.join(args)})")

    def _button_call(self, func: str, parameters):
        """Run ``pyautogui.<func>()`` with an optional ``button`` argument."""
        if not parameters:
            self.execute_python_command(f"pyautogui.{func}()")
        elif "button" in parameters:
            self.execute_python_command(f"pyautogui.{func}(button={self._literal(parameters['button'])})")
        else:
            raise Exception(f"Unknown parameters: {parameters}")

    def _click_at(self, func: str, parameters):
        """Run ``pyautogui.<func>()`` at the current position or at ``x``/``y``."""
        if not parameters:
            self.execute_python_command(f"pyautogui.{func}()")
        elif "x" in parameters and "y" in parameters:
            self.execute_python_command(f"pyautogui.{func}(x={parameters['x']}, y={parameters['y']})")
        else:
            raise Exception(f"Unknown parameters: {parameters}")

    def execute_action(self, action: Dict[str, Any]):
        """
        Executes an action on the server computer.

        Args:
            action: dict with an ``action_type`` and an optional ``parameters``
                dict; a missing, ``None`` or empty ``parameters`` all mean
                "no parameters".

        Raises:
            Exception: for an unknown action type, missing or unsupported
                parameters, or a key that is not in ``KEYBOARD_KEYS``.
        """
        action_type = action["action_type"]
        # `or {}` also covers an explicit `"parameters": None`.
        parameters = action.get("parameters") or {}

        if action_type == "MOVE_TO":
            if not parameters:
                self.execute_python_command("pyautogui.moveTo()")
            elif "x" in parameters and "y" in parameters:
                self.execute_python_command(f"pyautogui.moveTo({parameters['x']}, {parameters['y']})")
            else:
                raise Exception(f"Unknown parameters: {parameters}")

        elif action_type == "CLICK":
            self._click(parameters)

        elif action_type == "MOUSE_DOWN":
            self._button_call("mouseDown", parameters)

        elif action_type == "MOUSE_UP":
            self._button_call("mouseUp", parameters)

        elif action_type == "RIGHT_CLICK":
            self._click_at("rightClick", parameters)

        elif action_type == "DOUBLE_CLICK":
            self._click_at("doubleClick", parameters)

        elif action_type == "DRAG_TO":
            # A drag without a target used to be ignored silently; report it
            # like every other branch does.
            if "x" not in parameters or "y" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            x = parameters["x"]
            y = parameters["y"]
            self.execute_python_command(f"pyautogui.dragTo({x}, {y}, duration=1.0, button='left', mouseDownUp=True)")

        elif action_type == "SCROLL":
            # todo: check if it is related to the operating system, as https://github.com/TheDuckAI/DuckTrack/blob/main/ducktrack/playback.py pointed out
            if "dx" not in parameters and "dy" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            if "dx" in parameters:
                self.execute_python_command(f"pyautogui.hscroll({parameters['dx']})")
            if "dy" in parameters:
                self.execute_python_command(f"pyautogui.vscroll({parameters['dy']})")

        elif action_type == "TYPING":
            if "text" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            self.execute_python_command(f"pyautogui.typewrite({self._literal(parameters['text'])})")

        elif action_type == "PRESS":
            if "key" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            self.execute_python_command(f"pyautogui.press({self._key_literal(parameters['key'])})")

        elif action_type == "KEY_DOWN":
            if "key" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            self.execute_python_command(f"pyautogui.keyDown({self._key_literal(parameters['key'])})")

        elif action_type == "KEY_UP":
            if "key" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            self.execute_python_command(f"pyautogui.keyUp({self._key_literal(parameters['key'])})")

        elif action_type == "HOTKEY":
            if "keys" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            keys = parameters["keys"]
            if not isinstance(keys, list):
                raise Exception("Keys must be a list of keys")
            keys_para_rep = ", ".join(self._key_literal(key) for key in keys)
            self.execute_python_command(f"pyautogui.hotkey({keys_para_rep})")

        else:
            raise Exception(f"Unknown action type: {action_type}")
|
||||
|
||||
96
desktop_env/controllers/setup.py
Normal file
96
desktop_env/controllers/setup.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import requests
|
||||
import json
|
||||
|
||||
|
||||
class SetupController:
    """Sends environment-preparation requests to the server's ``/setup`` endpoints."""

    def __init__(self, http_server: str):
        """Store the setup base URL (``http_server`` with ``/setup`` appended)."""
        self.http_server = http_server + "/setup"

    def setup(self, config):
        """
        Setup Config:
        {
            download: list[tuple[string]], # a list of tuples of url of file to download and the save path
            ...
        }

        Runs the download, wallpaper and open steps in that order; each step
        is skipped when ``config`` is empty or lacks the step's key.
        """
        self._download_setup(config)
        self._change_wallpaper(config)
        # self._tidy_desktop(config) todo: implement this
        self._open_setup(config)
        # can add other setup steps

    def _post(self, endpoint: str, payload: dict, failure_message: str) -> None:
        """POST ``payload`` as JSON to ``endpoint`` and print the outcome.

        Request errors are reported and swallowed, so one failing step does
        not abort the remaining setup steps.
        """
        body = json.dumps(payload)
        headers = {
            'Content-Type': 'application/json'
        }

        try:
            response = requests.post(self.http_server + endpoint, headers=headers, data=body)
            if response.status_code == 200:
                print("Command executed successfully:", response.text)
            else:
                # Report the real status code; the body is kept for context.
                print(failure_message, "Status code:", response.status_code, "Response:", response.text)
        except requests.exceptions.RequestException as e:
            print("An error occurred while trying to send the request:", e)

    def _download_setup(self, config):
        """Ask the server to download every ``(url, path)`` pair in ``config['download']``.

        Raises:
            Exception: when a pair has an empty URL or path.
        """
        if not config or 'download' not in config:
            return
        for url, path in config['download']:
            if not url or not path:
                raise Exception(f"Setup Download - Invalid URL ({url}) or path ({path}).")

            # send request to server to download file
            self._post("/download_file", {"url": url, "path": path}, "Failed to download file.")

    def _change_wallpaper(self, config):
        """Ask the server to set the wallpaper to ``config['wallpaper']``.

        Raises:
            Exception: when the configured path is empty.
        """
        if not config or 'wallpaper' not in config:
            return
        path = config['wallpaper']
        if not path:
            raise Exception(f"Setup Wallpaper - Invalid path ({path}).")

        # send request to server to change wallpaper
        self._post("/change_wallpaper", {"path": path}, "Failed to change wallpaper.")

    def _tidy_desktop(self, config):
        """Placeholder; not implemented yet."""
        raise NotImplementedError

    def _open_setup(self, config):
        """Ask the server to open every path in ``config['open']``.

        Raises:
            Exception: when a path is empty.
        """
        if not config or 'open' not in config:
            return
        for path in config['open']:
            if not path:
                raise Exception(f"Setup Open - Invalid path ({path}).")

            # send request to server to open file
            self._post("/open_file", {"path": path}, "Failed to open file.")
|
||||
@@ -1,9 +0,0 @@
|
||||
from fabric import Connection
|
||||
|
||||
class XDoToolController:
    """Base class that runs ``xdotool`` on a remote display through an SSH connection."""

    def __init__(self, ssh_connection: "Connection"):
        """Keep the connection object whose ``run`` method executes remote commands."""
        self.ssh_connection = ssh_connection

    def _execute_xdotool_command(self, command: str) -> str:
        """Run ``xdotool <command>`` with ``DISPLAY=:0`` and return its stripped stdout.

        Args:
            command: the xdotool sub-command and its arguments as one string;
                it is interpolated verbatim into the remote command line.
        """
        result = self.ssh_connection.run(f"DISPLAY=:0 xdotool {command}", hide=True)
        return result.stdout.strip()
|
||||
190
desktop_env/envs/actions.py
Normal file
190
desktop_env/envs/actions.py
Normal file
@@ -0,0 +1,190 @@
|
||||
X_MAX = 1920  # TODO: get the screen resolution
Y_MAX = 1080

# Key names allowed as the range of the PRESS / KEY_DOWN / KEY_UP / HOTKEY parameters.
KEYBOARD_KEYS = [
    '\t', '\n', '\r', ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    '{', '|', '}', '~',
    'accept', 'add', 'alt', 'altleft', 'altright', 'apps', 'backspace',
    'browserback', 'browserfavorites', 'browserforward', 'browserhome',
    'browserrefresh', 'browsersearch', 'browserstop',
    'capslock', 'clear', 'convert', 'ctrl', 'ctrlleft', 'ctrlright',
    'decimal', 'del', 'delete', 'divide', 'down',
    'end', 'enter', 'esc', 'escape', 'execute',
    'f1', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19',
    'f2', 'f20', 'f21', 'f22', 'f23', 'f24',
    'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9',
    'final', 'fn', 'hanguel', 'hangul', 'hanja', 'help', 'home', 'insert',
    'junja', 'kana', 'kanji',
    'launchapp1', 'launchapp2', 'launchmail', 'launchmediaselect', 'left',
    'modechange', 'multiply', 'nexttrack', 'nonconvert',
    'num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9', 'numlock',
    'pagedown', 'pageup', 'pause', 'pgdn', 'pgup', 'playpause', 'prevtrack',
    'print', 'printscreen', 'prntscrn', 'prtsc', 'prtscr',
    'return', 'right', 'scrolllock', 'select', 'separator',
    'shift', 'shiftleft', 'shiftright', 'sleep', 'stop', 'subtract',
    'tab', 'up', 'volumedown', 'volumemute', 'volumeup',
    'win', 'winleft', 'winright', 'yen',
    'command', 'option', 'optionleft', 'optionright',
]


def _param(kind, allowed, optional):
    """Build one parameter spec: its type, its allowed range and whether it may be omitted."""
    return {"type": kind, "range": allowed, "optional": optional}


def _xy(optional):
    """Build the ``x``/``y`` screen-coordinate specs, bounded by ``X_MAX``/``Y_MAX``."""
    return {
        "x": _param(float, [0, X_MAX], optional),
        "y": _param(float, [0, Y_MAX], optional),
    }


def _button():
    """Build the optional mouse-button spec."""
    return _param(str, ["left", "right", "middle"], True)


def _key():
    """Build the mandatory single-key spec."""
    return _param(str, KEYBOARD_KEYS, False)


def _action(action_type, note, parameters):
    """Build one action-space entry."""
    return {"action_type": action_type, "note": note, "parameters": parameters}


# One entry per supported action: its name, a human-readable note and the
# specification of each parameter.
ACTION_SPACE = [
    _action(
        "MOVE_TO",
        "move the cursor to the specified position",
        _xy(optional=False),
    ),
    _action(
        "CLICK",
        "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position",
        {
            "button": _button(),
            **_xy(optional=True),
            "num_clicks": _param(int, [1, 2, 3], True),
        },
    ),
    _action(
        "MOUSE_DOWN",
        "press the left button if the button not specified, otherwise press the specified button",
        {"button": _button()},
    ),
    _action(
        "MOUSE_UP",
        "release the left button if the button not specified, otherwise release the specified button",
        {"button": _button()},
    ),
    _action(
        "RIGHT_CLICK",
        "right click at the current position if x and y are not specified, otherwise right click at the specified position",
        _xy(optional=True),
    ),
    _action(
        "DOUBLE_CLICK",
        "double click at the current position if x and y are not specified, otherwise double click at the specified position",
        _xy(optional=True),
    ),
    _action(
        "DRAG_TO",
        "drag the cursor to the specified position with the left button pressed",
        _xy(optional=False),
    ),
    _action(
        "SCROLL",
        "scroll the mouse wheel up or down",
        {
            "dx": _param(int, None, False),
            "dy": _param(int, None, False),
        },
    ),
    _action(
        "TYPING",
        "type the specified text",
        {"text": _param(str, None, False)},
    ),
    _action(
        "PRESS",
        "press the specified key and release it",
        {"key": _key()},
    ),
    _action(
        "KEY_DOWN",
        "press the specified key",
        {"key": _key()},
    ),
    _action(
        "KEY_UP",
        "release the specified key",
        {"key": _key()},
    ),
    _action(
        "HOTKEY",
        "press the specified key combination",
        {"keys": _param(list, [KEYBOARD_KEYS], False)},
    ),
]
|
||||
@@ -1,203 +1,186 @@
|
||||
from enum import Enum
|
||||
from typing import Literal
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from fabric import Connection
|
||||
import time
|
||||
import uuid
|
||||
import platform
|
||||
from typing import List
|
||||
|
||||
import gymnasium as gym
|
||||
from gymnasium import spaces
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import requests
|
||||
|
||||
from desktop_env.controllers.mouse import MouseClick, AbstractMouseController, XDoToolMouseController, PythonMouseController
|
||||
from desktop_env.controllers.keyboard import AbstractKeyboardController, XDoToolKeyboardController, PythonKeyboardController
|
||||
from desktop_env.controllers.python import PythonController
|
||||
from desktop_env.controllers.setup import SetupController
|
||||
from desktop_env.evaluators import eval_funcs
|
||||
|
||||
class Action(Enum):
    """Kinds of action in the discrete action space (mouse and keyboard)."""

    # Mouse actions.
    CLICK = 0
    MOUSE_DOWN = 1
    MOUSE_UP = 2
    MOUSE_MOVE = 3

    # Keyboard actions.
    KEY = 4
    TYPE = 5
|
||||
|
||||
VM_TYPE = Literal['ubuntu', 'windows']
|
||||
def _execute_command(command: List[str]) -> None:
|
||||
if command[:4] == ["vmrun", "-T", "ws", "start"]:
|
||||
p = subprocess.Popen(command)
|
||||
p.wait()
|
||||
else:
|
||||
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=60, text=True)
|
||||
if result.returncode != 0:
|
||||
raise Exception("\033[91m" + result.stdout + result.stderr + "\033[0m")
|
||||
return result.stdout
|
||||
|
||||
|
||||
class DesktopEnv(gym.Env):
|
||||
"""DesktopEnv with OpenAI Gym interface."""
|
||||
|
||||
def __init__(self, path_to_vm: str, username: str, password: str,
|
||||
host: str, snapshot_path: str = "snapshot", vm_os: VM_TYPE = "ubuntu"):
|
||||
def __init__(
|
||||
self,
|
||||
path_to_vm: str,
|
||||
snapshot_path: str = "base",
|
||||
instruction: str = None,
|
||||
config: dict = None,
|
||||
evaluator: dict = None,
|
||||
action_space: str = "computer_13",
|
||||
):
|
||||
# Initialize environment variables
|
||||
self.path_to_vm = path_to_vm
|
||||
self.username = username
|
||||
self.password = password
|
||||
self.host = host
|
||||
self.snapshot_path = snapshot_path
|
||||
|
||||
self.screen_width = 800
|
||||
self.screen_height = 800
|
||||
# Define the action and observation space
|
||||
self.action_space = spaces.Dict({
|
||||
"action_type": spaces.Discrete(len(Action)),
|
||||
"click_type": spaces.Discrete(len(MouseClick)),
|
||||
"x": spaces.Discrete(self.screen_width),
|
||||
"y": spaces.Discrete(self.screen_height),
|
||||
"key": spaces.MultiDiscrete([128] * 10), # max 10 characters, ASCII
|
||||
"text": spaces.MultiDiscrete([128] * 10) # max 10 characters, ASCII
|
||||
})
|
||||
self.snapshot_path = snapshot_path # todo: handling the logic of snapshot directory
|
||||
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3), dtype=np.uint8)
|
||||
|
||||
# Additional setup
|
||||
self.metadata = {'render.modes': ['rgb_array']}
|
||||
# Initialize emulator and controller
|
||||
print("Initializing...")
|
||||
self._start_emulator()
|
||||
self._wait_for_emulator_load()
|
||||
self.host = f"http://{self._get_vm_ip()}:5000"
|
||||
self.controller = PythonController(http_server=self.host)
|
||||
self.setup_controller = SetupController(http_server=self.host)
|
||||
self.instruction = instruction
|
||||
self.config = config
|
||||
self.evaluator = evaluator
|
||||
|
||||
# set up controllers
|
||||
self.mouse_controller, self.keyboard_controller = self._create_controllers(vm_os)
|
||||
|
||||
def _create_controllers(self, vm_os: VM_TYPE) -> tuple[AbstractMouseController, AbstractKeyboardController]:
|
||||
if vm_os == "ubuntu":
|
||||
ssh_connection = Connection(host=self.host, user=self.username, connect_kwargs={"password": self.password})
|
||||
mouse_controller = XDoToolMouseController(ssh_connection)
|
||||
keyboard_controller = XDoToolKeyboardController(ssh_connection)
|
||||
elif vm_os == "windows":
|
||||
mouse_controller = PythonMouseController(http_server=self.host)
|
||||
keyboard_controller = PythonKeyboardController(http_server=self.host)
|
||||
else:
|
||||
raise NotImplementedError(vm_os)
|
||||
|
||||
return mouse_controller, keyboard_controller
|
||||
# mode: human or machine
|
||||
assert action_space in ["computer_13", "pyautogui"]
|
||||
self.action_space = action_space
|
||||
# todo: define the action space and the observation space as gym did, or extend theirs
|
||||
|
||||
def _start_emulator(self):
|
||||
self._execute_command(["vmrun", "start", self.path_to_vm])
|
||||
|
||||
def _wait_for_emulator_load(self):
|
||||
while True:
|
||||
try:
|
||||
output = subprocess.check_output("vmrun -T ws list", shell=True, stderr=subprocess.STDOUT)
|
||||
output = output.decode()
|
||||
if self.path_to_vm.lstrip("~/") in output:
|
||||
print("VM is running.")
|
||||
return
|
||||
break
|
||||
else:
|
||||
print("Waiting for VM to start...")
|
||||
time.sleep(5)
|
||||
print("Starting VM...")
|
||||
_execute_command(["vmrun", "-T", "ws", "start", self.path_to_vm])
|
||||
time.sleep(3)
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"Error executing command: {e.output.decode().strip()}")
|
||||
return
|
||||
|
||||
def _execute_command(self, command: list[str]) -> None:
|
||||
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout, stderr = process.communicate()
|
||||
if process.returncode != 0:
|
||||
print(f"Error executing command: {command}")
|
||||
print(stderr.decode())
|
||||
return None
|
||||
else:
|
||||
return stdout.decode()
|
||||
|
||||
def _execute_xdotool_command(self, command: list[str]) -> None:
|
||||
result = self.ssh_connection.run(f"DISPLAY=:0 xdotool {command}", hide=True)
|
||||
return result.stdout.strip()
|
||||
def _get_vm_ip(self):
    """Ask ``vmrun`` for the guest's IP address, retrying while it is unavailable.

    Makes up to 10 attempts, sleeping 5 seconds after each failed one.

    Returns:
        The stripped output of ``vmrun -T ws getGuestIPAddress``.

    Raises:
        Exception: when every attempt fails.
    """
    max_retries = 10
    print("Getting IP Address...")
    for _ in range(max_retries):
        try:
            output = _execute_command(["vmrun", "-T", "ws", "getGuestIPAddress", self.path_to_vm]).strip()
        except Exception:
            # Catch only ordinary errors: a bare `except:` would also swallow
            # KeyboardInterrupt/SystemExit and make the retry loop
            # uninterruptible.
            time.sleep(5)
            print("Retrying...")
        else:
            print(f"IP address: {output}")
            return output
    raise Exception("Failed to get VM IP address!")
|
||||
|
||||
def _save_state(self):
|
||||
self._execute_command(["vmrun", "-T", "ws" "snapshot", self.path_to_vm, self.snapshot_path])
|
||||
|
||||
def _click(self, click: MouseClick):
    """Send ``xdotool click`` for the button number in ``click.value``."""
    button = click.value
    self._execute_xdotool_command(f"click {button}")
|
||||
|
||||
def _mousedown(self, click: MouseClick):
    """Send ``xdotool mousedown`` for the button number in ``click.value``."""
    button = click.value
    self._execute_xdotool_command(f"mousedown {button}")
|
||||
|
||||
def _mouseup(self, click: MouseClick):
    """Send ``xdotool mouseup`` for the button number in ``click.value``."""
    button = click.value
    self._execute_xdotool_command(f"mouseup {button}")
|
||||
|
||||
def _mouse_move(self, x: int, y: int):
|
||||
self._execute_xdotool_command(f"mousemove {x} {y}")
|
||||
|
||||
def _key(self, key: str):
|
||||
self._execute_xdotool_command(f"key {key}")
|
||||
|
||||
def _type(self, text: str):
|
||||
self._execute_xdotool_command(f"type {text}")
|
||||
_execute_command(["vmrun", "-T", "ws" "snapshot", self.path_to_vm, self.snapshot_path])
|
||||
|
||||
def _get_screenshot(self):
|
||||
image_path = "./screenshot.png"
|
||||
self._execute_command(["vmrun", "-T", "ws", "-gu", self.username, "-gp", self.password, "captureScreen", self.path_to_vm, image_path])
|
||||
random_uuid = str(uuid.uuid4())
|
||||
os.makedirs(os.path.join("tmp", random_uuid), exist_ok=True)
|
||||
image_path = os.path.join("tmp", random_uuid, "screenshot.png")
|
||||
|
||||
# Get the screenshot and save to the image_path
|
||||
screenshot = self.controller.get_screenshot()
|
||||
with open(image_path, "wb") as f:
|
||||
f.write(screenshot)
|
||||
|
||||
return image_path
|
||||
|
||||
|
||||
def _get_obs(self):
    """Take a screenshot and return its contents as a NumPy array.

    The image file produced by ``self._get_screenshot()`` is opened with
    ``Image.open`` and converted with ``np.array``.
    """
    screenshot_image_path = self._get_screenshot()
    with Image.open(screenshot_image_path) as img:
        return np.array(img)
|
||||
|
||||
def reset(self, seed=None, options=None):
    """Revert the VM to its snapshot, boot it, run task setup and observe.

    Args:
        seed: Accepted but not used by this method.
        options: Accepted but not used by this method.

    Returns:
        The result of ``self._get_obs()`` taken after setup has completed.
    """
    print("Resetting environment...")

    print("Reverting to snapshot to {}...".format(self.snapshot_path))
    _execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path])
    time.sleep(5)

    print("Starting emulator...")
    self._start_emulator()
    self._wait_for_emulator_load()
    print("Emulator started.")

    print("Setting up environment...")
    self.setup_controller.setup(self.config)

    time.sleep(5)
    print("Environment setup complete.")

    return self._get_obs()
|
||||
|
||||
def step(self, action, pause=0.5):
    """Execute one action in the VM and return the resulting transition.

    Args:
        action: Passed to ``self.controller.execute_action`` when
            ``self.action_space`` is ``"computer_13"``, or to
            ``self.controller.execute_python_command`` when it is
            ``"pyautogui"``.
        pause: Seconds to sleep after the action before observing.

    Returns:
        A tuple ``(observation, reward, done, info)`` where ``observation``
        is a dict with the keys ``"screenshot"`` and ``"instruction"``.
        ``reward`` is always 0, ``done`` is always False and ``info`` is an
        empty dict for now.
    """
    # fixme: add reminding logic here, decide if the action is valid for the current action_space
    if self.action_space == "computer_13":
        # the set of all possible actions defined in the action representation
        self.controller.execute_action(action)
    elif self.action_space == "pyautogui":
        # the set of all possible python commands insides `pyautogui`
        self.controller.execute_python_command(action)

    # todo: maybe for the better here we need to add a logic to wait until the rendering is done
    time.sleep(pause)

    # Observe once, after the pause; an earlier capture taken before the
    # pause was immediately overwritten and only cost an extra screenshot.
    observation = {
        "screenshot": self._get_obs(),
        "instruction": self.instruction,
    }
    reward = 0  # todo: Define reward calculation for each example
    done = False  # todo: Define episode termination condition for each example
    info = {}
    return observation, reward, done, info
|
||||
|
||||
def evaluate(self):
    """
    Evaluate whether the task is successfully completed.

    Every entry of ``self.evaluator["paths"]`` is copied to a local file, and
    the function registered in ``eval_funcs`` under ``self.evaluator["func"]``
    is called with those local paths as keyword arguments.

    Returns:
        Whatever the selected evaluation function returns.

    Raises:
        NotImplementedError: if a file entry has a ``"type"`` other than
            ``"cloud_file"`` or ``"vm_file"``.
    """

    def copy_file_to_local(_file_info):
        """Copy one described file into ``tmp/<uuid>/tmp.xlsx`` and return that path."""
        random_uuid = str(uuid.uuid4())
        os.makedirs(os.path.join("tmp", random_uuid), exist_ok=True)
        _path = os.path.join("tmp", random_uuid, "tmp.xlsx")

        file_type = _file_info["type"]
        if file_type == "cloud_file":
            url = _file_info["path"]
            # The context manager releases the streamed connection even if
            # the status check or the write fails part-way.
            with requests.get(url, stream=True) as response:
                response.raise_for_status()
                with open(_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
        elif file_type == "vm_file":
            # fixme: stream this part maybe as well
            file = self.controller.get_file(_file_info["path"])
            with open(_path, "wb") as f:
                f.write(file)
        else:
            raise NotImplementedError

        return _path

    # todo: make this more flexible by refactoring
    eval_func = eval_funcs[self.evaluator["func"]]
    eval_func_vars = {}

    for var_name, file_info in self.evaluator["paths"].items():
        eval_func_vars[var_name] = copy_file_to_local(file_info)

    return eval_func(**eval_func_vars)
|
||||
|
||||
def render(self, mode='rgb_array'):
    """Return ``self._get_obs()`` for ``mode='rgb_array'``.

    Raises:
        ValueError: for any other *mode*.
    """
    if mode != 'rgb_array':
        raise ValueError('Unsupported render mode: {}'.format(mode))
    return self._get_obs()
|
||||
|
||||
def close(self):
    """Stop the VM at ``self.path_to_vm`` with ``vmrun stop``."""
    # Issue the stop command once; a second identical call added nothing.
    self._execute_command(["vmrun", "stop", self.path_to_vm])
|
||||
|
||||
5
desktop_env/evaluators/__init__.py
Normal file
5
desktop_env/evaluators/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from .table import compare_table
|
||||
|
||||
# Registry of evaluation functions, looked up by the string key stored in a
# task's evaluator configuration.
eval_funcs = {"compare_table(expected, actual)": compare_table}
|
||||
0
desktop_env/evaluators/replay.py
Normal file
0
desktop_env/evaluators/replay.py
Normal file
14
desktop_env/evaluators/table.py
Normal file
14
desktop_env/evaluators/table.py
Normal file
@@ -0,0 +1,14 @@
|
||||
def compare_table(expected, actual):
    """Return 1 if both Excel files load into equal DataFrames, otherwise 0.

    Args:
        expected: Location of the reference workbook, handed to ``pandas.read_excel``.
        actual: Location of the workbook under test, handed to ``pandas.read_excel``.
    """
    import pandas as pd

    expected_df, actual_df = (pd.read_excel(source) for source in (expected, actual))

    # Compare the DataFrames
    return int(expected_df.equals(actual_df))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual check: python table.py EXPECTED.xlsx ACTUAL.xlsx
    # The paths come from the command line; hard-coded empty strings could
    # never be opened by compare_table.
    import sys

    if len(sys.argv) != 3:
        sys.exit("usage: table.py EXPECTED.xlsx ACTUAL.xlsx")
    path1, path2 = sys.argv[1:3]

    print(compare_table(path1, path2))
|
||||
190
desktop_env/server/main.py
Normal file
190
desktop_env/server/main.py
Normal file
@@ -0,0 +1,190 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
import platform
|
||||
import subprocess
|
||||
import requests
|
||||
|
||||
import Xlib.display
|
||||
import pyautogui
|
||||
from PIL import ImageGrab, Image
|
||||
from flask import Flask, request, jsonify, send_file
|
||||
|
||||
# Flask application object; the route decorators below register on it.
app = Flask(__name__)

# Zero out pyautogui's PAUSE and DARWIN_CATCH_UP_TIME settings.
pyautogui.DARWIN_CATCH_UP_TIME = 0
pyautogui.PAUSE = 0
|
||||
|
||||
|
||||
@app.route('/execute', methods=['POST'])
def execute_command():
    """Run the shell command given in the JSON body and report its output.

    Responds with ``status``/``output``/``error`` JSON on completion, or with
    ``status``/``message`` JSON and HTTP 500 if running the command raises.
    """
    payload = request.json
    # The 'command' key in the JSON request should contain the command to be executed.
    shell_command = payload.get('command', '')

    # Execute the command without any safety checks.
    try:
        completed = subprocess.run(
            shell_command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        success_body = {
            'status': 'success',
            'output': completed.stdout,
            'error': completed.stderr,
        }
        return jsonify(success_body)
    except Exception as exc:
        failure_body = {'status': 'error', 'message': str(exc)}
        return jsonify(failure_body), 500
|
||||
|
||||
|
||||
@app.route('/screenshot', methods=['GET'])
def capture_screen_with_cursor():
    """Capture the screen, including the mouse cursor, and return it as a PNG.

    On Windows and Linux the screen is grabbed with pyautogui and a cursor
    image is pasted at the current pointer position. On macOS the
    ``screencapture -C`` utility is used. Any other platform receives a JSON
    error with HTTP 501 instead of a missing or stale file.
    """
    # fixme: when running on virtual machines, the cursor is not captured, don't know why

    file_path = os.path.join("screenshots", "screenshot.png")
    user_platform = platform.system()

    # Ensure the screenshots directory exists
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    # fixme: This is a temporary fix for the cursor not being captured on Windows and Linux
    if user_platform in ("Windows", "Linux"):
        cursor_path = os.path.join("screenshots", "cursor.png")
        if not os.path.exists(cursor_path):
            cursor_url = "https://vip.helloimg.com/images/2023/12/02/oQPzmt.png"
            response = requests.get(cursor_url)
            # Fail now rather than cache an HTTP error page as cursor.png,
            # which would break every later request until deleted by hand.
            response.raise_for_status()
            with open(cursor_path, 'wb') as file:
                file.write(response.content)

        screenshot = pyautogui.screenshot()
        cursor_x, cursor_y = pyautogui.position()
        with Image.open(cursor_path) as cursor_file:
            # make the cursor smaller
            cursor = cursor_file.resize((int(cursor_file.width / 1.5), int(cursor_file.height / 1.5)))
        # The cursor image doubles as the paste mask.
        screenshot.paste(cursor, (cursor_x, cursor_y), cursor)
        screenshot.save(file_path)
    elif user_platform == "Darwin":  # (Mac OS)
        # Use the screencapture utility to capture the screen with the cursor
        subprocess.run(["screencapture", "-C", file_path])
    else:
        message = f"The platform you're using ({user_platform}) is not currently supported"
        print(message)
        return jsonify({"error": message}), 501

    return send_file(file_path, mimetype='image/png')
|
||||
|
||||
|
||||
@app.route('/file', methods=['POST'])
def get_file():
    """Send back the file named by the ``file_path`` form field.

    Responds with HTTP 400 when the field is absent and HTTP 404 when
    ``send_file`` raises ``FileNotFoundError``.
    """
    # Retrieve filename from the POST request
    if 'file_path' not in request.form:
        return jsonify({"error": "file_path is required"}), 400
    requested_path = request.form['file_path']

    try:
        # Check if the file exists and send it to the user
        attachment = send_file(requested_path, as_attachment=True)
    except FileNotFoundError:
        # If the file is not found, return a 404 error
        return jsonify({"error": "File not found"}), 404
    return attachment
|
||||
|
||||
|
||||
@app.route('/platform', methods=['GET'])
def get_platform():
    """Return the value of ``platform.system()`` for the machine running this server."""
    system_name = platform.system()
    return system_name
|
||||
|
||||
|
||||
@app.route('/cursor_position', methods=['GET'])
def get_cursor_position():
    """Return the current pointer position as JSON: ``{"x": ..., "y": ...}``."""
    # Sample once so x and y belong to the same reading.
    position = pyautogui.position()
    # Returning a bare (x, y) tuple would be read by Flask as
    # (body, status), which is not a valid response; send JSON instead.
    return jsonify({"x": position.x, "y": position.y})
|
||||
|
||||
|
||||
@app.route("/setup/change_wallpaper", methods=['POST'])
def change_wallpaper():
    """Set the desktop wallpaper to the image named by the JSON field ``path``.

    Responds with HTTP 400 when no path is supplied, 404 when the file does
    not exist, 501 on a platform other than Windows/Linux/macOS, and 500 when
    the platform-specific call raises or exits with a non-zero status.
    """
    data = request.json
    path = data.get('path', None)

    if not path:
        return "Path not supplied!", 400

    path = Path(path)

    if not path.exists():
        return f"File not found: {path}", 404

    # Use an absolute path so a relative input still yields a valid
    # ``file://`` URI and does not depend on the target tool's working dir.
    absolute_path = path.resolve()

    try:
        user_platform = platform.system()
        if user_platform == "Windows":
            import ctypes
            # NOTE(review): 20 and 3 look like SPI_SETDESKWALLPAPER and
            # SPIF_UPDATEINIFILE | SPIF_SENDCHANGE - confirm against Win32 docs.
            ctypes.windll.user32.SystemParametersInfoW(20, 0, str(absolute_path), 3)
        elif user_platform == "Linux":
            subprocess.run(
                ["gsettings", "set", "org.gnome.desktop.background", "picture-uri", f"file://{absolute_path}"],
                check=True,
            )
        elif user_platform == "Darwin":  # (Mac OS)
            subprocess.run(
                ["osascript", "-e",
                 f'tell application "Finder" to set desktop picture to POSIX file "{absolute_path}"'],
                check=True,
            )
        else:
            # Previously this fell through and reported success without doing anything.
            return f"Unsupported platform: {user_platform}", 501
        return "Wallpaper changed successfully"
    except Exception as e:
        return f"Failed to change wallpaper. Error: {e}", 500
|
||||
|
||||
|
||||
@app.route("/setup/download_file", methods=['POST'])
def download_file():
    """Download the JSON field ``url`` to the JSON field ``path``.

    Parent directories of ``path`` are created as needed. The download is
    attempted up to ``max_retries`` times. Responds with HTTP 400 when either
    field is missing and with HTTP 500 when every attempt fails.
    """
    data = request.json
    url = data.get('url', None)
    path = data.get('path', None)

    if not url or not path:
        return "Path or URL not supplied!", 400

    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    max_retries = 3
    # The name bound by ``except ... as e`` is deleted when the except block
    # ends, so the last failure is kept here for the final error message.
    last_error = None
    for i in range(max_retries):
        try:
            # The context manager releases the streamed connection on every path.
            with requests.get(url, stream=True) as response:
                response.raise_for_status()

                with open(path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
            return "File downloaded successfully"

        except requests.RequestException as e:
            last_error = e
            print(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")

    return f"Failed to download {url}. No retries left. Error: {last_error}", 500
|
||||
|
||||
|
||||
@app.route("/setup/open_file", methods=['POST'])
def open_file():
    """Open the file named by the JSON field ``path`` with the desktop's default handler.

    Responds with HTTP 400 when no path is supplied, 404 when the file does
    not exist, and 500 when launching the handler fails.
    """
    data = request.json
    path = data.get('path', None)

    if not path:
        return "Path not supplied!", 400

    path = Path(path)

    if not path.exists():
        return f"File not found: {path}", 404

    try:
        user_platform = platform.system()
        if user_platform == "Windows":
            # os.startfile only exists on Windows.
            os.startfile(path)
        elif user_platform == "Darwin":
            subprocess.run(["open", str(path)], check=True)
        else:
            subprocess.run(["xdg-open", str(path)], check=True)
        return "File opened successfully"
    except Exception as e:
        return f"Failed to open {path}. Error: {e}", 500
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Start the Flask server with debug mode on, bound to host 0.0.0.0.
    run_options = {"debug": True, "host": "0.0.0.0"}
    app.run(**run_options)
|
||||
6
desktop_env/server/requirements.txt
Normal file
6
desktop_env/server/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
python3-xlib==0.15
|
||||
PyAutoGUI==0.9.54
|
||||
Pillow==10.1.0
|
||||
git+https://github.com/moses-palmer/pynput.git@refs/pull/541/head # to make sure that it works on Apple Silicon
|
||||
requests
|
||||
flask
|
||||
@@ -1,29 +0,0 @@
|
||||
from flask import Flask, request, jsonify
|
||||
import subprocess
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route('/execute', methods=['POST'])
def execute_command():
    """Run the shell command given in the JSON body and report its output.

    Responds with ``status``/``output``/``error`` JSON on completion, or with
    ``status``/``message`` JSON and HTTP 500 if running the command raises.
    """
    payload = request.json
    # The 'command' key in the JSON request should contain the command to be executed.
    shell_command = payload.get('command', '')

    # Execute the command without any safety checks.
    try:
        process = subprocess.Popen(
            shell_command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        raw_out, raw_err = process.communicate()

        success_body = {
            'status': 'success',
            'output': raw_out.decode(),
            'error': raw_err.decode(),
        }
        return jsonify(success_body)
    except Exception as exc:
        failure_body = {'status': 'error', 'message': str(exc)}
        return jsonify(failure_body), 500
|
||||
|
||||
if __name__ == '__main__':
    # Start the Flask server with debug mode on, bound to host 0.0.0.0.
    run_options = {"debug": True, "host": "0.0.0.0"}
    app.run(**run_options)
|
||||
Reference in New Issue
Block a user