mouse and keyboard controllers for windows and linux
This commit is contained in:
@@ -22,9 +22,16 @@
|
|||||||
2. `rm -rf ~/screenshot.png`
|
2. `rm -rf ~/screenshot.png`
|
||||||
7. Set up python and install [mouse](https://github.com/boppreh/mouse/) and [keyboard](https://github.com/jordansissel/xdotool)
|
7. Set up python and install [mouse](https://github.com/boppreh/mouse/) and [keyboard](https://github.com/jordansissel/xdotool)
|
||||||
|
|
||||||
|
## Windows setup guide
|
||||||
|
|
||||||
|
1. Copy the file `windows_server/main.py` to the Windows VM
|
||||||
|
2. Make sure `mouse`, `keyboard`, and `flask` (imported by `main.py`) are installed
|
||||||
|
3. Run the server with `pythonw main.py`
|
||||||
|
4. Run `ipconfig /all` and find the VM's IP address
|
||||||
|
|
||||||
## Road map (Proposed)
|
## Road map (Proposed)
|
||||||
|
|
||||||
- [ ] Explore VMware, and whether it can be connected to and controlled through the mouse package
|
- [x] Explore VMware, and whether it can be connected to and controlled through the mouse package
|
||||||
- [x] Explore Windows and MacOS, whether it can be installed
|
- [x] Explore Windows and MacOS, whether it can be installed
|
||||||
- MacOS is closed source and cannot be legally installed
|
- MacOS is closed source and cannot be legally installed
|
||||||
- Windows is available legally and can be installed
|
- Windows is available legally and can be installed
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from fabric import Connection
|
from fabric import Connection
|
||||||
|
|
||||||
from xdotool import XDoToolController
|
from .xdotool import XDoToolController
|
||||||
|
from .python import PythonController
|
||||||
|
|
||||||
class AbstractMouseController(ABC):
|
class AbstractKeyboardController(ABC):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def type(self, text: str):
|
def type(self, text: str):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
@@ -12,7 +13,7 @@ class AbstractMouseController(ABC):
|
|||||||
def key(self, key: str):
|
def key(self, key: str):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
class XDoToolKeyboardController(AbstractMouseController, XDoToolController):
|
class XDoToolKeyboardController(AbstractKeyboardController, XDoToolController):
|
||||||
def __init__(self, ssh_connection: Connection):
|
def __init__(self, ssh_connection: Connection):
|
||||||
super().__init__(ssh_connection=ssh_connection)
|
super().__init__(ssh_connection=ssh_connection)
|
||||||
|
|
||||||
@@ -22,16 +23,13 @@ class XDoToolKeyboardController(AbstractMouseController, XDoToolController):
|
|||||||
def key(self, key: str):
|
def key(self, key: str):
|
||||||
self._execute_xdotool_command(f"key {key}")
|
self._execute_xdotool_command(f"key {key}")
|
||||||
|
|
||||||
class PythonKeyboardController(AbstractKeyboardController, PythonController):
    """Keyboard controller that sends ``keyboard``-package one-liners to the VM.

    Every action is rendered as a ``python -c "import keyboard; ..."`` command
    string and handed to ``_execute_python_command``.
    """

    def __init__(self, http_server: str):
        """Store the server address and the command template.

        Args:
            http_server: Forwarded unchanged to ``PythonController.__init__``.
        """
        super().__init__(http_server=http_server)
        self.command = "python -c \"import keyboard; {command}\""

    @staticmethod
    def _literal(text: str) -> str:
        """Return a Python expression that evaluates to *text*.

        The template wraps the statement in double quotes, and the value used
        to be spliced between single quotes, so any quote or backslash in the
        input produced a broken (or injected) command. Hex-encoding the UTF-8
        bytes yields an expression made only of hex digits and fixed
        punctuation, which survives both layers of quoting.
        """
        encoded = text.encode("utf-8").hex()
        return f"bytes.fromhex('{encoded}').decode('utf-8')"

    def type(self, text: str):
        """Type *text* on the remote machine via ``keyboard.write``."""
        statement = f"keyboard.write({self._literal(text)})"
        self._execute_python_command(self.command.format(command=statement))

    def key(self, key: str):
        """Press and release *key* via ``keyboard.press_and_release``."""
        statement = f"keyboard.press_and_release({self._literal(key)})"
        self._execute_python_command(self.command.format(command=statement))
|
||||||
@@ -1,37 +1,144 @@
|
|||||||
|
from enum import Enum
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from fabric import Connection
|
from fabric import Connection
|
||||||
|
|
||||||
from xdotool import XDoToolController
|
from .xdotool import XDoToolController
|
||||||
|
from .python import PythonController
|
||||||
|
class MouseClick(Enum):
    """Mouse buttons and wheel directions accepted by the environment.

    The numeric values are the same button numbers that
    ``XDoToolMouseController`` passes to xdotool (``click 1`` for left,
    ``click 4`` / ``click 5`` for the wheel).
    """

    LEFT = 1
    MIDDLE = 2
    RIGHT = 3
    WHEEL_UP = 4
    WHEEL_DOWN = 5
|
||||||
|
|
||||||
class AbstractMouseController(ABC):
|
class AbstractMouseController(ABC):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def type(self, text: str):
|
def mouse_move(self, x: int, y: int):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def key(self, key: str):
|
def left_down(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def left_up(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def left_click(self):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
class XDoToolKeyboardController(AbstractMouseController, XDoToolController):
|
@abstractmethod
|
||||||
|
def middle_down(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def middle_up(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def middle_click(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def right_down(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def right_up(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def right_click(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def scroll_up(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def scroll_down(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
class XDoToolMouseController(AbstractMouseController, XDoToolController):
|
||||||
def __init__(self, ssh_connection: Connection):
|
def __init__(self, ssh_connection: Connection):
|
||||||
super().__init__(ssh_connection=ssh_connection)
|
super().__init__(ssh_connection=ssh_connection)
|
||||||
|
|
||||||
def type(self, text: str):
|
def mouse_move(self, x: int, y: int):
|
||||||
self._execute_xdotool_command(f"type {text}")
|
self._execute_xdotool_command(f"mousemove {x} {y}")
|
||||||
|
|
||||||
def key(self, key: str):
|
def left_down(self):
|
||||||
self._execute_xdotool_command(f"key {key}")
|
self._execute_xdotool_command(f"mousedown 1")
|
||||||
|
|
||||||
class PythonKeyboardController(AbstractMouseController):
|
|
||||||
def __init__(self, ssh_connection: Connection):
|
|
||||||
self.ssh_connection = ssh_connection
|
|
||||||
|
|
||||||
def _execute_python_command(self, command: list[str]) -> None:
|
def left_up(self):
|
||||||
result = self.ssh_connection.run(f"sudo python3 -c 'import keyboard; keyboard.{command}'", hide=True)
|
self._execute_xdotool_command(f"mouseup 1")
|
||||||
return result.stdout.strip()
|
|
||||||
|
|
||||||
def type(self, text: str):
|
def left_click(self):
|
||||||
self._execute_python_command(f"write({text})")
|
self._execute_xdotool_command(f"click 1")
|
||||||
|
|
||||||
def key(self, key: str):
|
def middle_down(self):
|
||||||
self._execute_python_command(f"press_and_release({key})")
|
self._execute_xdotool_command(f"mousedown 2")
|
||||||
|
|
||||||
|
def middle_up(self):
|
||||||
|
self._execute_xdotool_command(f"mouseup 2")
|
||||||
|
|
||||||
|
def middle_click(self):
|
||||||
|
self._execute_xdotool_command(f"click 2")
|
||||||
|
|
||||||
|
def right_down(self):
|
||||||
|
self._execute_xdotool_command(f"mousedown 3")
|
||||||
|
|
||||||
|
def right_up(self):
|
||||||
|
self._execute_xdotool_command(f"mouseup 3")
|
||||||
|
|
||||||
|
def right_click(self):
|
||||||
|
self._execute_xdotool_command(f"click 3")
|
||||||
|
|
||||||
|
def scroll_up(self):
|
||||||
|
self._execute_xdotool_command(f"click 4")
|
||||||
|
|
||||||
|
def scroll_down(self):
|
||||||
|
self._execute_xdotool_command(f"click 5")
|
||||||
|
|
||||||
|
class PythonMouseController(AbstractMouseController, PythonController):
    """Mouse controller that sends ``mouse``-package one-liners to the VM.

    Every action is rendered as a ``python -c "import mouse; ..."`` command
    string and handed to ``_execute_python_command``.
    """

    def __init__(self, http_server: str):
        """Store the server address and the command template.

        Args:
            http_server: Forwarded unchanged to ``PythonController.__init__``.
        """
        super().__init__(http_server=http_server)
        self.command = "python -c \"import mouse; {command}\""

    def _run(self, statement: str) -> None:
        """Wrap *statement* in the command template and send it."""
        self._execute_python_command(self.command.format(command=statement))

    def mouse_move(self, x: int, y: int):
        """Move the pointer to the coordinates (*x*, *y*)."""
        self._run(f"mouse.move({x}, {y})")

    def left_down(self):
        """Press and hold the left button."""
        self._run("mouse.press(button='left')")

    def left_up(self):
        """Release the left button."""
        self._run("mouse.release(button='left')")

    def left_click(self):
        """Click the left button."""
        self._run("mouse.click(button='left')")

    def middle_down(self):
        """Press and hold the middle button."""
        self._run("mouse.press(button='middle')")

    def middle_up(self):
        """Release the middle button."""
        self._run("mouse.release(button='middle')")

    def middle_click(self):
        """Click the middle button."""
        self._run("mouse.click(button='middle')")

    def right_down(self):
        """Press and hold the right button."""
        self._run("mouse.press(button='right')")

    def right_up(self):
        """Release the right button."""
        self._run("mouse.release(button='right')")

    def right_click(self):
        """Click the right button."""
        self._run("mouse.click(button='right')")

    def scroll_up(self):
        """Scroll the wheel with a delta of 10."""
        self._run("mouse.wheel(10)")

    def scroll_down(self):
        """Scroll the wheel with a delta of -10."""
        self._run("mouse.wheel(-10)")
|
||||||
34
desktop_env/controllers/python.py
Normal file
34
desktop_env/controllers/python.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import requests
|
||||||
|
import json
|
||||||
|
|
||||||
|
class PythonController:
    """Client for the HTTP command server that runs inside the VM.

    Commands are POSTed as JSON (``{"command": ...}``) to ``/execute`` on the
    configured server.
    """

    def __init__(self, http_server: str, timeout: float = 30.0):
        """Remember where the server lives.

        Args:
            http_server: Base URL of the server, e.g. ``http://host:5000``.
            timeout: Seconds to wait for the server before giving up.
                Without a timeout ``requests`` waits indefinitely, so an
                unreachable VM would hang the caller.
        """
        self.http_server = http_server
        self.timeout = timeout

    def _endpoint(self, path: str) -> str:
        """Join *path* onto the base URL without doubling the slash."""
        return self.http_server.rstrip("/") + path

    def _execute_python_command(self, command: str) -> None:
        """Send *command* to the server and report the outcome on stdout.

        Network failures (including timeouts) are caught and printed rather
        than raised, so a failed command never aborts the caller.

        Args:
            command: Command line the server should execute.
        """
        payload = json.dumps({"command": command})
        headers = {'Content-Type': 'application/json'}

        try:
            response = requests.post(
                self._endpoint("/execute"),
                headers=headers,
                data=payload,
                timeout=self.timeout,
            )
        except requests.exceptions.RequestException as e:
            print("An error occurred while trying to execute the command:", e)
            return

        if response.status_code == 200:
            print("Command executed successfully:", response.text)
        else:
            print("Failed to execute command. Status code:", response.status_code)
|
||||||
|
|
||||||
|
# Example usage: send one demo command to a server running inside the VM.
if __name__ == '__main__':
    # Replace with the actual URL of the server inside your VM.
    demo_controller = PythonController("http://192.168.7.129:5000")

    # Two sample command lines. Only the mouse one is sent; the keyboard one
    # is kept as a reference for the command format.
    keyboard_example = "python -c \"import keyboard; keyboard.write('hello world')\""
    mouse_example = "python -c \"import mouse; mouse.move(100,100);mouse.right_click()\""

    demo_controller._execute_python_command(mouse_example)
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
from typing import Literal
|
||||||
import subprocess
|
import subprocess
|
||||||
from fabric import Connection
|
from fabric import Connection
|
||||||
import time
|
import time
|
||||||
@@ -8,6 +9,9 @@ from gymnasium import spaces
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
|
from desktop_env.controllers.mouse import MouseClick, AbstractMouseController, XDoToolMouseController, PythonMouseController
|
||||||
|
from desktop_env.controllers.keyboard import AbstractKeyboardController, XDoToolKeyboardController, PythonKeyboardController
|
||||||
|
|
||||||
class Action(Enum):
|
class Action(Enum):
|
||||||
CLICK = 0
|
CLICK = 0
|
||||||
MOUSE_DOWN = 1
|
MOUSE_DOWN = 1
|
||||||
@@ -16,24 +20,18 @@ class Action(Enum):
|
|||||||
KEY = 4
|
KEY = 4
|
||||||
TYPE = 5
|
TYPE = 5
|
||||||
|
|
||||||
class MouseClick(Enum):
|
VM_TYPE = Literal['ubuntu', 'windows']
|
||||||
LEFT = 1
|
|
||||||
MIDDLE = 2
|
|
||||||
RIGHT = 3
|
|
||||||
WHEEL_UP = 4
|
|
||||||
WHEEL_DOWN = 5
|
|
||||||
|
|
||||||
class DesktopEnv(gym.Env):
|
class DesktopEnv(gym.Env):
|
||||||
"""DesktopEnv with OpenAI Gym interface."""
|
"""DesktopEnv with OpenAI Gym interface."""
|
||||||
|
|
||||||
def __init__(self, path_to_vm: str, username: str, password: str,
|
def __init__(self, path_to_vm: str, username: str, password: str,
|
||||||
host: str, snapshot_path: str = "snapshot"):
|
host: str, snapshot_path: str = "snapshot", vm_os: VM_TYPE = "ubuntu"):
|
||||||
self.path_to_vm = path_to_vm
|
self.path_to_vm = path_to_vm
|
||||||
self.username = username
|
self.username = username
|
||||||
self.password = password
|
self.password = password
|
||||||
self.host = host
|
self.host = host
|
||||||
self.snapshot_path = snapshot_path
|
self.snapshot_path = snapshot_path
|
||||||
self.ssh_connection = Connection(host=self.host, user=self.username, connect_kwargs={"password": password})
|
|
||||||
|
|
||||||
self.screen_width = 800
|
self.screen_width = 800
|
||||||
self.screen_height = 800
|
self.screen_height = 800
|
||||||
@@ -54,6 +52,22 @@ class DesktopEnv(gym.Env):
|
|||||||
self._start_emulator()
|
self._start_emulator()
|
||||||
self._wait_for_emulator_load()
|
self._wait_for_emulator_load()
|
||||||
|
|
||||||
|
# set up controllers
|
||||||
|
self.mouse_controller, self.keyboard_controller = self._create_controllers(vm_os)
|
||||||
|
|
||||||
|
def _create_controllers(self, vm_os: VM_TYPE) -> tuple[AbstractMouseController, AbstractKeyboardController]:
    """Build the mouse/keyboard controller pair for the given guest OS.

    Args:
        vm_os: ``"ubuntu"`` selects the xdotool controllers over a shared SSH
            connection; ``"windows"`` selects the HTTP-based Python
            controllers, using ``self.host`` as the server address.

    Returns:
        A ``(mouse_controller, keyboard_controller)`` tuple.

    Raises:
        NotImplementedError: If *vm_os* is neither of the supported values.
    """
    if vm_os == "windows":
        mouse = PythonMouseController(http_server=self.host)
        keyboard = PythonKeyboardController(http_server=self.host)
        return mouse, keyboard

    if vm_os != "ubuntu":
        raise NotImplementedError(vm_os)

    # Both xdotool controllers share one SSH connection to the guest.
    link = Connection(host=self.host, user=self.username, connect_kwargs={"password": self.password})
    mouse = XDoToolMouseController(link)
    keyboard = XDoToolKeyboardController(link)
    return mouse, keyboard
|
||||||
|
|
||||||
def _start_emulator(self):
|
def _start_emulator(self):
|
||||||
self._execute_command(["vmrun", "start", self.path_to_vm])
|
self._execute_command(["vmrun", "start", self.path_to_vm])
|
||||||
|
|
||||||
@@ -133,19 +147,49 @@ class DesktopEnv(gym.Env):
|
|||||||
def step(self, action):
|
def step(self, action):
|
||||||
action_type = Action(action['action_type'])
|
action_type = Action(action['action_type'])
|
||||||
if action_type == Action.CLICK:
|
if action_type == Action.CLICK:
|
||||||
self._click(MouseClick(action['click_type']))
|
click = MouseClick(action['click_type'])
|
||||||
|
if click == MouseClick.LEFT:
|
||||||
|
self.mouse_controller.left_click()
|
||||||
|
elif click == MouseClick.MIDDLE:
|
||||||
|
self.mouse_controller.middle_click()
|
||||||
|
elif click == MouseClick.RIGHT:
|
||||||
|
self.mouse_controller.right_click()
|
||||||
|
elif click == MouseClick.WHEEL_UP:
|
||||||
|
self.mouse_controller.scroll_up()
|
||||||
|
elif click == MouseClick.WHEEL_DOWN:
|
||||||
|
self.mouse_controller.scroll_down()
|
||||||
elif action_type == Action.MOUSE_DOWN:
|
elif action_type == Action.MOUSE_DOWN:
|
||||||
self._mousedown(MouseClick(action['click_type']))
|
click = MouseClick(action['click_type'])
|
||||||
|
if click == MouseClick.LEFT:
|
||||||
|
self.mouse_controller.left_down()
|
||||||
|
elif click == MouseClick.MIDDLE:
|
||||||
|
self.mouse_controller.middle_down()
|
||||||
|
elif click == MouseClick.RIGHT:
|
||||||
|
self.mouse_controller.right_down()
|
||||||
|
elif click == MouseClick.WHEEL_UP:
|
||||||
|
self.mouse_controller.scroll_up()
|
||||||
|
elif click == MouseClick.WHEEL_DOWN:
|
||||||
|
self.mouse_controller.scroll_down()
|
||||||
elif action_type == Action.MOUSE_UP:
|
elif action_type == Action.MOUSE_UP:
|
||||||
self._mouseup(MouseClick(action['click_type']))
|
click = MouseClick(action['click_type'])
|
||||||
|
if click == MouseClick.LEFT:
|
||||||
|
self.mouse_controller.left_up()
|
||||||
|
elif click == MouseClick.MIDDLE:
|
||||||
|
self.mouse_controller.middle_up()
|
||||||
|
elif click == MouseClick.RIGHT:
|
||||||
|
self.mouse_controller.right_up()
|
||||||
|
elif click == MouseClick.WHEEL_UP:
|
||||||
|
self.mouse_controller.scroll_up()
|
||||||
|
elif click == MouseClick.WHEEL_DOWN:
|
||||||
|
self.mouse_controller.scroll_down()
|
||||||
elif action_type == Action.MOUSE_MOVE:
|
elif action_type == Action.MOUSE_MOVE:
|
||||||
self._mouse_move(action['x'], action['y'])
|
self.mouse_controller.mouse_move(x = action['x'], y = action['y'])
|
||||||
elif action_type == Action.KEY:
|
elif action_type == Action.KEY:
|
||||||
key_sequence = ''.join(map(chr, action['key'])) # Convert integer array to string
|
key_sequence = ''.join(map(chr, action['key'])) # Convert integer array to string
|
||||||
self.key(key_sequence)
|
self.keyboard_controller.key(key_sequence)
|
||||||
elif action_type == Action.TYPE:
|
elif action_type == Action.TYPE:
|
||||||
text = ''.join(map(chr, action['text'])) # Convert integer array to string
|
text = ''.join(map(chr, action['text'])) # Convert integer array to string
|
||||||
self._type(text)
|
self.keyboard_controller.type(text)
|
||||||
|
|
||||||
# Capture new state
|
# Capture new state
|
||||||
observation = self._get_obs()
|
observation = self._get_obs()
|
||||||
|
|||||||
29
desktop_env/windows_server/main.py
Normal file
29
desktop_env/windows_server/main.py
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
from flask import Flask, request, jsonify
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
@app.route('/execute', methods=['POST'])
def execute_command():
    """Run the shell command supplied in the JSON request body.

    Expects a JSON object whose ``command`` key holds the command line.

    Returns:
        200 with ``status``, ``output``, ``error`` and ``returncode`` once the
        command has been run; 400 if the body is not a JSON object with a
        non-empty string ``command``; 500 if the process could not be started.
    """
    # silent=True yields None for a missing or malformed body instead of
    # raising, so bad requests get the JSON error below.
    data = request.get_json(silent=True)
    command = data.get('command') if isinstance(data, dict) else None
    if not isinstance(command, str) or not command.strip():
        return jsonify({
            'status': 'error',
            'message': "Request body must be a JSON object with a non-empty 'command' string."
        }), 400

    # SECURITY NOTE: the command is executed through the shell without any
    # safety checks. That is this server's purpose, but it means anyone who
    # can reach the port can run arbitrary commands on this machine.
    try:
        process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = process.communicate()
    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500

    # errors='replace' keeps console output that is not valid UTF-8 from
    # turning a finished command into a decoding failure.
    return jsonify({
        'status': 'success',
        'output': stdout.decode(errors='replace'),
        'error': stderr.decode(errors='replace'),
        'returncode': process.returncode
    })
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Listen on all interfaces so the host machine can reach the server.
    # Debug mode is off: it enables the interactive debugger and the
    # auto-reloader, which should not be exposed on a network-reachable
    # address.
    app.run(debug=False, host="0.0.0.0")
|
||||||
7
main.py
7
main.py
@@ -38,10 +38,13 @@ def human_agent():
|
|||||||
"""
|
"""
|
||||||
Runs the Gym environment with human input.
|
Runs the Gym environment with human input.
|
||||||
"""
|
"""
|
||||||
env = DesktopEnv(path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
|
env = DesktopEnv(path_to_vm="/home/yuri/vmware/Windows 10 x64/Windows 10 x64.vmx",
|
||||||
|
# path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
|
||||||
username="user",
|
username="user",
|
||||||
password="password",
|
password="password",
|
||||||
host="192.168.7.128")
|
# host="192.168.7.128",
|
||||||
|
host="http://192.168.7.129:5000",
|
||||||
|
vm_os="windows")
|
||||||
observation = env.reset()
|
observation = env.reset()
|
||||||
done = False
|
done = False
|
||||||
|
|
||||||
|
|||||||
@@ -2,3 +2,4 @@ numpy
|
|||||||
Pillow
|
Pillow
|
||||||
fabric
|
fabric
|
||||||
gymnasium
|
gymnasium
|
||||||
|
requests
|
||||||
|
|||||||
BIN
screenshot.png
Normal file
BIN
screenshot.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 356 KiB |
Reference in New Issue
Block a user