mouse and keyboard controllers for windows and linux

This commit is contained in:
Jing Hua
2023-11-08 09:22:43 +08:00
parent 8bc4459f41
commit a8aebf5d15
9 changed files with 271 additions and 48 deletions

View File

@@ -22,9 +22,16 @@
2. `rm -rf ~/screenshot.png` 2. `rm -rf ~/screenshot.png`
7. Set up python and install [mouse](https://github.com/boppreh/mouse/) and [keyboard](https://github.com/jordansissel/xdotool) 7. Set up python and install [mouse](https://github.com/boppreh/mouse/) and [keyboard](https://github.com/jordansissel/xdotool)
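## Windows setup guide
1. Copy the file `windows_server/main.py` to the Windows VM.
2. Make sure the Python packages `flask`, `mouse` and `keyboard` are installed in the VM.
3. Start the server in the VM with `pythonw main.py`.
4. Run `ipconfig /all` in the VM and note its IP address; the controllers connect to `http://<ip address>:5000`.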
## Road map (Proposed) ## Road map (Proposed)
- [ ] Explore VMWare, and whether it can be connected and control through mouse package - [x] Explore VMWare, and whether it can be connected and control through mouse package
- [x] Explore Windows and MacOS, whether it can be installed - [x] Explore Windows and MacOS, whether it can be installed
- MacOS is closed source and cannot be legally installed - MacOS is closed source and cannot be legally installed
- Windows is available legally and can be installed - Windows is available legally and can be installed

View File

@@ -1,9 +1,10 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from fabric import Connection from fabric import Connection
from xdotool import XDoToolController from .xdotool import XDoToolController
from .python import PythonController
class AbstractMouseController(ABC): class AbstractKeyboardController(ABC):
@abstractmethod @abstractmethod
def type(self, text: str): def type(self, text: str):
raise NotImplementedError raise NotImplementedError
@@ -12,7 +13,7 @@ class AbstractMouseController(ABC):
def key(self, key: str): def key(self, key: str):
raise NotImplementedError raise NotImplementedError
class XDoToolKeyboardController(AbstractMouseController, XDoToolController): class XDoToolKeyboardController(AbstractKeyboardController, XDoToolController):
def __init__(self, ssh_connection: Connection): def __init__(self, ssh_connection: Connection):
super().__init__(ssh_connection=ssh_connection) super().__init__(ssh_connection=ssh_connection)
@@ -22,16 +23,13 @@ class XDoToolKeyboardController(AbstractMouseController, XDoToolController):
def key(self, key: str): def key(self, key: str):
self._execute_xdotool_command(f"key {key}") self._execute_xdotool_command(f"key {key}")
class PythonKeyboardController(AbstractMouseController): class PythonKeyboardController(AbstractKeyboardController, PythonController):
def __init__(self, ssh_connection: Connection): def __init__(self, http_server: str):
self.ssh_connection = ssh_connection super().__init__(http_server=http_server)
self.command = "python -c \"import keyboard; {command}\""
def _execute_python_command(self, command: list[str]) -> None:
result = self.ssh_connection.run(f"sudo python3 -c 'import keyboard; keyboard.{command}'", hide=True)
return result.stdout.strip()
def type(self, text: str): def type(self, text: str):
self._execute_python_command(f"write({text})") self._execute_python_command(self.command.format(command=f"keyboard.write('{text}')"))
def key(self, key: str): def key(self, key: str):
self._execute_python_command(f"press_and_release({key})") self._execute_python_command(self.command.format(command=f"keyboard.press_and_release('{key}')"))

View File

@@ -1,37 +1,144 @@
from enum import Enum
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from fabric import Connection from fabric import Connection
from xdotool import XDoToolController from .xdotool import XDoToolController
from .python import PythonController
class MouseClick(Enum):
    """Mouse button or wheel direction selected by an action's click type.

    The values 1-5 are the same button numbers the xdotool-based mouse
    controller passes to ``mousedown``/``mouseup``/``click``.
    """

    # Physical buttons.
    LEFT = 1
    MIDDLE = 2
    RIGHT = 3

    # Scroll wheel directions.
    WHEEL_UP = 4
    WHEEL_DOWN = 5
class AbstractMouseController(ABC): class AbstractMouseController(ABC):
@abstractmethod @abstractmethod
def type(self, text: str): def mouse_move(self, x: int, y: int):
raise NotImplementedError raise NotImplementedError
@abstractmethod @abstractmethod
def key(self, key: str): def left_down(self):
raise NotImplementedError
@abstractmethod
def left_up(self):
raise NotImplementedError
@abstractmethod
def left_click(self):
raise NotImplementedError raise NotImplementedError
class XDoToolKeyboardController(AbstractMouseController, XDoToolController): @abstractmethod
def middle_down(self):
raise NotImplementedError
@abstractmethod
def middle_up(self):
raise NotImplementedError
@abstractmethod
def middle_click(self):
raise NotImplementedError
@abstractmethod
def right_down(self):
raise NotImplementedError
@abstractmethod
def right_up(self):
raise NotImplementedError
@abstractmethod
def right_click(self):
raise NotImplementedError
@abstractmethod
def scroll_up(self):
raise NotImplementedError
@abstractmethod
def scroll_down(self):
raise NotImplementedError
class XDoToolMouseController(AbstractMouseController, XDoToolController):
def __init__(self, ssh_connection: Connection): def __init__(self, ssh_connection: Connection):
super().__init__(ssh_connection=ssh_connection) super().__init__(ssh_connection=ssh_connection)
def type(self, text: str): def mouse_move(self, x: int, y: int):
self._execute_xdotool_command(f"type {text}") self._execute_xdotool_command(f"mousemove {x} {y}")
def key(self, key: str): def left_down(self):
self._execute_xdotool_command(f"key {key}") self._execute_xdotool_command(f"mousedown 1")
class PythonKeyboardController(AbstractMouseController):
def __init__(self, ssh_connection: Connection):
self.ssh_connection = ssh_connection
def _execute_python_command(self, command: list[str]) -> None: def left_up(self):
result = self.ssh_connection.run(f"sudo python3 -c 'import keyboard; keyboard.{command}'", hide=True) self._execute_xdotool_command(f"mouseup 1")
return result.stdout.strip()
def type(self, text: str): def left_click(self):
self._execute_python_command(f"write({text})") self._execute_xdotool_command(f"click 1")
def key(self, key: str): def middle_down(self):
self._execute_python_command(f"press_and_release({key})") self._execute_xdotool_command(f"mousedown 2")
def middle_up(self):
self._execute_xdotool_command(f"mouseup 2")
def middle_click(self):
self._execute_xdotool_command(f"click 2")
def right_down(self):
self._execute_xdotool_command(f"mousedown 3")
def right_up(self):
self._execute_xdotool_command(f"mouseup 3")
def right_click(self):
self._execute_xdotool_command(f"click 3")
def scroll_up(self):
self._execute_xdotool_command(f"click 4")
def scroll_down(self):
self._execute_xdotool_command(f"click 5")
class PythonMouseController(AbstractMouseController, PythonController):
    """Mouse controller that drives the VM through the ``mouse`` Python package.

    Every action is rendered into a ``python -c`` one-liner using the
    ``self.command`` template and passed to ``_execute_python_command``.
    """

    def __init__(self, http_server: str):
        """Store the command server URL and the ``python -c`` command template."""
        super().__init__(http_server=http_server)
        self.command = "python -c \"import mouse; {command}\""

    def _send(self, statement: str) -> None:
        """Wrap a single ``mouse.*`` statement in the template and execute it."""
        self._execute_python_command(self.command.format(command=statement))

    def _button(self, action: str, button: str) -> None:
        """Run ``mouse.<action>(button='<button>')`` for press/release/click."""
        self._send(f"mouse.{action}(button='{button}')")

    def mouse_move(self, x: int, y: int):
        """Move the pointer to the coordinates (x, y)."""
        self._send(f"mouse.move({x}, {y})")

    def left_down(self):
        """Press and hold the left button."""
        self._button("press", "left")

    def left_up(self):
        """Release the left button."""
        self._button("release", "left")

    def left_click(self):
        """Click the left button."""
        self._button("click", "left")

    def middle_down(self):
        """Press and hold the middle button."""
        self._button("press", "middle")

    def middle_up(self):
        """Release the middle button."""
        self._button("release", "middle")

    def middle_click(self):
        """Click the middle button."""
        self._button("click", "middle")

    def right_down(self):
        """Press and hold the right button."""
        self._button("press", "right")

    def right_up(self):
        """Release the right button."""
        self._button("release", "right")

    def right_click(self):
        """Click the right button."""
        self._button("click", "right")

    def scroll_up(self):
        """Scroll the wheel by +10."""
        self._send("mouse.wheel(10)")

    def scroll_down(self):
        """Scroll the wheel by -10."""
        self._send("mouse.wheel(-10)")

View File

@@ -0,0 +1,34 @@
import requests
import json
class PythonController:
    """Client for the HTTP command server that runs inside the VM.

    Commands are POSTed as a JSON object ``{"command": ...}`` to the
    ``/execute`` endpoint of ``http_server``.
    """

    def __init__(self, http_server: str, timeout: float = 10.0):
        """
        Args:
            http_server: Base URL of the command server,
                e.g. "http://192.168.7.129:5000".
            timeout: Seconds to wait for the server before a request is
                abandoned. Without a timeout an unresponsive VM would block
                the caller indefinitely.
        """
        self.http_server = http_server
        self.timeout = timeout

    def _execute_python_command(self, command: str) -> None:
        """Send `command` to the server for execution and report the outcome.

        The result is only printed: a non-200 response or a transport error
        (including a timeout) is reported on stdout and never raised.

        Args:
            command: Shell command line to execute on the server.
        """
        payload = json.dumps({
            "command": command
        })
        headers = {
            'Content-Type': 'application/json'
        }
        # Tolerate a base URL given with a trailing slash ("http://host:5000/").
        url = self.http_server.rstrip("/") + "/execute"

        try:
            response = requests.post(url, headers=headers, data=payload, timeout=self.timeout)
        except requests.exceptions.RequestException as e:
            print("An error occurred while trying to execute the command:", e)
            return

        if response.status_code == 200:
            print("Command executed successfully:", response.text)
        else:
            print("Failed to execute command. Status code:", response.status_code)
# example usage
if __name__ == '__main__':
    # replace with your actual server URL of the vm
    demo_controller = PythonController("http://192.168.7.129:5000")

    # Two example command lines; only the mouse example is sent to the server.
    keyboard_example = "python -c \"import keyboard; keyboard.write('hello world')\""
    mouse_example = "python -c \"import mouse; mouse.move(100,100);mouse.right_click()\""

    demo_controller._execute_python_command(mouse_example)

View File

@@ -1,4 +1,5 @@
from enum import Enum from enum import Enum
from typing import Literal
import subprocess import subprocess
from fabric import Connection from fabric import Connection
import time import time
@@ -8,6 +9,9 @@ from gymnasium import spaces
import numpy as np import numpy as np
from PIL import Image from PIL import Image
from desktop_env.controllers.mouse import MouseClick, AbstractMouseController, XDoToolMouseController, PythonMouseController
from desktop_env.controllers.keyboard import AbstractKeyboardController, XDoToolKeyboardController, PythonKeyboardController
class Action(Enum): class Action(Enum):
CLICK = 0 CLICK = 0
MOUSE_DOWN = 1 MOUSE_DOWN = 1
@@ -16,24 +20,18 @@ class Action(Enum):
KEY = 4 KEY = 4
TYPE = 5 TYPE = 5
class MouseClick(Enum): VM_TYPE = Literal['ubuntu', 'windows']
LEFT = 1
MIDDLE = 2
RIGHT = 3
WHEEL_UP = 4
WHEEL_DOWN = 5
class DesktopEnv(gym.Env): class DesktopEnv(gym.Env):
"""DesktopEnv with OpenAI Gym interface.""" """DesktopEnv with OpenAI Gym interface."""
def __init__(self, path_to_vm: str, username: str, password: str, def __init__(self, path_to_vm: str, username: str, password: str,
host: str, snapshot_path: str = "snapshot"): host: str, snapshot_path: str = "snapshot", vm_os: VM_TYPE = "ubuntu"):
self.path_to_vm = path_to_vm self.path_to_vm = path_to_vm
self.username = username self.username = username
self.password = password self.password = password
self.host = host self.host = host
self.snapshot_path = snapshot_path self.snapshot_path = snapshot_path
self.ssh_connection = Connection(host=self.host, user=self.username, connect_kwargs={"password": password})
self.screen_width = 800 self.screen_width = 800
self.screen_height = 800 self.screen_height = 800
@@ -54,6 +52,22 @@ class DesktopEnv(gym.Env):
self._start_emulator() self._start_emulator()
self._wait_for_emulator_load() self._wait_for_emulator_load()
# set up controllers
self.mouse_controller, self.keyboard_controller = self._create_controllers(vm_os)
def _create_controllers(self, vm_os: VM_TYPE) -> tuple[AbstractMouseController, AbstractKeyboardController]:
    """Build the (mouse, keyboard) controller pair for the given guest OS.

    "windows" uses the HTTP-based Python controllers, with ``self.host`` as
    the server URL. "ubuntu" uses the xdotool controllers, which share one
    SSH connection built from ``self.host``, ``self.username`` and
    ``self.password``.

    Raises:
        NotImplementedError: if `vm_os` is neither "ubuntu" nor "windows".
    """
    if vm_os == "windows":
        return (
            PythonMouseController(http_server=self.host),
            PythonKeyboardController(http_server=self.host),
        )

    if vm_os != "ubuntu":
        raise NotImplementedError(vm_os)

    # Both xdotool controllers reuse the same SSH connection object.
    shared_connection = Connection(
        host=self.host,
        user=self.username,
        connect_kwargs={"password": self.password},
    )
    return XDoToolMouseController(shared_connection), XDoToolKeyboardController(shared_connection)
def _start_emulator(self): def _start_emulator(self):
self._execute_command(["vmrun", "start", self.path_to_vm]) self._execute_command(["vmrun", "start", self.path_to_vm])
@@ -133,19 +147,49 @@ class DesktopEnv(gym.Env):
def step(self, action): def step(self, action):
action_type = Action(action['action_type']) action_type = Action(action['action_type'])
if action_type == Action.CLICK: if action_type == Action.CLICK:
self._click(MouseClick(action['click_type'])) click = MouseClick(action['click_type'])
if click == MouseClick.LEFT:
self.mouse_controller.left_click()
elif click == MouseClick.MIDDLE:
self.mouse_controller.middle_click()
elif click == MouseClick.RIGHT:
self.mouse_controller.right_click()
elif click == MouseClick.WHEEL_UP:
self.mouse_controller.scroll_up()
elif click == MouseClick.WHEEL_DOWN:
self.mouse_controller.scroll_down()
elif action_type == Action.MOUSE_DOWN: elif action_type == Action.MOUSE_DOWN:
self._mousedown(MouseClick(action['click_type'])) click = MouseClick(action['click_type'])
if click == MouseClick.LEFT:
self.mouse_controller.left_down()
elif click == MouseClick.MIDDLE:
self.mouse_controller.middle_down()
elif click == MouseClick.RIGHT:
self.mouse_controller.right_down()
elif click == MouseClick.WHEEL_UP:
self.mouse_controller.scroll_up()
elif click == MouseClick.WHEEL_DOWN:
self.mouse_controller.scroll_down()
elif action_type == Action.MOUSE_UP: elif action_type == Action.MOUSE_UP:
self._mouseup(MouseClick(action['click_type'])) click = MouseClick(action['click_type'])
if click == MouseClick.LEFT:
self.mouse_controller.left_up()
elif click == MouseClick.MIDDLE:
self.mouse_controller.middle_up()
elif click == MouseClick.RIGHT:
self.mouse_controller.right_up()
elif click == MouseClick.WHEEL_UP:
self.mouse_controller.scroll_up()
elif click == MouseClick.WHEEL_DOWN:
self.mouse_controller.scroll_down()
elif action_type == Action.MOUSE_MOVE: elif action_type == Action.MOUSE_MOVE:
self._mouse_move(action['x'], action['y']) self.mouse_controller.mouse_move(x = action['x'], y = action['y'])
elif action_type == Action.KEY: elif action_type == Action.KEY:
key_sequence = ''.join(map(chr, action['key'])) # Convert integer array to string key_sequence = ''.join(map(chr, action['key'])) # Convert integer array to string
self.key(key_sequence) self.keyboard_controller.key(key_sequence)
elif action_type == Action.TYPE: elif action_type == Action.TYPE:
text = ''.join(map(chr, action['text'])) # Convert integer array to string text = ''.join(map(chr, action['text'])) # Convert integer array to string
self._type(text) self.keyboard_controller.type(text)
# Capture new state # Capture new state
observation = self._get_obs() observation = self._get_obs()

View File

@@ -0,0 +1,29 @@
from flask import Flask, request, jsonify
import subprocess
# Flask application object; the /execute route defined below is registered on it.
app = Flask(__name__)
@app.route('/execute', methods=['POST'])
def execute_command():
    """Run the shell command given in the JSON request body and return its output.

    The request body must be a JSON object whose 'command' key holds the
    command line to execute.

    Returns:
        200 with 'status', 'output' (stdout), 'error' (stderr) and
        'returncode' once the command has run, whatever its exit code;
        400 with 'status' and 'message' if the body is not a JSON object or
        has no non-empty string 'command';
        500 with 'status' and 'message' if the command could not be started.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so bad requests get a clean JSON error rather than a crash.
    data = request.get_json(silent=True)
    command = data.get('command', '') if isinstance(data, dict) else ''
    if not isinstance(command, str) or not command:
        return jsonify({
            'status': 'error',
            'message': "Request body must be a JSON object with a non-empty string 'command'"
        }), 400

    # SECURITY NOTE: the command is executed through the shell without any
    # safety checks. This is intentional for remote control of the VM, but it
    # means anyone who can reach this endpoint can run arbitrary commands.
    try:
        completed = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except (OSError, ValueError) as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500

    return jsonify({
        'status': 'success',
        # Console output is not guaranteed to be UTF-8 (e.g. Windows code
        # pages); replace undecodable bytes instead of failing the request.
        'output': completed.stdout.decode(errors='replace'),
        'error': completed.stderr.decode(errors='replace'),
        'returncode': completed.returncode
    })
if __name__ == '__main__':
    # Bind to all interfaces (0.0.0.0) rather than only localhost, with
    # Flask's debug mode switched on.
    app.run(host="0.0.0.0", debug=True)

View File

@@ -38,10 +38,13 @@ def human_agent():
""" """
Runs the Gym environment with human input. Runs the Gym environment with human input.
""" """
env = DesktopEnv(path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx", env = DesktopEnv(path_to_vm="/home/yuri/vmware/Windows 10 x64/Windows 10 x64.vmx",
# path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
username="user", username="user",
password="password", password="password",
host="192.168.7.128") # host="192.168.7.128",
host="http://192.168.7.129:5000",
vm_os="windows")
observation = env.reset() observation = env.reset()
done = False done = False

View File

@@ -2,3 +2,4 @@ numpy
Pillow Pillow
fabric fabric
gymnasium gymnasium
requests

BIN
screenshot.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 356 KiB