Fix conflicts

This commit is contained in:
Timothyxxx
2023-12-16 21:32:43 +08:00
43 changed files with 4124 additions and 631 deletions

View File

@@ -1,7 +1,8 @@
# DesktopEnv: A Learning Environment for Human-like Computer Task Mastery # DesktopEnv: An Environment towards Human-like Computer Task Mastery
## Setup guide ## Setup guide
### For members of the team
1. Download OS image 1. Download OS image
1. Download kubuntu from <https://kubuntu.org/getkubuntu/> 1. Download kubuntu from <https://kubuntu.org/getkubuntu/>
2. Download ubuntu from <https://ubuntu.com/download/desktop> 2. Download ubuntu from <https://ubuntu.com/download/desktop>
@@ -22,7 +23,8 @@
2. `rm -rf ~/screenshot.png` 2. `rm -rf ~/screenshot.png`
7. Set up python and install [mouse](https://github.com/boppreh/mouse/) and [keyboard](https://github.com/jordansissel/xdotool) 7. Set up python and install [mouse](https://github.com/boppreh/mouse/) and [keyboard](https://github.com/jordansissel/xdotool)
### For users of the environment
todo
## Road map (Proposed) ## Road map (Proposed)

View File

@@ -1,23 +1,6 @@
# Server Setup Guide # Server Setup Guide
- [Linux](#linux) 1. Copy and paste the file `server/main.py` to the windows vm
- [Windows](#windows) 2. Install the requirements `pip install -r requirements.txt`
## Linux
<https://averagelinuxuser.com/ssh-into-virtualbox/>
1. `sudo apt install openssh-server`
2. `sudo systemctl enable ssh --now`
3. `sudo ufw disable` (disable firewall - safe for local network, otherwise `sudo ufw allow ssh`)
4. `ip a` - find ip address
5. `ssh <username>@<ip_address>`
6. On host, run `ssh-copy-id <username>@<ip_address>`
## Windows
1. Copy and paste the file `windows_server/main.py` to the windows vm
2. Make sure `mouse` and `keyboard` are installed
3. Run the file `python main.py` 3. Run the file `python main.py`
4. `ipconfig /all` and find the ip address 4. `ipconfig /all` and find the ip address

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

View File

@@ -1,56 +0,0 @@
from abc import ABC, abstractmethod
from fabric import Connection
from .xdotool import XDoToolController
from .python import PythonController
class AbstractKeyboardController(ABC):
    """Interface shared by every keyboard backend.

    All four operations are abstract, so a concrete backend must override
    each of them before it can be instantiated.  The default bodies raise
    ``NotImplementedError`` if they are ever reached through ``super()``.
    """

    @abstractmethod
    def type(self, text: str):
        """Send ``text`` to the controlled machine as typed input."""
        raise NotImplementedError

    @abstractmethod
    def key(self, key: str):
        """Press and release ``key``."""
        raise NotImplementedError

    @abstractmethod
    def key_down(self, key: str):
        """Press ``key`` without releasing it."""
        raise NotImplementedError

    @abstractmethod
    def key_up(self, key: str):
        """Release ``key``."""
        raise NotImplementedError
class XDoToolKeyboardController(AbstractKeyboardController, XDoToolController):
    """Keyboard backend that sends ``xdotool`` sub-commands via XDoToolController."""

    def __init__(self, ssh_connection: Connection):
        """Store the SSH connection used to reach the controlled machine."""
        super().__init__(ssh_connection=ssh_connection)

    def type(self, text: str):
        """Type ``text`` with ``xdotool type``.

        The text is quoted because the final command line is handed to
        ``ssh_connection.run`` as a single string; unquoted, any whitespace
        or shell metacharacter in ``text`` would be split or interpreted
        instead of typed.
        """
        import shlex  # local import: only this method needs it

        self._execute_xdotool_command(f"type {shlex.quote(text)}")

    def key(self, key: str):
        """Press and release ``key`` with ``xdotool key``."""
        self._execute_xdotool_command(f"key {key}")

    def key_down(self, key: str):
        """Hold ``key`` down with ``xdotool keydown``."""
        self._execute_xdotool_command(f"keydown {key}")

    def key_up(self, key: str):
        """Release ``key`` with ``xdotool keyup``."""
        self._execute_xdotool_command(f"keyup {key}")
class PythonKeyboardController(AbstractKeyboardController, PythonController):
    """Keyboard backend that runs ``keyboard`` library calls through PythonController."""

    def __init__(self, http_server: str):
        """Remember the server URL and the ``python -c`` wrapper template."""
        super().__init__(http_server=http_server)
        self.command = 'python -c "import keyboard; {command}"'

    def _send(self, statement: str):
        # Wrap one ``keyboard.*`` statement in the template and submit it.
        wrapped = self.command.format(command=statement)
        self._execute_python_command(wrapped)

    def type(self, text: str):
        """Write ``text`` via ``keyboard.write``."""
        self._send(f"keyboard.write('{text}')")

    def key(self, key: str):
        """Press and release ``key`` via ``keyboard.press_and_release``."""
        self._send(f"keyboard.press_and_release('{key}')")

    def key_down(self, key: str):
        """Press ``key`` via ``keyboard.press``."""
        self._send(f"keyboard.press('{key}')")

    def key_up(self, key: str):
        """Release ``key`` via ``keyboard.release``."""
        self._send(f"keyboard.release('{key}')")

View File

@@ -1,144 +0,0 @@
from enum import Enum
from abc import ABC, abstractmethod
from fabric import Connection
from .xdotool import XDoToolController
from .python import PythonController
class MouseClick(Enum):
    """Mouse buttons and wheel directions, numbered 1 through 5."""

    # Physical buttons.
    LEFT = 1
    MIDDLE = 2
    RIGHT = 3

    # Wheel movement, modelled as two extra "buttons".
    WHEEL_UP = 4
    WHEEL_DOWN = 5
class AbstractMouseController(ABC):
    """Interface shared by every mouse backend.

    Every operation is abstract; the default bodies raise
    ``NotImplementedError`` if reached through ``super()``.
    """

    # --- pointer position -------------------------------------------------

    @abstractmethod
    def mouse_move(self, x: int, y: int):
        """Move the pointer to ``(x, y)``."""
        raise NotImplementedError

    # --- left button ------------------------------------------------------

    @abstractmethod
    def left_down(self):
        """Press the left button."""
        raise NotImplementedError

    @abstractmethod
    def left_up(self):
        """Release the left button."""
        raise NotImplementedError

    @abstractmethod
    def left_click(self):
        """Click the left button."""
        raise NotImplementedError

    # --- middle button ----------------------------------------------------

    @abstractmethod
    def middle_down(self):
        """Press the middle button."""
        raise NotImplementedError

    @abstractmethod
    def middle_up(self):
        """Release the middle button."""
        raise NotImplementedError

    @abstractmethod
    def middle_click(self):
        """Click the middle button."""
        raise NotImplementedError

    # --- right button -----------------------------------------------------

    @abstractmethod
    def right_down(self):
        """Press the right button."""
        raise NotImplementedError

    @abstractmethod
    def right_up(self):
        """Release the right button."""
        raise NotImplementedError

    @abstractmethod
    def right_click(self):
        """Click the right button."""
        raise NotImplementedError

    # --- wheel ------------------------------------------------------------

    @abstractmethod
    def scroll_up(self):
        """Scroll the wheel up."""
        raise NotImplementedError

    @abstractmethod
    def scroll_down(self):
        """Scroll the wheel down."""
        raise NotImplementedError
class XDoToolMouseController(AbstractMouseController, XDoToolController):
    """Mouse backend that sends ``xdotool`` sub-commands via XDoToolController.

    Buttons are addressed by number: 1 left, 2 middle, 3 right.  Scrolling is
    sent as a click of button 4 (up) or 5 (down).
    """

    def __init__(self, ssh_connection: Connection):
        """Store the SSH connection used to reach the controlled machine."""
        super().__init__(ssh_connection=ssh_connection)

    def _button(self, verb: str, number: int):
        # Issue "<verb> <button-number>", e.g. "mousedown 1" or "click 3".
        self._execute_xdotool_command(f"{verb} {number}")

    def mouse_move(self, x: int, y: int):
        """Move the pointer to ``(x, y)`` with ``xdotool mousemove``."""
        self._execute_xdotool_command(f"mousemove {x} {y}")

    def left_down(self):
        """Press button 1."""
        self._button("mousedown", 1)

    def left_up(self):
        """Release button 1."""
        self._button("mouseup", 1)

    def left_click(self):
        """Click button 1."""
        self._button("click", 1)

    def middle_down(self):
        """Press button 2."""
        self._button("mousedown", 2)

    def middle_up(self):
        """Release button 2."""
        self._button("mouseup", 2)

    def middle_click(self):
        """Click button 2."""
        self._button("click", 2)

    def right_down(self):
        """Press button 3."""
        self._button("mousedown", 3)

    def right_up(self):
        """Release button 3."""
        self._button("mouseup", 3)

    def right_click(self):
        """Click button 3."""
        self._button("click", 3)

    def scroll_up(self):
        """Scroll up by clicking button 4."""
        self._button("click", 4)

    def scroll_down(self):
        """Scroll down by clicking button 5."""
        self._button("click", 5)
class PythonMouseController(AbstractMouseController, PythonController):
    """Mouse backend that runs ``mouse`` library calls through PythonController."""

    def __init__(self, http_server: str):
        """Remember the server URL and the ``python -c`` wrapper template."""
        super().__init__(http_server=http_server)
        self.command = 'python -c "import mouse; {command}"'

    def _send(self, statement: str):
        # Wrap one ``mouse.*`` statement in the template and submit it.
        wrapped = self.command.format(command=statement)
        self._execute_python_command(wrapped)

    def mouse_move(self, x: int, y: int):
        """Move the pointer to ``(x, y)`` via ``mouse.move``."""
        self._send(f"mouse.move({x}, {y})")

    def left_down(self):
        """Press the left button."""
        self._send("mouse.press(button='left')")

    def left_up(self):
        """Release the left button."""
        self._send("mouse.release(button='left')")

    def left_click(self):
        """Click the left button."""
        self._send("mouse.click(button='left')")

    def middle_down(self):
        """Press the middle button."""
        self._send("mouse.press(button='middle')")

    def middle_up(self):
        """Release the middle button."""
        self._send("mouse.release(button='middle')")

    def middle_click(self):
        """Click the middle button."""
        self._send("mouse.click(button='middle')")

    def right_down(self):
        """Press the right button."""
        self._send("mouse.press(button='right')")

    def right_up(self):
        """Release the right button."""
        self._send("mouse.release(button='right')")

    def right_click(self):
        """Click the right button."""
        self._send("mouse.click(button='right')")

    def scroll_up(self):
        """Scroll up via ``mouse.wheel(10)``."""
        self._send("mouse.wheel(10)")

    def scroll_down(self):
        """Scroll down via ``mouse.wheel(-10)``."""
        self._send("mouse.wheel(-10)")

View File

@@ -1,34 +1,208 @@
import requests
import json import json
from typing import Any, Dict
import requests
from desktop_env.envs.actions import KEYBOARD_KEYS
class PythonController:
    """Client for the HTTP server running on the controlled machine.

    ``http_server`` is the server's base URL; this class talks to its
    ``/screenshot``, ``/file`` and ``/execute`` endpoints.  ``pkgs_prefix`` is
    a template containing a ``{command}`` placeholder that wraps every snippet
    sent to ``/execute``.
    """

    def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""):
        self.http_server = http_server
        # fixme: this is a hacky way to execute python commands. fix it and combine it with installation of packages
        self.pkgs_prefix = pkgs_prefix

    def get_screenshot(self):
        """
        Gets a screenshot from the server. With the cursor.

        Returns the raw response body on HTTP 200, otherwise ``None``.
        """
        response = requests.get(self.http_server + "/screenshot")
        if response.status_code == 200:
            return response.content
        print("Failed to get screenshot. Status code:", response.status_code)
        return None

    def get_file(self, file_path: str):
        """
        Gets a file from the server.

        Returns the raw response body on HTTP 200, otherwise ``None``.
        """
        response = requests.post(self.http_server + "/file", data={"file_path": file_path})
        if response.status_code == 200:
            print("File downloaded successfully")
            return response.content
        print("Failed to get file. Status code:", response.status_code)
        return None

    def execute_python_command(self, command: str) -> Any:
        """
        Executes a python command on the server.
        It can be used to execute the pyautogui commands, or... any other python command. who knows?

        ``command`` is substituted into ``pkgs_prefix`` and posted as JSON to
        ``/execute``.  Returns the decoded JSON body of the response, or
        ``None`` when the request itself fails.
        """
        command = self.pkgs_prefix.format(command=command)
        payload = json.dumps({"command": command})
        headers = {
            'Content-Type': 'application/json'
        }

        try:
            response = requests.post(self.http_server + "/execute", headers=headers, data=payload)
            if response.status_code == 200:
                print("Command executed successfully:", response.text)
            else:
                print("Failed to execute command. Status code:", response.status_code)
            return response.json()
        except requests.exceptions.RequestException as e:
            print("An error occurred while trying to execute the command:", e)
            return None

    def execute_action(self, action: Dict[str, Any]):
        """
        Executes an action on the server computer.

        ``action`` must contain ``"action_type"`` and may contain
        ``"parameters"``.  A missing, ``None`` or empty ``"parameters"`` entry
        is treated as "no parameters".  The action is translated into one or
        more ``pyautogui`` statements and sent through
        ``execute_python_command``.

        Raises ``Exception`` for an unknown action type, for a parameter
        combination the action does not accept, or for a key that is not in
        ``KEYBOARD_KEYS``.
        """
        action_type = action["action_type"]
        # `or {}` also covers an explicit None, which the previous
        # `parameters == {} or None` test never matched.
        parameters = action.get("parameters") or {}

        key_functions = {"PRESS": "press", "KEY_DOWN": "keyDown", "KEY_UP": "keyUp"}

        if action_type == "MOVE_TO":
            self._call_with_optional_xy("moveTo", parameters, keyword=False)

        elif action_type == "CLICK":
            self._click(parameters)

        elif action_type == "MOUSE_DOWN":
            self._call_with_optional_button("mouseDown", parameters)

        elif action_type == "MOUSE_UP":
            self._call_with_optional_button("mouseUp", parameters)

        elif action_type == "RIGHT_CLICK":
            self._call_with_optional_xy("rightClick", parameters, keyword=True)

        elif action_type == "DOUBLE_CLICK":
            self._call_with_optional_xy("doubleClick", parameters, keyword=True)

        elif action_type == "DRAG_TO":
            # Both coordinates are required; fail loudly like every other
            # branch instead of silently doing nothing.
            if "x" not in parameters or "y" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            x = parameters["x"]
            y = parameters["y"]
            self.execute_python_command(
                f"pyautogui.dragTo({x}, {y}, duration=1.0, button='left', mouseDownUp=True)")

        elif action_type == "SCROLL":
            self._scroll(parameters)

        elif action_type == "TYPING":
            if "text" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            text = parameters["text"]
            # NOTE(review): `text` is pasted into a single-quoted literal, so a
            # quote or backslash in it breaks (or alters) the generated code.
            # Escaping must also survive the `pkgs_prefix` wrapper and whatever
            # the server does with the string - confirm before changing.
            self.execute_python_command(f"pyautogui.typewrite('{text}')")

        elif action_type in key_functions:
            key = self._checked_key(parameters)
            self.execute_python_command(f"pyautogui.{key_functions[action_type]}('{key}')")

        elif action_type == "HOTKEY":
            if "keys" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            keys = parameters["keys"]
            if not isinstance(keys, list):
                raise Exception("Keys must be a list of keys")
            for key in keys:
                if key.lower() not in KEYBOARD_KEYS:
                    raise Exception(f"Key must be one of {KEYBOARD_KEYS}")
            keys_para_rep = "', '".join(keys)
            self.execute_python_command(f"pyautogui.hotkey('{keys_para_rep}')")

        else:
            raise Exception(f"Unknown action type: {action_type}")

    def _call_with_optional_xy(self, function: str, parameters: Dict[str, Any], keyword: bool) -> None:
        """Send ``pyautogui.<function>`` with no arguments or with an x/y pair."""
        if not parameters:
            self.execute_python_command(f"pyautogui.{function}()")
        elif "x" in parameters and "y" in parameters:
            x = parameters["x"]
            y = parameters["y"]
            arguments = f"x={x}, y={y}" if keyword else f"{x}, {y}"
            self.execute_python_command(f"pyautogui.{function}({arguments})")
        else:
            raise Exception(f"Unknown parameters: {parameters}")

    def _call_with_optional_button(self, function: str, parameters: Dict[str, Any]) -> None:
        """Send ``pyautogui.<function>`` with no arguments or with a button name."""
        if not parameters:
            self.execute_python_command(f"pyautogui.{function}()")
        elif "button" in parameters:
            button = parameters["button"]
            self.execute_python_command(f"pyautogui.{function}(button='{button}')")
        else:
            raise Exception(f"Unknown parameters: {parameters}")

    def _click(self, parameters: Dict[str, Any]) -> None:
        """Send ``pyautogui.click`` for the accepted button / position / count combinations."""
        if not parameters:
            self.execute_python_command("pyautogui.click()")
            return

        has_button = "button" in parameters
        has_position = "x" in parameters and "y" in parameters
        no_position = "x" not in parameters and "y" not in parameters
        # Accepted: a full x/y pair (with or without a button), or a button
        # alone.  A lone coordinate or a lone click count is rejected.
        if not (has_position or (has_button and no_position)):
            raise Exception(f"Unknown parameters: {parameters}")

        arguments = []
        if has_button:
            arguments.append(f"button='{parameters['button']}'")
        if has_position:
            arguments.append(f"x={parameters['x']}")
            arguments.append(f"y={parameters['y']}")
        if "num_clicks" in parameters:
            arguments.append(f"clicks={parameters['num_clicks']}")
        self.execute_python_command(f"pyautogui.click({', '.join(arguments)})")

    def _scroll(self, parameters: Dict[str, Any]) -> None:
        """Send horizontal and/or vertical scroll commands; at least one of dx/dy is required."""
        # todo: check if it is related to the operating system, as https://github.com/TheDuckAI/DuckTrack/blob/main/ducktrack/playback.py pointed out
        if "dx" not in parameters and "dy" not in parameters:
            raise Exception(f"Unknown parameters: {parameters}")
        if "dx" in parameters:
            dx = parameters["dx"]
            self.execute_python_command(f"pyautogui.hscroll({dx})")
        if "dy" in parameters:
            dy = parameters["dy"]
            self.execute_python_command(f"pyautogui.vscroll({dy})")

    @staticmethod
    def _checked_key(parameters: Dict[str, Any]) -> str:
        """Return ``parameters["key"]`` after checking it against ``KEYBOARD_KEYS``."""
        if "key" not in parameters:
            raise Exception(f"Unknown parameters: {parameters}")
        key = parameters["key"]
        if key.lower() not in KEYBOARD_KEYS:
            raise Exception(f"Key must be one of {KEYBOARD_KEYS}")
        # NOTE(review): KEYBOARD_KEYS contains "'" and "\\", which do not
        # survive being pasted into the single-quoted literal the caller
        # builds - confirm how those keys should be encoded.
        return key

View File

@@ -0,0 +1,96 @@
import requests
import json
class SetupController:
    """Client for the ``/setup`` endpoints of the server on the controlled machine."""

    def __init__(self, http_server: str):
        # Every request of this controller goes below "<server>/setup".
        self.http_server = http_server + "/setup"

    def setup(self, config):
        """
        Setup Config:
        {
            download: list[tuple[string]], # a list of tuples of url of file to download and the save path
            ...
        }

        Runs the download, wallpaper and open steps in that order.  Each step
        is skipped when ``config`` is empty or lacks the step's key
        (``download``, ``wallpaper``, ``open``).
        """
        self._download_setup(config)
        self._change_wallpaper(config)
        # self._tidy_desktop(config) todo: implement this
        self._open_setup(config)
        # can add other setup steps

    def _post(self, endpoint: str, payload: dict, failure_message: str) -> None:
        """POST ``payload`` as JSON to ``endpoint`` and print the outcome.

        Request errors are reported on stdout rather than raised, so one
        failing step does not abort the remaining setup.
        """
        headers = {
            'Content-Type': 'application/json'
        }
        body = json.dumps(payload)
        try:
            response = requests.post(self.http_server + endpoint, headers=headers, data=body)
            if response.status_code == 200:
                print("Command executed successfully:", response.text)
            else:
                # Report the actual status code; the body is kept because it
                # usually carries the server's explanation.
                print(failure_message, "Status code:", response.status_code, "Response:", response.text)
        except requests.exceptions.RequestException as e:
            print("An error occurred while trying to send the request:", e)

    def _download_setup(self, config):
        """Ask the server to download every ``(url, path)`` pair in ``config['download']``.

        Raises ``Exception`` when a pair has an empty url or path.
        """
        if not config or 'download' not in config:
            return
        for url, path in config['download']:
            if not url or not path:
                raise Exception(f"Setup Download - Invalid URL ({url}) or path ({path}).")
            # send request to server to download file
            self._post("/download_file", {"url": url, "path": path}, "Failed to download file.")

    def _change_wallpaper(self, config):
        """Ask the server to set the wallpaper to ``config['wallpaper']``.

        Raises ``Exception`` when the configured path is empty.
        """
        if not config or 'wallpaper' not in config:
            return
        path = config['wallpaper']
        if not path:
            raise Exception(f"Setup Wallpaper - Invalid path ({path}).")
        # send request to server to change wallpaper
        self._post("/change_wallpaper", {"path": path}, "Failed to change wallpaper.")

    def _tidy_desktop(self, config):
        """Placeholder for a future setup step; not implemented."""
        raise NotImplementedError

    def _open_setup(self, config):
        """Ask the server to open every path in ``config['open']``.

        Raises ``Exception`` when a path is empty.
        """
        if not config or 'open' not in config:
            return
        for path in config['open']:
            if not path:
                raise Exception(f"Setup Open - Invalid path ({path}).")
            # send request to server to open file
            self._post("/open_file", {"path": path}, "Failed to open file.")

View File

@@ -1,11 +0,0 @@
from fabric import Connection
from typing import List
class XDoToolController:
    """Runs ``xdotool`` on a remote machine through an SSH connection."""

    def __init__(self, ssh_connection: "Connection"):
        # Any object exposing ``run(command, hide=...)`` works here.
        self.ssh_connection = ssh_connection

    def _execute_xdotool_command(self, command: str) -> str:
        """Run ``xdotool <command>`` against display ``:0`` and return its output.

        ``command`` is the xdotool sub-command plus its arguments as a single
        string (for example ``"click 1"``); it is interpolated into the
        command line as-is.  Returns the command's stdout with surrounding
        whitespace stripped.
        """
        result = self.ssh_connection.run(f"DISPLAY=:0 xdotool {command}", hide=True)
        return result.stdout.strip()

190
desktop_env/envs/actions.py Normal file
View File

@@ -0,0 +1,190 @@
# Screen bounds used as the upper limit of every coordinate range below.
X_MAX = 1920  # TODO: get the screen resolution
Y_MAX = 1080

# Key names accepted by the key-related actions: printable characters first,
# then named keys.
KEYBOARD_KEYS = [
    '\t', '\n', '\r', ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    '{', '|', '}', '~',
    'accept', 'add', 'alt', 'altleft', 'altright', 'apps',
    'backspace', 'browserback', 'browserfavorites', 'browserforward', 'browserhome', 'browserrefresh',
    'browsersearch', 'browserstop',
    'capslock', 'clear', 'convert', 'ctrl', 'ctrlleft', 'ctrlright',
    'decimal', 'del', 'delete', 'divide', 'down',
    'end', 'enter', 'esc', 'escape', 'execute',
    'f1', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19',
    'f2', 'f20', 'f21', 'f22', 'f23', 'f24',
    'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9',
    'final', 'fn',
    'hanguel', 'hangul', 'hanja', 'help', 'home',
    'insert',
    'junja',
    'kana', 'kanji',
    'launchapp1', 'launchapp2', 'launchmail', 'launchmediaselect', 'left',
    'modechange', 'multiply',
    'nexttrack', 'nonconvert',
    'num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9', 'numlock',
    'pagedown', 'pageup', 'pause', 'pgdn', 'pgup', 'playpause', 'prevtrack',
    'print', 'printscreen', 'prntscrn', 'prtsc', 'prtscr',
    'return', 'right',
    'scrolllock', 'select', 'separator', 'shift', 'shiftleft', 'shiftright', 'sleep', 'stop', 'subtract',
    'tab',
    'up',
    'volumedown', 'volumemute', 'volumeup',
    'win', 'winleft', 'winright',
    'yen',
    'command', 'option', 'optionleft', 'optionright',
]

# One entry per action.  Each parameter spec gives its Python type, its
# allowed range (None where no range is given) and whether it may be omitted.
ACTION_SPACE = [
    {
        "action_type": "MOVE_TO",
        "note": "move the cursor to the specified position",
        "parameters": {
            "x": {"type": float, "range": [0, X_MAX], "optional": False},
            "y": {"type": float, "range": [0, Y_MAX], "optional": False},
        },
    },
    {
        "action_type": "CLICK",
        "note": "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position",
        "parameters": {
            "button": {"type": str, "range": ["left", "right", "middle"], "optional": True},
            "x": {"type": float, "range": [0, X_MAX], "optional": True},
            "y": {"type": float, "range": [0, Y_MAX], "optional": True},
            "num_clicks": {"type": int, "range": [1, 2, 3], "optional": True},
        },
    },
    {
        "action_type": "MOUSE_DOWN",
        "note": "press the left button if the button not specified, otherwise press the specified button",
        "parameters": {
            "button": {"type": str, "range": ["left", "right", "middle"], "optional": True},
        },
    },
    {
        "action_type": "MOUSE_UP",
        "note": "release the left button if the button not specified, otherwise release the specified button",
        "parameters": {
            "button": {"type": str, "range": ["left", "right", "middle"], "optional": True},
        },
    },
    {
        "action_type": "RIGHT_CLICK",
        "note": "right click at the current position if x and y are not specified, otherwise right click at the specified position",
        "parameters": {
            "x": {"type": float, "range": [0, X_MAX], "optional": True},
            "y": {"type": float, "range": [0, Y_MAX], "optional": True},
        },
    },
    {
        "action_type": "DOUBLE_CLICK",
        "note": "double click at the current position if x and y are not specified, otherwise double click at the specified position",
        "parameters": {
            "x": {"type": float, "range": [0, X_MAX], "optional": True},
            "y": {"type": float, "range": [0, Y_MAX], "optional": True},
        },
    },
    {
        "action_type": "DRAG_TO",
        "note": "drag the cursor to the specified position with the left button pressed",
        "parameters": {
            "x": {"type": float, "range": [0, X_MAX], "optional": False},
            "y": {"type": float, "range": [0, Y_MAX], "optional": False},
        },
    },
    {
        "action_type": "SCROLL",
        "note": "scroll the mouse wheel up or down",
        "parameters": {
            "dx": {"type": int, "range": None, "optional": False},
            "dy": {"type": int, "range": None, "optional": False},
        },
    },
    {
        "action_type": "TYPING",
        "note": "type the specified text",
        "parameters": {
            "text": {"type": str, "range": None, "optional": False},
        },
    },
    {
        "action_type": "PRESS",
        "note": "press the specified key and release it",
        "parameters": {
            "key": {"type": str, "range": KEYBOARD_KEYS, "optional": False},
        },
    },
    {
        "action_type": "KEY_DOWN",
        "note": "press the specified key",
        "parameters": {
            "key": {"type": str, "range": KEYBOARD_KEYS, "optional": False},
        },
    },
    {
        "action_type": "KEY_UP",
        "note": "release the specified key",
        "parameters": {
            "key": {"type": str, "range": KEYBOARD_KEYS, "optional": False},
        },
    },
    {
        "action_type": "HOTKEY",
        "note": "press the specified key combination",
        "parameters": {
            "keys": {"type": list, "range": [KEYBOARD_KEYS], "optional": False},
        },
    },
]

View File

@@ -1,78 +1,61 @@
from enum import Enum from __future__ import annotations
from typing import Literal, List, Tuple
import os
import subprocess import subprocess
from fabric import Connection
import time import time
import uuid
import platform
from typing import List
import gymnasium as gym import gymnasium as gym
from gymnasium import spaces import requests
import numpy as np
from PIL import Image
from desktop_env.controllers.mouse import MouseClick, AbstractMouseController, XDoToolMouseController, PythonMouseController from desktop_env.controllers.python import PythonController
from desktop_env.controllers.keyboard import AbstractKeyboardController, XDoToolKeyboardController, PythonKeyboardController from desktop_env.controllers.setup import SetupController
from desktop_env.evaluators import eval_funcs
class Action(Enum):
    """Action kinds, numbered 0 through 7."""

    # Mouse actions.
    CLICK = 0
    MOUSE_DOWN = 1
    MOUSE_UP = 2
    MOUSE_MOVE = 3

    # Keyboard actions.
    KEY = 4
    KEY_DOWN = 5
    KEY_UP = 6
    TYPE = 7
VM_TYPE = Literal['ubuntu', 'windows'] def _execute_command(command: List[str]) -> None:
if command[:4] == ["vmrun", "-T", "ws", "start"]:
p = subprocess.Popen(command)
p.wait()
else:
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=60, text=True)
if result.returncode != 0:
raise Exception("\033[91m" + result.stdout + result.stderr + "\033[0m")
return result.stdout
class DesktopEnv(gym.Env): class DesktopEnv(gym.Env):
"""DesktopEnv with OpenAI Gym interface.""" """DesktopEnv with OpenAI Gym interface."""
def __init__(self, path_to_vm: str, username: str, password: str, def __init__(
host: str, snapshot_path: str = "some_point_browser", vm_os: VM_TYPE = "ubuntu"): self,
path_to_vm: str,
snapshot_path: str = "base",
instruction: str = None,
config: dict = None,
evaluator: dict = None,
action_space: str = "computer_13",
):
# Initialize environment variables
self.path_to_vm = path_to_vm self.path_to_vm = path_to_vm
self.username = username
self.password = password
self.host = host
self.snapshot_path = snapshot_path # todo: handling the logic of snapshot directory self.snapshot_path = snapshot_path # todo: handling the logic of snapshot directory
self.screen_width = 800 # Initialize emulator and controller
self.screen_height = 800
# Define the action and observation space
self.action_space = spaces.Dict({
"action_type": spaces.Discrete(len(Action)),
"click_type": spaces.Discrete(len(MouseClick)),
"x": spaces.Discrete(self.screen_width),
"y": spaces.Discrete(self.screen_height),
"key": spaces.MultiDiscrete([128] * 10), # max 10 characters, ASCII
"text": spaces.MultiDiscrete([128] * 10) # max 10 characters, ASCII
})
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3), dtype=np.uint8)
# Additional setup
self.metadata = {'render.modes': ['rgb_array']}
# Initialize emulator
print("Initializing...") print("Initializing...")
self._start_emulator() self._start_emulator()
self.host = f"http://{self._get_vm_ip()}:5000"
self.controller = PythonController(http_server=self.host)
self.setup_controller = SetupController(http_server=self.host)
self.instruction = instruction
self.config = config
self.evaluator = evaluator
# set up controllers # mode: human or machine
self.mouse_controller, self.keyboard_controller = self._create_controllers(vm_os) assert action_space in ["computer_13", "pyautogui"]
self.action_space = action_space
def _create_controllers(self, vm_os: VM_TYPE) -> Tuple[AbstractMouseController, AbstractKeyboardController]: # todo: define the action space and the observation space as gym did, or extend theirs
if vm_os == "ubuntu":
ssh_connection = Connection(host=self.host, user=self.username, connect_kwargs={"password": self.password})
mouse_controller = XDoToolMouseController(ssh_connection)
keyboard_controller = XDoToolKeyboardController(ssh_connection)
elif vm_os == "windows":
mouse_controller = PythonMouseController(http_server=self.host)
keyboard_controller = PythonKeyboardController(http_server=self.host)
else:
raise NotImplementedError(vm_os)
return mouse_controller, keyboard_controller
def _start_emulator(self): def _start_emulator(self):
while True: while True:
@@ -84,108 +67,120 @@ class DesktopEnv(gym.Env):
break break
else: else:
print("Starting VM...") print("Starting VM...")
self._execute_command(["vmrun", "-T", "ws", "start", self.path_to_vm]) _execute_command(["vmrun", "-T", "ws", "start", self.path_to_vm])
time.sleep(5) time.sleep(3)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
print(f"Error executing command: {e.output.decode().strip()}") print(f"Error executing command: {e.output.decode().strip()}")
def _execute_command(self, command: List[str]) -> None: def _get_vm_ip(self):
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) max_retries = 10
stdout, stderr = process.communicate() print("Getting IP Address...")
if process.returncode != 0: for _ in range(max_retries):
print(f"Error executing command: {command}") try:
return None output = _execute_command(["vmrun", "-T", "ws", "getGuestIPAddress", self.path_to_vm]).strip()
else: print(f"IP address: {output}")
return stdout.decode() return output
except:
time.sleep(5)
print("Retrying...")
raise Exception("Failed to get VM IP address!")
def _save_state(self): def _save_state(self):
self._execute_command(["vmrun", "-T", "ws" "snapshot", self.path_to_vm, self.snapshot_path]) _execute_command(["vmrun", "-T", "ws" "snapshot", self.path_to_vm, self.snapshot_path])
def _get_screenshot(self): def _get_screenshot(self):
image_path = "./screenshot.png" random_uuid = str(uuid.uuid4())
self._execute_command( os.makedirs(os.path.join("tmp", random_uuid), exist_ok=True)
["vmrun", "-T", "ws", "-gu", self.username, "-gp", self.password, "captureScreen", self.path_to_vm, image_path = os.path.join("tmp", random_uuid, "screenshot.png")
image_path])
# Get the screenshot and save to the image_path
screenshot = self.controller.get_screenshot()
with open(image_path, "wb") as f:
f.write(screenshot)
return image_path return image_path
def _get_obs(self): def _get_obs(self):
screenshot_image_path = self._get_screenshot() screenshot_image_path = self._get_screenshot()
with Image.open(screenshot_image_path) as img: return screenshot_image_path
return np.array(img)
def reset(self): def reset(self, seed=None, options=None):
print("Resetting environment...") print("Resetting environment...")
print("Reverting to snapshot to {}...".format(self.snapshot_path)) print("Reverting to snapshot to {}...".format(self.snapshot_path))
self._execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path]) _execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path])
time.sleep(5)
print("Starting emulator...") print("Starting emulator...")
self._start_emulator() self._start_emulator()
print("Emulator started.") print("Emulator started.")
print("Setting up environment...")
self.setup_controller.setup(self.config)
time.sleep(5)
print("Environment setup complete.")
observation = self._get_obs() observation = self._get_obs()
return observation return observation
def step(self, action): def step(self, action, pause=0.5):
action_type = Action(action['action_type']) # fixme: add reminding logic here, decide if the action is valid for the current action_space
if action_type == Action.CLICK: if self.action_space == "computer_13":
click = MouseClick(action['click_type']) # the set of all possible actions defined in the action representation
if click == MouseClick.LEFT: self.controller.execute_action(action)
self.mouse_controller.left_click() elif self.action_space == "pyautogui":
elif click == MouseClick.MIDDLE: # the set of all possible python commands insides `pyautogui`
self.mouse_controller.middle_click() self.controller.execute_python_command(action)
elif click == MouseClick.RIGHT:
self.mouse_controller.right_click()
elif click == MouseClick.WHEEL_UP:
self.mouse_controller.scroll_up()
elif click == MouseClick.WHEEL_DOWN:
self.mouse_controller.scroll_down()
elif action_type == Action.MOUSE_DOWN:
click = MouseClick(action['click_type'])
if click == MouseClick.LEFT:
self.mouse_controller.left_down()
elif click == MouseClick.MIDDLE:
self.mouse_controller.middle_down()
elif click == MouseClick.RIGHT:
self.mouse_controller.right_down()
elif click == MouseClick.WHEEL_UP:
self.mouse_controller.scroll_up()
elif click == MouseClick.WHEEL_DOWN:
self.mouse_controller.scroll_down()
elif action_type == Action.MOUSE_UP:
click = MouseClick(action['click_type'])
if click == MouseClick.LEFT:
self.mouse_controller.left_up()
elif click == MouseClick.MIDDLE:
self.mouse_controller.middle_up()
elif click == MouseClick.RIGHT:
self.mouse_controller.right_up()
elif click == MouseClick.WHEEL_UP:
self.mouse_controller.scroll_up()
elif click == MouseClick.WHEEL_DOWN:
self.mouse_controller.scroll_down()
elif action_type == Action.MOUSE_MOVE:
self.mouse_controller.mouse_move(x = action['x'], y = action['y'])
elif action_type == Action.KEY:
key_sequence = ''.join(map(chr, action['key'])) # Convert integer array to string
self.keyboard_controller.key(key_sequence)
elif action_type == Action.KEY_DOWN:
key_sequence = ''.join(map(chr, action['key'])) # Convert integer array to string
self.keyboard_controller.key_down(key_sequence)
elif action_type == Action.KEY_UP:
key_sequence = ''.join(map(chr, action['key'])) # Convert integer array to string
self.keyboard_controller.key_up(key_sequence)
elif action_type == Action.TYPE:
text = ''.join(map(chr, action['text'])) # Convert integer array to string
self.keyboard_controller.type(text)
# Capture new state # todo: maybe for the better here we need to add a logic to wait until the rendering is done
observation = self._get_obs() time.sleep(pause)
reward = 0 # Define reward calculation observation = {
done = False # Define episode termination condition "screenshot": self._get_obs(),
"instruction": self.instruction
}
reward = 0 # todo: Define reward calculation for each example
done = False # todo: Define episode termination condition for each example
info = {} info = {}
return observation, reward, done, info return observation, reward, done, info
def evaluate(self):
    """
    Evaluate whether the task is successfully completed.

    The evaluation function is looked up in ``eval_funcs`` by
    ``self.evaluator["func"]``. Every entry of ``self.evaluator["paths"]``
    is copied into its own local temp directory, and the function is then
    called with those local paths as keyword arguments.

    Returns:
        Whatever the selected evaluation function returns.

    Raises:
        NotImplementedError: if a file entry's "type" is neither
            "cloud_file" nor "vm_file".
    """

    def copy_file_to_local(_file_info):
        # One uuid-named directory per file, so successive copies never collide.
        local_dir = os.path.join("tmp", str(uuid.uuid4()))
        os.makedirs(local_dir, exist_ok=True)
        _path = os.path.join(local_dir, "tmp.xlsx")

        source_type = _file_info["type"]
        if source_type == "cloud_file":
            # Stream the download to disk in 8 KiB chunks.
            response = requests.get(_file_info["path"], stream=True)
            response.raise_for_status()
            with open(_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        elif source_type == "vm_file":
            # fixme: stream this part maybe as well
            # Fetch first, then open, so a failed fetch leaves no empty file behind.
            vm_bytes = self.controller.get_file(_file_info["path"])
            with open(_path, "wb") as f:
                f.write(vm_bytes)
        else:
            raise NotImplementedError

        return _path

    # todo: make this more flexible by refactoring
    eval_func = eval_funcs[self.evaluator["func"]]
    local_paths = {
        var_name: copy_file_to_local(file_info)
        for var_name, file_info in self.evaluator["paths"].items()
    }
    return eval_func(**local_paths)
def render(self, mode='rgb_array'): def render(self, mode='rgb_array'):
if mode == 'rgb_array': if mode == 'rgb_array':
return self._get_obs() return self._get_obs()
@@ -193,4 +188,4 @@ class DesktopEnv(gym.Env):
raise ValueError('Unsupported render mode: {}'.format(mode)) raise ValueError('Unsupported render mode: {}'.format(mode))
def close(self): def close(self):
self._execute_command(["vmrun", "stop", self.path_to_vm]) _execute_command(["vmrun", "stop", self.path_to_vm])

View File

@@ -0,0 +1,5 @@
from .table import compare_table
# Registry of evaluation functions. Each key is a signature-style string
# (function name plus its argument names) and each value is the callable.
eval_funcs = {
    "compare_table(expected, actual)": compare_table
}

View File

View File

@@ -0,0 +1,14 @@
def compare_table(expected, actual):
    """Return 1 when two Excel files load into equal DataFrames, else 0.

    Args:
        expected: path (or anything ``pandas.read_excel`` accepts) of the
            reference spreadsheet.
        actual: path of the spreadsheet to check.

    Returns:
        int: 1 if ``DataFrame.equals`` reports the two tables equal, 0 otherwise.
    """
    import pandas as pd

    expected_df = pd.read_excel(expected)
    actual_df = pd.read_excel(actual)

    # Compare the DataFrames
    if expected_df.equals(actual_df):
        return 1
    return 0
if __name__ == '__main__':
    # Manual smoke test: both paths are empty placeholders and must be filled
    # in with real spreadsheet paths before this is run.
    path1 = ""
    path2 = ""
    print(compare_table(path1, path2))

184
desktop_env/server/main.py Normal file
View File

@@ -0,0 +1,184 @@
import os
from pathlib import Path
import platform
import subprocess
import requests
import Xlib.display
import pyautogui
from PIL import ImageGrab, Image
from flask import Flask, request, jsonify, send_file
# Flask application object; every route in this module is registered on it.
app = Flask(__name__)
# Zero out pyautogui's pause settings.
# NOTE(review): presumably this removes pyautogui's built-in delay after each
# call (and the extra macOS catch-up delay) — confirm against pyautogui docs.
pyautogui.PAUSE = 0
pyautogui.DARWIN_CATCH_UP_TIME = 0
@app.route('/execute', methods=['POST'])
def execute_command():
    """Run the shell command given in the JSON body and report its output.

    The request JSON is read for a 'command' key (default: empty string).
    The command is run through the shell with stdout and stderr captured as
    text.

    Returns:
        A JSON object with 'status', 'output' and 'error' on success, or a
        JSON object with 'status' and 'message' plus HTTP 500 when running
        the command raises.
    """
    payload = request.json
    # The 'command' key in the JSON request should contain the command to be executed.
    command = payload.get('command', '')

    # Execute the command without any safety checks.
    try:
        completed = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        success_body = {
            'status': 'success',
            'output': completed.stdout,
            'error': completed.stderr
        }
        return jsonify(success_body)
    except Exception as e:
        failure_body = {
            'status': 'error',
            'message': str(e)
        }
        return jsonify(failure_body), 500
@app.route('/screenshot', methods=['GET'])
def capture_screen_with_cursor():
    """Capture the screen to ``screenshots/screenshot.png`` and send it back.

    The capture method depends on ``platform.system()``:

    * Windows: ``pyautogui.screenshot()`` with a cursor image pasted at the
      current cursor position. The cursor image is downloaded once and
      cached at ``screenshots/cursor.png``.
    * Linux: the full screen is grabbed with ``ImageGrab``, using Xlib only
      to find the screen size. No cursor is drawn in this branch.
    * Darwin: the ``screencapture -C`` utility writes the file.

    Returns:
        The PNG file as the response, or a JSON error with HTTP 501 when the
        platform is not one of the three above.
    """
    file_path = os.path.join("screenshots", "screenshot.png")
    user_platform = platform.system()

    # Ensure the screenshots directory exists
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    if user_platform == "Windows":
        def _download_image(url, path):
            response = requests.get(url)
            # Fail before writing: otherwise an HTTP error page would be saved
            # as cursor.png and reused by every later request.
            response.raise_for_status()
            with open(path, 'wb') as file:
                file.write(response.content)

        cursor_path = os.path.join("screenshots", "cursor.png")
        if not os.path.exists(cursor_path):
            cursor_url = "https://vip.helloimg.com/images/2023/12/02/oQPzmt.png"
            _download_image(cursor_url, cursor_path)
        screenshot = pyautogui.screenshot()
        cursor_x, cursor_y = pyautogui.position()
        cursor = Image.open(cursor_path)
        # The cursor image doubles as its own paste mask.
        screenshot.paste(cursor, (cursor_x, cursor_y), cursor)
        screenshot.save(file_path)
    elif user_platform == "Linux":
        # Use xlib to prevent scrot dependency for Linux
        screen = Xlib.display.Display().screen()
        size = screen.width_in_pixels, screen.height_in_pixels
        screenshot = ImageGrab.grab(bbox=(0, 0, size[0], size[1]))
        screenshot.save(file_path)
    elif user_platform == "Darwin":  # (Mac OS)
        # Use the screencapture utility to capture the screen with the cursor
        subprocess.run(["screencapture", "-C", file_path])
    else:
        # Nothing was captured, so do not fall through to send_file: the file
        # may be missing, or left over from an earlier run.
        message = f"The platform you're using ({user_platform}) is not currently supported"
        print(message)
        return jsonify({"error": message}), 501

    return send_file(file_path, mimetype='image/png')
@app.route('/file', methods=['POST'])
def get_file():
    """Send back the file named by the 'file_path' form field.

    Returns:
        The file as an attachment; a JSON error with HTTP 400 when
        'file_path' is missing from the form; a JSON error with HTTP 404
        when sending the file raises ``FileNotFoundError``.
    """
    # The path to send comes from the POST form data.
    if 'file_path' not in request.form:
        return jsonify({"error": "file_path is required"}), 400
    requested_path = request.form['file_path']

    try:
        # Hand the file over as a download.
        return send_file(requested_path, as_attachment=True)
    except FileNotFoundError:
        # Missing file -> 404
        return jsonify({"error": "File not found"}), 404
@app.route('/platform', methods=['GET'])
def get_platform():
    """Return the string reported by ``platform.system()`` as the response body."""
    system_name = platform.system()
    return system_name
@app.route('/cursor_position', methods=['GET'])
def get_cursor_position():
    """Return the current cursor position as a JSON array ``[x, y]``.

    A bare ``(x, y)`` tuple cannot be returned from a Flask view: Flask reads
    a 2-tuple as ``(body, status)`` and rejects an integer body. The
    coordinates are therefore serialised explicitly.
    """
    # Sample the position once so x and y come from the same reading.
    position = pyautogui.position()
    return jsonify([position.x, position.y])
@app.route("/setup/change_wallpaper", methods=['POST'])
def change_wallpaper():
    """Set the desktop wallpaper to the image at the JSON body's 'path'.

    Returns:
        A success string; "Path not supplied!" with HTTP 400 when 'path' is
        missing or empty; a "File not found" string with HTTP 404 when the
        path does not exist; a failure string with HTTP 500 when the
        platform-specific call raises.
    """
    requested = request.json.get('path', None)
    if not requested:
        return "Path not supplied!", 400

    wallpaper = Path(requested)
    if not wallpaper.exists():
        return f"File not found: {wallpaper}", 404

    try:
        system_name = platform.system()
        if system_name == "Windows":
            import ctypes
            # NOTE(review): 20 is presumably SPI_SETDESKWALLPAPER and 3 the
            # update/broadcast flags — confirm against the Win32 docs.
            ctypes.windll.user32.SystemParametersInfoW(20, 0, str(wallpaper), 3)
        elif system_name == "Linux":
            gsettings_cmd = [
                "gsettings", "set", "org.gnome.desktop.background", "picture-uri", f"file://{wallpaper}",
            ]
            subprocess.run(gsettings_cmd)
        elif system_name == "Darwin":  # (Mac OS)
            finder_script = f'tell application "Finder" to set desktop picture to POSIX file "{wallpaper}"'
            subprocess.run(["osascript", "-e", finder_script])
        # Any other platform falls through and still reports success.
        return "Wallpaper changed successfully"
    except Exception as e:
        return f"Failed to change wallpaper. Error: {e}", 500
@app.route("/setup/download_file", methods=['POST'])
def download_file():
    """Download the JSON body's 'url' to 'path', trying up to three times.

    Parent directories of 'path' are created as needed. The response body is
    streamed to disk in 8 KiB chunks.

    Returns:
        A success string; "Path or URL not supplied!" with HTTP 400 when
        either field is missing or empty; a failure string with HTTP 500,
        including the last error, once every attempt has failed.
    """
    data = request.json
    url = data.get('url', None)
    path = data.get('path', None)

    if not url or not path:
        return "Path or URL not supplied!", 400

    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    max_retries = 3
    # The name bound by `except ... as e` is deleted when the except clause
    # ends, so keep the most recent failure in a variable that outlives the loop.
    last_error = None
    for i in range(max_retries):
        try:
            response = requests.get(url, stream=True)
            response.raise_for_status()

            with open(path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
            return "File downloaded successfully"

        except requests.RequestException as e:
            last_error = e
            print(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")

    return f"Failed to download {url}. No retries left. Error: {last_error}", 500
@app.route("/setup/open_file", methods=['POST'])
def open_file():
    """Open the file at the JSON body's 'path' with the platform's default handler.

    ``os.startfile`` exists only on Windows. On other systems the file is
    handed to ``open`` (macOS) or ``xdg-open`` (everything else), matching
    the platform dispatch the other endpoints of this server use.

    Returns:
        A success string; "Path not supplied!" with HTTP 400 when 'path' is
        missing or empty; a "File not found" string with HTTP 404 when the
        path does not exist; a failure string with HTTP 500 when launching
        the opener raises.
    """
    data = request.json
    path = data.get('path', None)

    if not path:
        return "Path not supplied!", 400

    path = Path(path)

    if not path.exists():
        return f"File not found: {path}", 404

    try:
        user_platform = platform.system()
        if user_platform == "Windows":
            os.startfile(path)
        else:
            opener = "open" if user_platform == "Darwin" else "xdg-open"
            # Popen rather than run: do not wait for the opener to exit,
            # mirroring os.startfile, which returns as soon as the app is launched.
            subprocess.Popen([opener, str(path)])
        return "File opened successfully"
    except Exception as e:
        return f"Failed to open {path}. Error: {e}", 500
if __name__ == '__main__':
    # NOTE(review): debug=True turns on Flask's debug mode and host="0.0.0.0"
    # binds every network interface. Together with the unchecked /execute
    # endpoint, this server should only run inside an isolated VM.
    app.run(debug=True, host="0.0.0.0")

View File

@@ -0,0 +1,5 @@
python3-xlib==0.15
PyAutoGUI==0.9.54
Pillow==10.1.0
git+https://github.com/moses-palmer/pynput.git@refs/pull/541/head # to make sure that it works on Apple Silicon
requests

View File

@@ -1,29 +0,0 @@
from flask import Flask, request, jsonify
import subprocess
app = Flask(__name__)
@app.route('/execute', methods=['POST'])
def execute_command():
data = request.json
# The 'command' key in the JSON request should contain the command to be executed.
command = data.get('command', '')
# Execute the command without any safety checks.
try:
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = process.communicate()
return jsonify({
'status': 'success',
'output': stdout.decode(),
'error': stderr.decode()
})
except Exception as e:
return jsonify({
'status': 'error',
'message': str(e)
}), 500
if __name__ == '__main__':
app.run(debug=True, host="0.0.0.0")

View File

@@ -0,0 +1,24 @@
# Evaluation examples
This directory holds the data examples used to benchmark how well agents interact with a GUI.
The examples are stored in `./examples`, where each data item is formatted as:
```
{
"id": "uid", # unique id
"snapshot": "snapshot_id", # the snapshot id of the environment, with some data already there and apps already opened, or just desktop
"instruction": "natural_language_instruction", # the natural language instruction of the task, what we want the agent to do
"source": "website_url", # where we know this example, some forum, or some website, or some paper
"config": {xxx}, # the scripts that set up the download and open-file actions, giving the initial state of a task
"trajectory": "trajectory_directory", # the trajectory directory, which contains the action sequence file, the screenshots and the recording video
"related_apps": ["app1", "app2", ...], # the related apps, which are opened during the task
"evaluator": "evaluation_dir", # the directory of the evaluator, which contains the evaluation script for this example
}
```
The `./trajectories` directory contains the annotated trajectories showing how to finish the task for each data item in `./examples`.
For now, this is under construction and has only been tested on Windows 10. Please:
- Modify the paths accordingly to run the evaluation;
- Let us know if some parts are overfitted to our environment.

View File

@@ -0,0 +1,22 @@
{
"id": "0bf05a7d-b28b-44d2-955a-50b41e24012a",
"snapshot": "libreoffice_calc",
"instruction": "I would like to pad all the numbers in the 'Old ID' column with zeros in front, to fill them up to seven digits in the 'New 7 Digit ID' column.",
"source": "https://www.youtube.com/shorts/FPAQaDTS8VY",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\Customers_New_7digit_Id.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Customers_New_7digit_Id.xlsx"
]
},
"trajectory": "trajectories/0bf05a7d-b28b-44d2-955a-50b41e24012a",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "2bd59342-0664-4ccb-ba87-79379096cc08",
"snapshot": "libreoffice_calc",
"instruction": "Make sparkline chart line by line",
"source": "https://www.youtube.com/shorts/L3Z-F1QTQFY",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx"
]
},
"trajectory": "trajectories/2bd59342-0664-4ccb-ba87-79379096cc08",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,34 @@
{
"id": "37608790-6147-45d0-9f20-1137bb35703d",
"snapshot": "libreoffice_calc",
"instruction": "Help me fill the columns of First Name, Last Name and Rank",
"source": "https://www.youtube.com/shorts/uzPo_CPCHH8",
"config": {
"download": [
[
"https://drive.usercontent.google.com/download?id=1wDqap5cBfxnlqTNrZG61k_wDWTujl6AU&export=download&authuser=0&confirm=t&uuid=fd183b89-76b7-4dc5-880e-1045ed769562&at=APZUnTWp9RMafMg0xohhBWazN3YD:1701785710674",
"C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
]
},
"trajectory": "trajectories/37608790-6147-45d0-9f20-1137bb35703d",
"related_apps": [
"libreoffice calc"
],
"evaluator": {
"func": "compare_table(expected, actual)",
"paths": {
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1dxpiUqP_CVvQp5tddxlwO3Cp1BqJ-ZDE&export=download&authuser=0&confirm=t&uuid=ccd204c7-07ce-4fdf-a5d4-a7e4f37b9ce6&at=APZUnTVBs7TgrVrDXpkiU8S7WbQo:1702360836747"
},
"actual": {
"type": "vm_file",
"path": "C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
}
}
}
}

View File

@@ -0,0 +1,22 @@
{
"id": "7a4e4bc8-922c-4c84-865c-25ba34136be1",
"snapshot": "libreoffice_calc",
"instruction": "Reorder the columns to be \"Data\", \"First Name\", \"Last Name\", \"Order ID\", \"Sales\"",
"source": "https://www.youtube.com/shorts/bvUhr1AHs44",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\Name_Order_Id_move_column.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Name_Order_Id_move_column.xlsx"
]
},
"trajectory": "trajectories/7a4e4bc8-922c-4c84-865c-25ba34136be1",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "7b802dad-6e0f-4204-9815-d4e3f57627d8",
"snapshot": "libreoffice_calc",
"instruction": "I would like to sort this table based on cell color, placing all the rows marked with pink at the beginning, while keeping their order among themselves unchanged.",
"source": "https://www.youtube.com/shorts/Of-lzeP1usE",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\Customer_Sort_by_cell_color.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Customer_Sort_by_cell_color.xlsx"
]
},
"trajectory": "trajectories/7b802dad-6e0f-4204-9815-d4e3f57627d8",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "7efeb4b1-3d19-4762-b163-63328d66303b",
"snapshot": "libreoffice_calc",
"instruction": "Fill in the serial numbers in the \"Serial #\" column",
"source": "https://www.youtube.com/shorts/4jzXfZNhfmk",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\Order_Sales_Serial#.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Order_Sales_Serial#.xlsx"
]
},
"trajectory": "trajectories/",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "a9f325aa-8c05-4e4f-8341-9e4358565f4f",
"snapshot": "libreoffice_calc",
"instruction": "Clean the messy movie titles and put them in the cleaned column",
"source": "https://www.youtube.com/shorts/A0gmEBRKXWs",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\"
]
},
"trajectory": "trajectories/a9f325aa-8c05-4e4f-8341-9e4358565f4f",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,34 @@
{
"id": "d681960f-7bc3-4286-9913-a8812ba3261a",
"snapshot": "libreoffice_calc",
"instruction": "According to the green table shown above, calculate and give each student a grade",
"source": "https://www.youtube.com/shorts/d7U1S_IsTVM",
"config": {
"download": [
[
"https://drive.usercontent.google.com/download?id=1wodZjx1KjThUsrtF6ZJaCTy1fQX4E9vA&export=download&authuser=0&confirm=t&uuid=d07ca312-1abc-40f2-81cd-d06e27119854&at=APZUnTWwjnxsHQYapSvpLR8NmlfV:1701785087048",
"C:\\Users\\tianbaox\\Desktop\\Student_Grades_and_Remarks.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Student_Grades_and_Remarks.xlsx"
]
},
"trajectory": "trajectories/d681960f-7bc3-4286-9913-a8812ba3261a",
"related_apps": [
"libreoffice calc"
],
"evaluator": {
"func": "compare_table(expected, actual)",
"paths": {
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1kfEHJH1n0yCsQp443IIFvdD9uWv0DWMr&export=download&authuser=0&confirm=t&uuid=d9907f65-8d39-4ecc-8747-b4ed7e6011f5&at=APZUnTXpPAnlh5sD6q-R8oQtqL6g:1702362952170"
},
"actual": {
"type": "vm_file",
"path": "C:\\Users\\tianbaox\\Desktop\\Student_Grades_and_Remarks.xlsx"
}
}
}
}

View File

@@ -0,0 +1,22 @@
{
"id": "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
"snapshot": "libreoffice_calc",
"instruction": "Traverse the table and paste it below",
"source": "https://www.youtube.com/shorts/t9JLUaT55UQ",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\"
]
},
"trajectory": "trajectories/eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
"snapshot": "libreoffice_calc",
"instruction": "Make each cell in the \"Pass/Fail/Held\" column a drop-down list",
"source": "https://www.youtube.com/shorts/tXOovKn0H68",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\"
]
},
"trajectory": "trajectories/ecb0df7a-4e8d-4a03-b162-053391d3afaf",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,34 @@
{
"id": "f9584479-3d0d-4c79-affa-9ad7afdd8850",
"snapshot": "libreoffice_calc",
"instruction": "Fill the missing row and column which show the total value",
"source": "https://youtube.com/shorts/feldd-Pn48c?si=9xJiem2uAHm6Jshb",
"config": {
"download": [
[
"https://drive.usercontent.google.com/download?id=1rwhniaClEkF8XFzdfaNUA6GmAiy4syMZ&export=download&authuser=0&confirm=t&uuid=6fdd5b04-85f4-45e1-ad74-368f8f2a82ab&at=APZUnTUP-JxPxLfNls6jXWghblQ5:1701766091851",
"C:\\Users\\tianbaox\\Desktop\\Quarterly_Product_Sales_by_Zone.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Quarterly_Product_Sales_by_Zone.xlsx"
]
},
"trajectory": "trajectories/f9584479-3d0d-4c79-affa-9ad7afdd8850",
"related_apps": [
"libreoffice calc"
],
"evaluator": {
"func": "compare_table(expected, actual)",
"paths": {
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=17f1wZuJPvUEc5at_Fy3c18VFdOk0x7xz&export=download&authuser=0&confirm=t&uuid=6d2edffd-0ce0-426e-9820-8af25b4667f3&at=APZUnTVh7JS85dwZBaV2hytWQgDK:1702361510956"
},
"actual": {
"type": "vm_file",
"path": "C:\\Users\\tianbaox\\Desktop\\Quarterly_Product_Sales_by_Zone.xlsx"
}
}
}
}

View File

@@ -0,0 +1,13 @@
{
"id": "",
"snapshot": "libreoffice_calc",
"instruction": "",
"source": "",
"config": {
},
"trajectory": "trajectories/",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

87
main.py
View File

@@ -1,56 +1,51 @@
from pprint import pprint import json
from desktop_env.envs.desktop_env import DesktopEnv, Action, MouseClick from desktop_env.envs.desktop_env import DesktopEnv
def get_human_action():
"""
Prompts the human player for an action and returns a structured action.
"""
print("\nAvailable actions:", [action.name for action in Action])
action_type = None
while action_type not in [action.value for action in Action]:
action_type = Action[input("Enter the type of action: ".strip())].value
action = {"action_type": action_type}
if action_type == Action.CLICK.value or action_type == Action.MOUSE_DOWN.value or action_type == Action.MOUSE_UP.value:
print("\n Available clicks:", [action.name for action in MouseClick])
click_type = input("Enter click type: ")
action["click_type"] = MouseClick[click_type].value
if action_type == Action.MOUSE_MOVE.value:
x = int(input("Enter x-coordinate for mouse move: "))
y = int(input("Enter y-coordinate for mouse move: "))
action["x"] = x
action["y"] = y
if action_type == Action.KEY.value:
key = input("Enter the key to press: ")
action["key"] = [ord(c) for c in key]
if action_type == Action.TYPE.value:
text = input("Enter the text to type: ")
action["text"] = [ord(c) for c in text]
return action
def human_agent(): def human_agent():
""" """
Runs the Gym environment with human input. Runs the Gym environment with human input.
""" """
env = DesktopEnv(path_to_vm="/home/yuri/vmware/Windows 10 x64/Windows 10 x64.vmx",
# path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx", with open("evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json", "r") as f:
username="user", example = json.load(f)
password="password",
# host="192.168.7.128", env = DesktopEnv(
host="http://192.168.7.129:5000", # path_to_vm=r"""C:\Users\tianbaox\Downloads\Windows 10 x64\Windows 10 x64.vmx""",
vm_os="windows") path_to_vm=r"""C:\Users\tianbaox\Documents\Virtual Machines\Win10\Win10.vmx""",
# path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
action_space="computer_13",
snapshot_path="base_setup3",
instruction=example["instruction"],
config=example["config"],
evaluator=example["evaluator"]
)
# reset the environment to certain snapshot
observation = env.reset() observation = env.reset()
done = False done = False
while not done: trajectory = [
action = get_human_action() {
observation, reward, done, info = env.step(action) "action_type": "MOVE_TO",
"parameters": {
"x": 754,
"y": 1057
}
},
{"action_type": "CLICK", "parameters": {"button": "right", "num_clicks": 1}}
]
for i in range(len(trajectory)):
# action = get_human_action()
# action = {
# "action_type": 0,
# "click_type": 3,
# }
print(trajectory[i])
observation, reward, done, info = env.step(trajectory[i], pause=5)
print("Observation:", observation) print("Observation:", observation)
print("Reward:", reward) print("Reward:", reward)
print("Info:", info) print("Info:", info)
@@ -61,8 +56,12 @@ def human_agent():
print("The episode is done.") print("The episode is done.")
break break
result = env.evaluate()
print("Result:", result)
env.close() env.close()
print("Environment closed.") print("Environment closed.")
if __name__ == "__main__": if __name__ == "__main__":
human_agent() human_agent()

View File

@@ -1,8 +1,12 @@
# fixme: Need to be rewrite on new action space
import os import os
import re
import base64 import base64
from desktop_env.envs.desktop_env import Action, MouseClick from desktop_env.envs.desktop_env import Action, MouseClick
import json5 import json
import requests import requests
from mm_agents.gpt_4v_prompt import SYS_PROMPT
# Function to encode the image # Function to encode the image
@@ -11,6 +15,38 @@ def encode_image(image_path):
return base64.b64encode(image_file.read()).decode('utf-8') return base64.b64encode(image_file.read()).decode('utf-8')
def parse_actions_from_string(input_string):
    """Extract JSON action objects from a model response.

    Fenced blocks tagged ```json are tried first, then untagged fenced
    blocks, and finally the whole string is parsed as one JSON document.

    Returns:
        A list with one parsed object per fenced block (or a single-element
        list when the whole string is JSON). When a fenced block is not
        valid JSON, the string "Failed to parse JSON: ..." is returned
        instead of a list.

    Raises:
        ValueError: when there are no fenced blocks and the whole string is
            not valid JSON.
    """
    fence_patterns = (r'```json\s+(.*?)\s+```', r'```\s+(.*?)\s+```')
    for fence_pattern in fence_patterns:
        blocks = re.findall(fence_pattern, input_string, re.DOTALL)
        if not blocks:
            continue
        # The first fence style that matches decides the outcome; later
        # styles are not consulted even when parsing fails.
        try:
            return [json.loads(block) for block in blocks]
        except json.JSONDecodeError as e:
            return f"Failed to parse JSON: {e}"

    # No fenced block at all: the response itself must be the JSON document.
    try:
        return [json.loads(input_string)]
    except json.JSONDecodeError as e:
        raise ValueError("Invalid response format: " + input_string)
class GPT4v_Agent: class GPT4v_Agent:
def __init__(self, api_key, instruction, model="gpt-4-vision-preview", max_tokens=300): def __init__(self, api_key, instruction, model="gpt-4-vision-preview", max_tokens=300):
self.instruction = instruction self.instruction = instruction
@@ -22,18 +58,13 @@ class GPT4v_Agent:
"Authorization": f"Bearer {api_key}" "Authorization": f"Bearer {api_key}"
} }
# load prompt from file
self.prompt = ""
with open("gpt_4v_prompt.txt", "r") as f:
self.prompt = f.read()
self.trajectory = [ self.trajectory = [
{ {
"role": "system", "role": "system",
"content": [ "content": [
{ {
"type": "text", "type": "text",
"text": self.prompt "text": SYS_PROMPT
}, },
] ]
} }
@@ -56,6 +87,12 @@ class GPT4v_Agent:
} }
] ]
}) })
traj_to_show = []
for i in range(len(self.trajectory)):
traj_to_show.append(self.trajectory[i]["content"][0]["text"])
if len(self.trajectory[i]["content"]) > 1:
traj_to_show.append("screenshot_obs")
print("Trajectory:", traj_to_show)
payload = { payload = {
"model": self.model, "model": self.model,
"messages": self.trajectory, "messages": self.trajectory,
@@ -63,11 +100,15 @@ class GPT4v_Agent:
} }
response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers, json=payload) response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers, json=payload)
action = self.parse_action(response.json()['choices'][0]['message']['content']) try:
actions = self.parse_actions(response.json()['choices'][0]['message']['content'])
except:
print("Failed to parse action from response:", response.json()['choices'][0]['message']['content'])
actions = None
return action return actions
def parse_action(self, response: str): def parse_actions(self, response: str):
# response example # response example
""" """
```json ```json
@@ -79,12 +120,7 @@ class GPT4v_Agent:
""" """
# parse from the response # parse from the response
if response.startswith("```json"): actions = parse_actions_from_string(response)
action = json5.loads(response[7:-3])
elif response.startswith("```"):
action = json5.loads(response[3:-3])
else:
action = json5.loads(response)
# add action into the trajectory # add action into the trajectory
self.trajectory.append({ self.trajectory.append({
@@ -98,25 +134,28 @@ class GPT4v_Agent:
}) })
# parse action # parse action
parsed_action = {} parsed_actions = []
action_type = Action[action['action_type']].value for action in actions:
parsed_action["action_type"] = action_type parsed_action = {}
action_type = Action[action['action_type']].value
parsed_action["action_type"] = action_type
if action_type == Action.CLICK.value or action_type == Action.MOUSE_DOWN.value or action_type == Action.MOUSE_UP.value: if action_type == Action.CLICK.value or action_type == Action.MOUSE_DOWN.value or action_type == Action.MOUSE_UP.value:
parsed_action["click_type"] = MouseClick[action['click_type']].value parsed_action["click_type"] = MouseClick[action['click_type']].value
if action_type == Action.MOUSE_MOVE.value: if action_type == Action.MOUSE_MOVE.value:
parsed_action["x"] = action["x"] parsed_action["x"] = action["x"]
parsed_action["y"] = action["y"] parsed_action["y"] = action["y"]
# fixme: could these two actions be merged?? if action_type == Action.KEY.value:
if action_type == Action.KEY.value: parsed_action["key"] = action["key"] # handle the condition of single key and multiple keys
parsed_action["key"] = [ord(c) for c in action["key"]]
if action_type == Action.TYPE.value: if action_type == Action.TYPE.value:
parsed_action["text"] = [ord(c) for c in action["text"]] parsed_action["text"] = action["text"]
return parsed_action parsed_actions.append(parsed_action)
return parsed_actions
if __name__ == '__main__': if __name__ == '__main__':
@@ -125,4 +164,3 @@ if __name__ == '__main__':
agent = GPT4v_Agent(api_key=api_key, instruction="Open Google Sheet") agent = GPT4v_Agent(api_key=api_key, instruction="Open Google Sheet")
print(agent.predict(obs="stackoverflow.png")) print(agent.predict(obs="stackoverflow.png"))

View File

@@ -0,0 +1,54 @@
# System prompt for the agent that uses the enumerated action space
# (MOUSE_MOVE, CLICK, MOUSE_DOWN, MOUSE_UP, KEY, KEY_DOWN, KEY_UP, TYPE).
# The backslash before the backtick near the end is written as "\\" so the
# literal holds the same characters without an invalid escape sequence.
# NOTE(review): the sentence "You can predict multiple actions at one step, but
# you should only return one action for each step." contradicts itself — confirm
# the intended instruction before rewording the prompt.
SYS_PROMPT = """
You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection.
For each step, you will get an observation of an image, which is the screenshot of the computer screen. And you will predict the action of the computer based on the image.
Here is the description of the action space:
Firstly you need to predict the class of your action, select from one below:
- **MOUSE_MOVE**: move the mouse to a specific position
- **CLICK**: click on the screen
- **MOUSE_DOWN**: press the mouse button
- **MOUSE_UP**: release the mouse button
- **KEY**: press a key on the keyboard
- **KEY_DOWN**: press a key on the keyboard
- **KEY_UP**: release a key on the keyboard
- **TYPE**: type a string on the keyboard
Then you need to predict the parameters of your action:
- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080)
for example, format as:
```
{
"action_type": "MOUSE_MOVE",
"x": 1319.11,
"y": 65.06
}
```
- For [CLICK, MOUSE_DOWN, MOUSE_UP], you need to specify the click_type as well, select from [LEFT, MIDDLE, RIGHT, WHEEL_UP, WHEEL_DOWN], which means you click the left button, middle button, right button, wheel up or wheel down of your mouse:
for example, format as:
```
{
"action_type": "CLICK",
"click_type": "LEFT"
}
```
- For [KEY, KEY_DOWN, KEY_UP], you need to choose a(multiple) key(s) from the keyboard
for example, format as:
```
{
"action_type": "KEY",
"key": "ctrl+c"
}
```
- For TYPE, you need to specify the text you want to type
for example, format as:
```
{
"action_type": "TYPE",
"text": "hello world"
}
```
For every step, you should only return the action_type and the parameters of your action as a dict, without any other things. You MUST wrap the dict with backticks (\\`).
You can predict multiple actions at one step, but you should only return one action for each step.
You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty.
"""

View File

@@ -0,0 +1,8 @@
# System prompt for a screenshot-driven agent: each step the model is shown a
# screenshot and must answer with `pyautogui` Python code (one or more lines),
# or `None` when it cannot perform the action.
SYS_PROMPT = """
You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection.
For each step, you will get an observation of an image, which is the screenshot of the computer screen. And you will predict the action of the computer based on the image.
You are required to use `pyautogui` to perform the action.
Return one line or multiple lines of python code to perform the action each time, be time efficient.
Return `None` if you cannot perform the action.
"""

124
mm_agents/sam_test.py Normal file
View File

@@ -0,0 +1,124 @@
import torch
from PIL import Image
import requests
from transformers import SamModel, SamProcessor
import numpy as np
import matplotlib.pyplot as plt
import os
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort that can occur when several libraries each bundle their own OpenMP;
# confirm it is still needed and whether it must be set before the imports.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
def show_mask(mask, ax, random_color=False):
    """Draw ``mask`` on ``ax`` as a semi-transparent RGBA overlay.

    The last two dimensions of ``mask`` are taken as (height, width). The
    overlay colour is a fixed blue unless ``random_color`` is true, in which
    case a random RGB triple is used; alpha is 0.6 in both cases.
    """
    rgba = (
        np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
        if random_color
        else np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
    )
    height, width = mask.shape[-2:]
    # Broadcast (H, W, 1) * (1, 1, 4) -> (H, W, 4): one RGBA pixel per mask cell.
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)
def show_box(box, ax):
    """Outline ``box`` on ``ax`` in green.

    ``box`` is indexed as ``(x0, y0, x1, y1)``: the first two entries are the
    rectangle's anchor corner and the last two the opposite corner.
    """
    left, top = box[0], box[1]
    width = box[2] - box[0]
    height = box[3] - box[1]
    outline = plt.Rectangle(
        (left, top), width, height,
        edgecolor='green', facecolor=(0, 0, 0, 0), lw=2,
    )
    ax.add_patch(outline)
def show_boxes_on_image(raw_image, boxes):
    """Open a 10x10 figure showing ``raw_image`` with every box in ``boxes`` outlined."""
    plt.figure(figsize=(10, 10))
    plt.imshow(raw_image)
    for rect in boxes:
        current_axes = plt.gca()
        show_box(rect, current_axes)
    plt.axis('on')
    plt.show()
def show_points_on_image(raw_image, input_points, input_labels=None):
    """Open a 10x10 figure showing ``raw_image`` with the prompt points marked.

    ``input_points`` is converted to an array and indexed as ``[:, 0]`` /
    ``[:, 1]``. When ``input_labels`` is omitted every point gets label 1.
    """
    plt.figure(figsize=(10, 10))
    plt.imshow(raw_image)
    points = np.array(input_points)
    labels = (
        np.ones_like(points[:, 0])
        if input_labels is None
        else np.array(input_labels)
    )
    show_points(points, labels, plt.gca())
    plt.axis('on')
    plt.show()
def show_points_and_boxes_on_image(raw_image, boxes, input_points, input_labels=None):
    """Open a 10x10 figure showing ``raw_image`` with prompt points and boxes.

    This function was previously defined twice, verbatim; the second
    definition silently replaced the first. Only one definition is kept.

    Args:
        raw_image: Image passed straight to ``plt.imshow``.
        boxes: Iterable of boxes, each forwarded to ``show_box``.
        input_points: Point coordinates; converted to an array and indexed
            as ``[:, 0]`` / ``[:, 1]``.
        input_labels: Optional per-point labels. When omitted, every point
            gets label 1.
    """
    plt.figure(figsize=(10, 10))
    plt.imshow(raw_image)
    input_points = np.array(input_points)
    if input_labels is None:
        labels = np.ones_like(input_points[:, 0])
    else:
        labels = np.array(input_labels)
    show_points(input_points, labels, plt.gca())
    for box in boxes:
        show_box(box, plt.gca())
    plt.axis('on')
    plt.show()
def show_points(coords, labels, ax, marker_size=375):
    """Scatter prompt points on ``ax`` as stars, coloured by label.

    Points whose label equals 1 are drawn in green, points whose label
    equals 0 in red; both use a white edge and size ``marker_size``.
    """
    positives = coords[labels == 1]
    negatives = coords[labels == 0]
    for group, colour in ((positives, 'green'), (negatives, 'red')):
        xs = group[:, 0]
        ys = group[:, 1]
        ax.scatter(xs, ys, color=colour, marker='*', s=marker_size,
                   edgecolor='white', linewidth=1.25)
def show_masks_on_image(raw_image, masks, scores):
    """Plot each predicted mask over ``raw_image`` in a row of subplots.

    Args:
        raw_image: The source image; converted with ``np.array`` for display.
        masks: Tensor of masks whose last two dimensions are (height, width).
            Any leading dimensions are flattened into one "prediction" axis.
        scores: Tensor with one score per mask; flattened to 1-D.

    Each subplot is titled with the mask's 1-based index and its score.
    """
    # Flatten leading dims explicitly instead of a bare ``squeeze()``: a bare
    # squeeze also removes the prediction axis when there is only one mask,
    # after which ``scores.shape[-1]`` raises and the zip iterates image rows.
    masks = masks.reshape(-1, *masks.shape[-2:])
    scores = scores.reshape(-1)
    nb_predictions = scores.shape[0]

    # squeeze=False keeps ``axes`` a 2-D array even for a single subplot;
    # otherwise matplotlib returns a bare Axes and ``axes[i]`` fails.
    _, axes = plt.subplots(1, nb_predictions, figsize=(15, 15), squeeze=False)
    axes = axes[0]

    for i, (mask, score) in enumerate(zip(masks, scores)):
        mask = mask.cpu().detach()
        axes[i].imshow(np.array(raw_image))
        show_mask(mask, axes[i])
        axes[i].title.set_text(f"Mask {i + 1}, Score: {score.item():.3f}")
        axes[i].axis("off")
    plt.show()
# --- Demo: run SAM on one sample image and plot the predicted masks ---------
device = "cuda" if torch.cuda.is_available() else "cpu"
model = SamModel.from_pretrained("facebook/sam-vit-huge").to(device)
processor = SamProcessor.from_pretrained("facebook/sam-vit-huge")

img_url = "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png"
# Bound the download and fail loudly on an HTTP error; otherwise a hung
# connection blocks forever and an error page is handed to PIL as "image" data.
response = requests.get(img_url, stream=True, timeout=30)
response.raise_for_status()
raw_image = Image.open(response.raw).convert("RGB")
plt.imshow(raw_image)

inputs = processor(raw_image, return_tensors="pt").to(device)
# Inference only: no gradients are needed.
with torch.no_grad():
    outputs = model(**inputs)

# Post-process the predicted masks using the original and resized input sizes
# recorded by the processor.
masks = processor.image_processor.post_process_masks(
    outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()
)
scores = outputs.iou_scores
show_masks_on_image(raw_image, masks[0], scores)

View File

@@ -1,8 +1,16 @@
numpy numpy~=1.24.3
Pillow Pillow~=10.1.0
fabric fabric
gymnasium gymnasium~=0.28.1
requests requests~=2.31.0
transformers transformers~=4.35.2
torch torch~=2.1.1+cu118
accelerate accelerate
opencv-python~=4.8.1.78
matplotlib~=3.7.4
pynput~=1.7.6
pyautogui~=0.9.54
psutil~=5.9.6
tqdm~=4.65.0
pandas~=2.0.3
flask~=3.0.0

Binary file not shown.

Before

Width:  |  Height:  |  Size: 356 KiB

After

Width:  |  Height:  |  Size: 826 KiB

File diff suppressed because one or more lines are too long

1788
utils/complex_clicking.jsonl Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -3,76 +3,97 @@ import sys, pathlib;
sys.path.append(str(pathlib.Path(__file__).parents[1])) sys.path.append(str(pathlib.Path(__file__).parents[1]))
import os import os
import math
import json import json
import numpy as np
from typing import List from typing import List
from desktop_env.envs.desktop_env import Action, MouseClick from copy import deepcopy
pynput2pyautogui_key = {
"alt_l": "altleft",
"alt_r": "altright",
}
COMMAND_KEYS = ['accept', 'add', 'alt', 'altleft', 'altright', 'apps', 'backspace', 'browserback', 'browserfavorites', 'browserforward', 'browserhome', 'browserrefresh', 'browsersearch', 'browserstop', 'capslock', 'clear', 'convert', 'ctrl', 'ctrlleft', 'ctrlright', 'decimal', 'del', 'delete', 'divide', 'down', 'end', 'enter', 'esc', 'escape', 'execute', 'f1', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f2', 'f20', 'f21', 'f22', 'f23', 'f24', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'final', 'fn', 'hanguel', 'hangul', 'hanja', 'help', 'home', 'insert', 'junja', 'kana', 'kanji', 'launchapp1', 'launchapp2', 'launchmail', 'launchmediaselect', 'left', 'modechange', 'multiply', 'nexttrack', 'nonconvert', 'num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9', 'numlock', 'pagedown', 'pageup', 'pause', 'pgdn', 'pgup', 'playpause', 'prevtrack', 'print', 'printscreen', 'prntscrn', 'prtsc', 'prtscr', 'return', 'right', 'scrolllock', 'select', 'separator', 'shift', 'shiftleft', 'shiftright', 'sleep', 'stop', 'subtract', 'tab', 'up', 'volumedown', 'volumemute', 'volumeup', 'win', 'winleft', 'winright', 'yen', 'command', 'option', 'optionleft', 'optionright', 'alt_l', 'alt_r']
typingkey2str = {
"space" : " ",
}
class DuckTrackEventActionConverter: class DuckTrackEventActionConverter:
def __init__(self, human_readable: str, compress_move: bool = True): def __init__(self, ):
self.human_readable = human_readable """"""
self.compress_move = compress_move
def enum_to_str(self, enum): ### Enumerations ###
"""Converts an enum to its string representation if HUMAN_READABLE is True, otherwise returns its value.""" def move_event_to_action(self, event: dict, action_space: str = "computer_13"):
return enum.name if self.human_readable else enum.value """Converts a mouse move event to its corresponding action."""
if action_space == "computer_13":
return {
"action_type": "MOVE_TO",
"parameters": {
"x": event["x"],
"y": event["y"]
}
}
elif action_space == "pyautogui":
return "pyautogui.moveTo({}, {})".format(event["x"], event["y"])
def compress_mouse_move(self, data: List[dict], index: int): def click_event_to_action(self, event: dict, action_space: str = "computer_13"):
"""Compresses consecutive mouse move events into first and last move events.""" """Converts a mouse click event to its corresponding action."""
first_move, last_move = data[index], data[index] action = {
while index < len(data) and data[index]["action"] == "move": "action_type": None,
last_move = data[index] "parameters": {
index += 1 "button": None
return first_move, last_move, index }
}
def move_event_to_action(self, event: dict):
return {"action_type": self.enum_to_str(Action.MOUSE_MOVE),
"x": event["x"],
"y": event["y"]}
def click_event_to_action(self, event: dict):
action = {}
mouse_button = event["button"] mouse_button = event["button"]
mouse_pressed = event["pressed"] mouse_pressed = event["pressed"]
if mouse_pressed: if mouse_pressed:
action["action_type"] = self.enum_to_str(Action.MOUSE_DOWN) action["action_type"] = "MOUSE_DOWN"
elif not mouse_pressed: elif not mouse_pressed:
action["action_type"] = self.enum_to_str(Action.MOUSE_UP) action["action_type"] = "MOUSE_UP"
else: else:
raise NotImplementedError(mouse_pressed) raise NotImplementedError(mouse_pressed)
if mouse_button == "left": if mouse_button in ["left", "right", "middle"]:
action["click_type"] = self.enum_to_str(MouseClick.LEFT) action["parameters"]["button"] = mouse_button
elif mouse_button == "right":
action["click_type"] = self.enum_to_str(MouseClick.RIGHT)
elif mouse_button == "middle":
action["click_type"] = self.enum_to_str(MouseClick.MIDDLE)
else: else:
raise NotImplementedError(mouse_button) raise NotImplementedError(mouse_button)
return action return action
def press_event_to_action(self, event: dict): def press_event_to_action(self, event: dict, action_space: str = "computer_13"):
return {"action_type": self.enum_to_str(Action.KEY_DOWN), """Converts a key down event to its corresponding action."""
"key": [ord(c) for c in event["name"]]} # NOTE: the `key down`, `press` have the same meaning here, while different in pyautogui
return {
"action_type": "KEY_DOWN",
"parameters": {
"key": event["name"] if event["name"] not in pynput2pyautogui_key else pynput2pyautogui_key[
event["name"]]
}
}
def release_event_to_action(self, event: dict): def release_event_to_action(self, event: dict, action_space: str = "computer_13"):
return {"action_type": self.enum_to_str(Action.KEY_UP), """Converts a key release event to its corresponding action."""
"key": [ord(c) for c in event["name"]]} return {
"action_type": "KEY_UP",
"parameters": {
"key": event["name"] if event["name"] not in pynput2pyautogui_key else pynput2pyautogui_key[
event["name"]]
}
}
def scroll_event_to_action(self, event: dict): def scroll_event_to_action(self, event: dict, action_space: str = "computer_13"):
# TODO: need to confirm if df < 0 means scroll up or down """Converts a scroll event to its corresponding action."""
if event["dy"] < 0: return {
down = False "action_type": "SCROLL",
else: "parameters": {
down = True "dx": event["dx"],
"dy": event["dy"]
}
}
return {"action_type": self.enum_to_str(Action.CLICK), def event_to_action(self, event: dict, action_space: str = "computer_13"):
"click_type": self.enum_to_str(MouseClick.WHEEL_DOWN) if down else self.enum_to_str(
MouseClick.WHEEL_UP)}
def event_to_action(self, event: dict):
"""Converts an event to its corresponding action based on the event type.""" """Converts an event to its corresponding action based on the event type."""
if event["action"] == "move": if event["action"] == "move":
return self.move_event_to_action(event) return self.move_event_to_action(event)
@@ -87,114 +108,243 @@ class DuckTrackEventActionConverter:
else: else:
raise NotImplementedError(event["action"]) raise NotImplementedError(event["action"])
def ducktrack_event_file_to_action(self, ducktrack_event_file: str, out_file: str, compress_move: bool = None): ### Compressing ###
def compress_mouse_move(self, data: List[dict], index: int):
    """Collapse a run of consecutive "move" events into its final event.

    Scans forward from ``data[index]`` while events have ``action == "move"``.

    Returns:
        ``(last_move, next_index)``: the last move event of the run (or
        ``data[index]`` itself when it is not a move) and the index of the
        first event after the run.
    """
    final_move = data[index]
    cursor = index
    while cursor < len(data):
        candidate = data[cursor]
        if candidate["action"] != "move":
            break
        final_move = candidate
        cursor += 1
    return final_move, cursor
def compress_scroll(self, data: List[dict], index: int):
    """Merge a run of consecutive same-direction scroll events into one event.

    Starting at ``data[index]``, consumes events while they are scroll events
    whose ``dx`` and ``dy`` have the same sign as the first event's, summing
    their deltas.

    Fixes over the previous version: the first event's deltas were counted
    twice (used to seed the totals and then added again by the loop), and the
    last input event was overwritten in place with the totals.

    Args:
        data: The full event list.
        index: Position of the first scroll event of the run.

    Returns:
        ``(merged_event, next_index)``: a copy of the run's last event with
        ``dx``/``dy`` replaced by the summed deltas, and the index of the
        first event after the run. If ``data[index]`` is not a scroll event,
        a copy of it is returned unchanged together with ``index``.

    Raises:
        IndexError: if ``index`` is out of range.
        KeyError: if ``data[index]`` has no ``dx``/``dy``.
    """
    first = data[index]
    # The direction of the run is fixed by its first event.
    sign_dx, sign_dy = np.sign(first["dx"]), np.sign(first["dy"])
    total_dx, total_dy = 0, 0
    last_scroll = first
    start = index

    while index < len(data):
        event = data[index]
        if (event["action"] != "scroll"
                or np.sign(event["dx"]) != sign_dx
                or np.sign(event["dy"]) != sign_dy):
            break
        last_scroll = event
        total_dx += event["dx"]
        total_dy += event["dy"]
        index += 1

    # Copy so the recorded input events are left untouched.
    merged = dict(last_scroll)
    if index > start:
        merged["dx"], merged["dy"] = total_dx, total_dy
    return merged, index
### Converting ###
def ducktrack_event_file_to_action(self, ducktrack_event_file: str, out_file: str, compress_move: bool = True, compress_scroll: bool = True, compress_click: bool = True,compress_drag: bool = True, compress_press_key: bool = True, compress_typing: bool = True):
"""Converts DuckTrack event data to a list of actions and saves them to a file.""" """Converts DuckTrack event data to a list of actions and saves them to a file."""
if not os.path.exists(ducktrack_event_file): if not os.path.exists(ducktrack_event_file):
raise FileNotFoundError(ducktrack_event_file) raise FileNotFoundError(ducktrack_event_file)
# set to default
if compress_move is None:
compress_move = self.compress_move
with open(ducktrack_event_file, 'r') as file: with open(ducktrack_event_file, 'r') as file:
data = [json.loads(line) for line in file] events = [json.loads(line) for line in file]
result = {"action": [], "event": []} # Save the compressed actions in a list
result = []
index = 0 index = 0
presses_to_skip = 0
releases_to_skip = 0
move_to_skip = 0
keys_pressed = []
# Compress the mouse move events # Compress the mouse move events
while index < len(data): while index < len(events):
event = data[index]
if event["action"] == "move" and compress_move: event = events[index]
first_move, last_move, index = self.compress_mouse_move(data, index)
result["action"].extend([self.event_to_action(last_move)]) def do_mouse_press(button: str, _index: int):
result["event"].extend([last_move])
else: num_clicks = 0
result["action"].append(self.event_to_action(event)) mouse_pressed = True
result["event"].append(event) skip_move = 0
click_x, click_y = event["x"], event["y"]
for j, next_event in enumerate(events[index + 1:]):
# make sure the time between mouse clicks is less than 500ms
if next_event["time_stamp"] - event["time_stamp"] > 0.5:
if num_clicks > 0:
if result[-1:][0]["action_type"] == "MOVE_TO":
result.pop()
result.append({
"action_type": "CLICK",
"parameters": {
"button": button,
"x" : click_x,
"y" : click_y,
"num_clicks": num_clicks
}
})
return num_clicks-1, num_clicks, _index, skip_move
break
if "x" in next_event and "y" in next_event:
# if the mouse moves out of the click radius/rectangle, it is not a click sequence
if math.sqrt((next_event["y"] - event["y"]) ** 2 +
(next_event["x"] - event["x"]) ** 2) > 4:
if num_clicks > 0:
if result[-1:][0]["action_type"] == "MOVE_TO":
result.pop()
result.append({
"action_type": "CLICK",
"parameters": {
"button": button,
"x" : click_x,
"y" : click_y,
"num_clicks": num_clicks
}
})
return num_clicks-1, num_clicks, _index, skip_move
break
if next_event["action"] == "click" and compress_click:
if not next_event["pressed"]:
num_clicks += 1
mouse_pressed = False
if num_clicks == 3:
if result[-1:][0]["action_type"] == "MOVE_TO":
result.pop()
result.append({
"action_type": "CLICK",
"parameters": {
"button": button,
"x" : click_x,
"y" : click_y,
"num_clicks": 3
}
})
return 2, 3, _index, skip_move
elif next_event["pressed"]:
mouse_pressed = True
else:
raise NotImplementedError(next_event["pressed"])
elif next_event["action"] != "click" and not mouse_pressed:
if next_event["action"] == "move":
if next_event["x"] == click_x and next_event["y"] == click_y:
skip_move += 1
continue
if result[-1:][0]["action_type"] == "MOVE_TO":
result.pop()
result.append({
"action_type": "CLICK",
"parameters": {
"button": button,
"x" : click_x,
"y" : click_y,
"num_clicks": num_clicks
}
})
return num_clicks-1, num_clicks, _index, skip_move
# Compress {MOUSE_DOWN, MOVE, MOUSE_UP} into DRAG_TO event
elif next_event["action"] == "move" and compress_drag:
if next_event["x"] == click_x and next_event["y"] == click_y:
skip_move += 1
continue
last_move, _index = self.compress_mouse_move(events, _index+1)
result.append({
"action_type": "DRAG_TO",
"parameters": {
"x": last_move["x"],
"y": last_move["y"]
}
})
return 0, 1, _index, skip_move
result.append({
"action_type": "MOUSE_DOWN",
"parameters": {
"button": button
}
})
return 0, 0, _index, skip_move
if event["action"] == "move":
if move_to_skip > 0:
move_to_skip -= 1
index += 1
continue
if compress_move:
last_move, index = self.compress_mouse_move(events, index)
result.extend([self.event_to_action(last_move)])
elif event["action"] == "scroll" and compress_scroll:
last_scroll, index = self.compress_scroll(events, index)
result.extend([self.event_to_action(last_scroll)])
elif event["action"] == "click":
button = event["button"]
if event["pressed"]:
if presses_to_skip == 0:
presses, releases, index, moves = do_mouse_press(button, index)
presses_to_skip += presses
releases_to_skip += releases
move_to_skip += moves
else:
presses_to_skip -= 1
else:
if releases_to_skip == 0:
result.append({
"action_type": "MOUSE_UP",
"parameters": {
"button": button
}
})
else:
releases_to_skip -= 1
index += 1 index += 1
elif event["action"] == "press" and event["name"] not in COMMAND_KEYS and compress_typing:
# Compress the key down and key up actions typing_words = ""
# todo: handling the key down and key up events while index < len(events) and events[index]["action"] in ["press", "release"] and events[index]["name"] not in COMMAND_KEYS:
_new_actions = [] if events[index]["action"] == "press":
_action = list(result["action"]) keys_pressed.append(events[index]["name"])
idx = 0 typing_words += events[index]["name"] if events[index]["name"] not in typingkey2str else typingkey2str[events[index]["name"]]
elif events[index]["action"] == "release":
while True: keys_pressed.remove(events[index]["name"])
if idx >= len(_action): index += 1
break if len(typing_words) > 1:
result.append({
if _action[idx]["action_type"] == self.enum_to_str(Action.KEY_DOWN): "action_type": "TYPING",
typed_text = [] "parameters": {
while idx < len(_action) and _action[idx]["action_type"] in [self.enum_to_str(Action.KEY_DOWN), self.enum_to_str(Action.KEY_UP)] and len(_action[idx]["key"]) == 1: "text": typing_words
if _action[idx]["action_type"] == self.enum_to_str(Action.KEY_DOWN): }
typed_text.append(chr(_action[idx]["key"][0])) })
idx += 1
if typed_text:
_new_actions.append({"action_type": self.enum_to_str(Action.TYPE), "text": typed_text})
else: else:
_new_actions.append(_action[idx]) result.append({
idx += 1 "action_type": "PRESS",
"parameters": {
"key": typing_words
}
})
elif event["action"] == "press" and compress_press_key:
keys_pressed.append(event["name"])
result.append({
"action_type": "PRESS",
"parameters": {
"key": event["name"] if event["name"] not in pynput2pyautogui_key else pynput2pyautogui_key[
event["name"]]
}
})
index += 1
elif event["action"] == "release" and compress_press_key:
keys_pressed.remove(event["name"])
index += 1
else: else:
_new_actions.append(_action[idx]) result.append(self.event_to_action(event))
idx += 1 index += 1
result["action"] = _new_actions
# Compress the scroll up and scroll down events
# todo: handling the key down and key up events
_new_actions = []
_action = list(result["action"])
idx = 0
while True:
if idx >= len(_action):
break
if _action[idx]["action_type"] == self.enum_to_str(Action.CLICK) and _action[idx]["click_type"] in [self.enum_to_str(MouseClick.WHEEL_UP), self.enum_to_str(MouseClick.WHEEL_DOWN)]:
typed_text = []
while idx < len(_action) and _action[idx]["action_type"] == self.enum_to_str(Action.CLICK) and _action[idx]["click_type"] in [self.enum_to_str(MouseClick.WHEEL_UP), self.enum_to_str(MouseClick.WHEEL_DOWN)]:
if _action[idx]["click_type"] == self.enum_to_str(MouseClick.WHEEL_UP):
typed_text.append("UP")
idx += 1
elif _action[idx]["click_type"] == self.enum_to_str(MouseClick.WHEEL_DOWN):
typed_text.append("DOWN")
idx += 1
_new_actions.append({"action_type": self.enum_to_str(Action.CLICK), "click_type": "SCROLL", "text": typed_text})
else:
_new_actions.append(_action[idx])
idx += 1
result["action"] = _new_actions
# Compress the mouse down and mouse up actions
# todo: handling the key down and key up events
_new_actions = []
_action = list(result["action"])
idx = 0
while True:
if idx >= len(_action):
break
if _action[idx]["action_type"] == self.enum_to_str(Action.MOUSE_DOWN):
if idx + 1 < len(_action) and _action[idx+1]["action_type"] == self.enum_to_str(Action.MOUSE_UP):
_new_actions.append({"action_type": self.enum_to_str(Action.CLICK), "click_type": _action[idx]["click_type"]})
idx += 2
else:
_new_actions.append(_action[idx])
idx += 1
else:
_new_actions.append(_action[idx])
idx += 1
result["action"] = _new_actions
with open(out_file, "w") as f: with open(out_file, "w") as f:
json.dump(result, f) json.dump(result, f)
if __name__ == "__main__": if __name__ == "__main__":
converter = DuckTrackEventActionConverter(human_readable=True) converter = DuckTrackEventActionConverter()
converter.ducktrack_event_file_to_action(ducktrack_event_file="sample.jsonl", converter.ducktrack_event_file_to_action(
out_file="output.json", ducktrack_event_file="complex_clicking.jsonl",
compress_move=True) out_file="complex_clicking5.json",
compress_move=True,
compress_scroll=True,
compress_click=True,
compress_drag=True,
compress_press_key=True,
compress_typing=True,
)

111
utils/events_calc.json Normal file
View File

@@ -0,0 +1,111 @@
[
{
"action_type": "MOVE_TO",
"parameters": {
"x": 152,
"y": 259
}
},
{
"action_type": "MOUSE_DOWN",
"parameters": {
"button": "left"
}
},
{
"action_type": "MOVE_TO",
"parameters": {
"x": 464,
"y": 317
}
},
{
"action_type": "MOUSE_UP",
"parameters": {
"button": "left"
}
},
{
"action_type": "MOVE_TO",
"parameters": {
"x": 466,
"y": 317
}
},
{
"action_type": "KEY_DOWN",
"parameters": {
"key": "altleft"
}
},
{
"action_type": "KEY_DOWN",
"parameters": {
"key": "="
}
},
{
"action_type": "KEY_UP",
"parameters": {
"key": "="
}
},
{
"action_type": "KEY_UP",
"parameters": {
"key": "altleft"
}
},
{
"action_type": "MOVE_TO",
"parameters": {
"x": 709,
"y": 1047
}
},
{
"action_type": "MOUSE_DOWN",
"parameters": {
"button": "left"
}
},
{
"action_type": "MOVE_TO",
"parameters": {
"x": 709,
"y": 1047
}
},
{
"action_type": "MOUSE_UP",
"parameters": {
"button": "left"
}
},
{
"action_type": "MOVE_TO",
"parameters": {
"x": 717,
"y": 304
}
},
{
"action_type": "MOUSE_DOWN",
"parameters": {
"button": "left"
}
},
{
"action_type": "MOVE_TO",
"parameters": {
"x": 717,
"y": 304
}
},
{
"action_type": "MOUSE_UP",
"parameters": {
"button": "left"
}
}
]

423
utils/events_calc.jsonl Normal file
View File

@@ -0,0 +1,423 @@
{"time_stamp": 21028.2899763, "action": "move", "x": 686, "y": 306}
{"time_stamp": 21028.2965794, "action": "move", "x": 684, "y": 306}
{"time_stamp": 21028.3046644, "action": "move", "x": 678, "y": 306}
{"time_stamp": 21028.3126807, "action": "move", "x": 670, "y": 306}
{"time_stamp": 21028.3208329, "action": "move", "x": 661, "y": 306}
{"time_stamp": 21028.3288313, "action": "move", "x": 645, "y": 306}
{"time_stamp": 21028.336626, "action": "move", "x": 625, "y": 306}
{"time_stamp": 21028.3445457, "action": "move", "x": 603, "y": 305}
{"time_stamp": 21028.3527487, "action": "move", "x": 574, "y": 303}
{"time_stamp": 21028.3606394, "action": "move", "x": 544, "y": 301}
{"time_stamp": 21028.3688565, "action": "move", "x": 508, "y": 300}
{"time_stamp": 21028.3768381, "action": "move", "x": 471, "y": 298}
{"time_stamp": 21028.3848709, "action": "move", "x": 430, "y": 296}
{"time_stamp": 21028.3926563, "action": "move", "x": 389, "y": 296}
{"time_stamp": 21028.4009164, "action": "move", "x": 348, "y": 296}
{"time_stamp": 21028.4089388, "action": "move", "x": 313, "y": 296}
{"time_stamp": 21028.4171707, "action": "move", "x": 280, "y": 296}
{"time_stamp": 21028.4245847, "action": "move", "x": 252, "y": 294}
{"time_stamp": 21028.4328148, "action": "move", "x": 225, "y": 294}
{"time_stamp": 21028.4406678, "action": "move", "x": 208, "y": 294}
{"time_stamp": 21028.4486998, "action": "move", "x": 192, "y": 294}
{"time_stamp": 21028.4568529, "action": "move", "x": 177, "y": 294}
{"time_stamp": 21028.4647334, "action": "move", "x": 163, "y": 293}
{"time_stamp": 21028.4729702, "action": "move", "x": 153, "y": 293}
{"time_stamp": 21028.4808044, "action": "move", "x": 143, "y": 293}
{"time_stamp": 21028.4889062, "action": "move", "x": 135, "y": 293}
{"time_stamp": 21028.4967676, "action": "move", "x": 130, "y": 293}
{"time_stamp": 21028.5050544, "action": "move", "x": 124, "y": 293}
{"time_stamp": 21028.5127317, "action": "move", "x": 120, "y": 293}
{"time_stamp": 21028.520827, "action": "move", "x": 117, "y": 293}
{"time_stamp": 21028.5289378, "action": "move", "x": 114, "y": 293}
{"time_stamp": 21028.5371078, "action": "move", "x": 111, "y": 293}
{"time_stamp": 21028.545514, "action": "move", "x": 107, "y": 293}
{"time_stamp": 21028.5527022, "action": "move", "x": 104, "y": 292}
{"time_stamp": 21028.5605384, "action": "move", "x": 100, "y": 292}
{"time_stamp": 21028.5686583, "action": "move", "x": 96, "y": 291}
{"time_stamp": 21028.5766951, "action": "move", "x": 90, "y": 291}
{"time_stamp": 21028.5847502, "action": "move", "x": 85, "y": 291}
{"time_stamp": 21028.5926223, "action": "move", "x": 79, "y": 290}
{"time_stamp": 21028.6007454, "action": "move", "x": 74, "y": 290}
{"time_stamp": 21028.6088707, "action": "move", "x": 70, "y": 289}
{"time_stamp": 21028.6166501, "action": "move", "x": 67, "y": 289}
{"time_stamp": 21028.6249259, "action": "move", "x": 66, "y": 289}
{"time_stamp": 21028.6647889, "action": "move", "x": 66, "y": 289}
{"time_stamp": 21028.6728642, "action": "move", "x": 68, "y": 288}
{"time_stamp": 21028.6807781, "action": "move", "x": 70, "y": 286}
{"time_stamp": 21028.6888295, "action": "move", "x": 74, "y": 285}
{"time_stamp": 21028.6971027, "action": "move", "x": 77, "y": 284}
{"time_stamp": 21028.7046499, "action": "move", "x": 81, "y": 282}
{"time_stamp": 21028.7129405, "action": "move", "x": 86, "y": 281}
{"time_stamp": 21028.7205325, "action": "move", "x": 91, "y": 279}
{"time_stamp": 21028.7285422, "action": "move", "x": 98, "y": 278}
{"time_stamp": 21028.7366509, "action": "move", "x": 104, "y": 275}
{"time_stamp": 21028.7448279, "action": "move", "x": 110, "y": 275}
{"time_stamp": 21028.7527897, "action": "move", "x": 116, "y": 273}
{"time_stamp": 21028.7609718, "action": "move", "x": 120, "y": 272}
{"time_stamp": 21028.7688693, "action": "move", "x": 124, "y": 271}
{"time_stamp": 21028.7766846, "action": "move", "x": 128, "y": 270}
{"time_stamp": 21028.7848371, "action": "move", "x": 131, "y": 270}
{"time_stamp": 21028.7927773, "action": "move", "x": 133, "y": 268}
{"time_stamp": 21028.8007498, "action": "move", "x": 134, "y": 268}
{"time_stamp": 21028.8088143, "action": "move", "x": 136, "y": 268}
{"time_stamp": 21028.8168157, "action": "move", "x": 137, "y": 268}
{"time_stamp": 21028.8246469, "action": "move", "x": 139, "y": 268}
{"time_stamp": 21028.8327817, "action": "move", "x": 140, "y": 268}
{"time_stamp": 21028.8408239, "action": "move", "x": 141, "y": 268}
{"time_stamp": 21028.8488115, "action": "move", "x": 142, "y": 267}
{"time_stamp": 21028.8571578, "action": "move", "x": 143, "y": 267}
{"time_stamp": 21028.8646641, "action": "move", "x": 144, "y": 267}
{"time_stamp": 21028.8741985, "action": "move", "x": 145, "y": 267}
{"time_stamp": 21028.8809717, "action": "move", "x": 146, "y": 267}
{"time_stamp": 21028.8888646, "action": "move", "x": 146, "y": 267}
{"time_stamp": 21028.961049, "action": "move", "x": 146, "y": 266}
{"time_stamp": 21029.0249854, "action": "move", "x": 147, "y": 265}
{"time_stamp": 21029.0328138, "action": "move", "x": 147, "y": 264}
{"time_stamp": 21029.0407582, "action": "move", "x": 147, "y": 264}
{"time_stamp": 21029.0487772, "action": "move", "x": 148, "y": 263}
{"time_stamp": 21029.0569372, "action": "move", "x": 148, "y": 263}
{"time_stamp": 21029.065073, "action": "move", "x": 149, "y": 262}
{"time_stamp": 21029.0729933, "action": "move", "x": 150, "y": 262}
{"time_stamp": 21029.0888149, "action": "move", "x": 150, "y": 261}
{"time_stamp": 21029.0971595, "action": "move", "x": 151, "y": 260}
{"time_stamp": 21029.10458, "action": "move", "x": 151, "y": 260}
{"time_stamp": 21029.1126284, "action": "move", "x": 151, "y": 260}
{"time_stamp": 21029.1208764, "action": "move", "x": 151, "y": 259}
{"time_stamp": 21029.1287413, "action": "move", "x": 152, "y": 259}
{"time_stamp": 21029.1611214, "action": "move", "x": 152, "y": 259}
{"time_stamp": 21029.1614723, "action": "click", "x": 152, "y": 259, "button": "left", "pressed": true}
{"time_stamp": 21029.2168134, "action": "move", "x": 152, "y": 259}
{"time_stamp": 21029.2248681, "action": "move", "x": 154, "y": 259}
{"time_stamp": 21029.2327317, "action": "move", "x": 156, "y": 260}
{"time_stamp": 21029.2408222, "action": "move", "x": 158, "y": 262}
{"time_stamp": 21029.2487515, "action": "move", "x": 163, "y": 263}
{"time_stamp": 21029.2568152, "action": "move", "x": 169, "y": 266}
{"time_stamp": 21029.2649126, "action": "move", "x": 174, "y": 270}
{"time_stamp": 21029.2727425, "action": "move", "x": 183, "y": 273}
{"time_stamp": 21029.2807226, "action": "move", "x": 190, "y": 276}
{"time_stamp": 21029.2887741, "action": "move", "x": 200, "y": 279}
{"time_stamp": 21029.296883, "action": "move", "x": 209, "y": 282}
{"time_stamp": 21029.304834, "action": "move", "x": 220, "y": 285}
{"time_stamp": 21029.3131548, "action": "move", "x": 233, "y": 287}
{"time_stamp": 21029.3207916, "action": "move", "x": 244, "y": 290}
{"time_stamp": 21029.3290871, "action": "move", "x": 256, "y": 292}
{"time_stamp": 21029.3366508, "action": "move", "x": 268, "y": 293}
{"time_stamp": 21029.3445108, "action": "move", "x": 279, "y": 294}
{"time_stamp": 21029.3529213, "action": "move", "x": 288, "y": 297}
{"time_stamp": 21029.3607282, "action": "move", "x": 298, "y": 297}
{"time_stamp": 21029.3691604, "action": "move", "x": 307, "y": 297}
{"time_stamp": 21029.3769931, "action": "move", "x": 316, "y": 298}
{"time_stamp": 21029.3850192, "action": "move", "x": 324, "y": 300}
{"time_stamp": 21029.3927881, "action": "move", "x": 331, "y": 301}
{"time_stamp": 21029.4007925, "action": "move", "x": 336, "y": 302}
{"time_stamp": 21029.4088638, "action": "move", "x": 342, "y": 304}
{"time_stamp": 21029.4167924, "action": "move", "x": 346, "y": 304}
{"time_stamp": 21029.4251047, "action": "move", "x": 349, "y": 304}
{"time_stamp": 21029.4328699, "action": "move", "x": 352, "y": 306}
{"time_stamp": 21029.4409293, "action": "move", "x": 355, "y": 306}
{"time_stamp": 21029.4487136, "action": "move", "x": 356, "y": 307}
{"time_stamp": 21029.4568755, "action": "move", "x": 358, "y": 308}
{"time_stamp": 21029.4647053, "action": "move", "x": 361, "y": 309}
{"time_stamp": 21029.4728173, "action": "move", "x": 363, "y": 310}
{"time_stamp": 21029.4806011, "action": "move", "x": 365, "y": 311}
{"time_stamp": 21029.4889321, "action": "move", "x": 367, "y": 312}
{"time_stamp": 21029.4967544, "action": "move", "x": 370, "y": 313}
{"time_stamp": 21029.5049087, "action": "move", "x": 374, "y": 314}
{"time_stamp": 21029.5129759, "action": "move", "x": 377, "y": 316}
{"time_stamp": 21029.5210278, "action": "move", "x": 381, "y": 317}
{"time_stamp": 21029.5286154, "action": "move", "x": 386, "y": 317}
{"time_stamp": 21029.5371491, "action": "move", "x": 390, "y": 318}
{"time_stamp": 21029.5449815, "action": "move", "x": 393, "y": 319}
{"time_stamp": 21029.5526305, "action": "move", "x": 397, "y": 319}
{"time_stamp": 21029.5604721, "action": "move", "x": 400, "y": 319}
{"time_stamp": 21029.5690371, "action": "move", "x": 402, "y": 319}
{"time_stamp": 21029.5772927, "action": "move", "x": 405, "y": 319}
{"time_stamp": 21029.5846161, "action": "move", "x": 406, "y": 319}
{"time_stamp": 21029.5928399, "action": "move", "x": 407, "y": 319}
{"time_stamp": 21029.6007032, "action": "move", "x": 408, "y": 319}
{"time_stamp": 21029.609118, "action": "move", "x": 409, "y": 319}
{"time_stamp": 21029.6166036, "action": "move", "x": 411, "y": 320}
{"time_stamp": 21029.6249215, "action": "move", "x": 412, "y": 320}
{"time_stamp": 21029.6327262, "action": "move", "x": 414, "y": 320}
{"time_stamp": 21029.6408018, "action": "move", "x": 415, "y": 320}
{"time_stamp": 21029.649463, "action": "move", "x": 418, "y": 320}
{"time_stamp": 21029.6575693, "action": "move", "x": 420, "y": 320}
{"time_stamp": 21029.6650956, "action": "move", "x": 423, "y": 320}
{"time_stamp": 21029.6729346, "action": "move", "x": 426, "y": 320}
{"time_stamp": 21029.6808747, "action": "move", "x": 429, "y": 320}
{"time_stamp": 21029.688616, "action": "move", "x": 432, "y": 320}
{"time_stamp": 21029.6970675, "action": "move", "x": 435, "y": 320}
{"time_stamp": 21029.7049324, "action": "move", "x": 438, "y": 320}
{"time_stamp": 21029.7130458, "action": "move", "x": 439, "y": 320}
{"time_stamp": 21029.7207522, "action": "move", "x": 440, "y": 320}
{"time_stamp": 21029.7289775, "action": "move", "x": 442, "y": 320}
{"time_stamp": 21029.7366577, "action": "move", "x": 443, "y": 320}
{"time_stamp": 21029.7444825, "action": "move", "x": 445, "y": 320}
{"time_stamp": 21029.7526551, "action": "move", "x": 447, "y": 320}
{"time_stamp": 21029.7604951, "action": "move", "x": 448, "y": 320}
{"time_stamp": 21029.7686569, "action": "move", "x": 450, "y": 319}
{"time_stamp": 21029.7775496, "action": "move", "x": 451, "y": 319}
{"time_stamp": 21029.7849685, "action": "move", "x": 451, "y": 319}
{"time_stamp": 21029.7929356, "action": "move", "x": 452, "y": 319}
{"time_stamp": 21029.8007005, "action": "move", "x": 452, "y": 319}
{"time_stamp": 21029.8170717, "action": "move", "x": 453, "y": 319}
{"time_stamp": 21029.8248574, "action": "move", "x": 453, "y": 318}
{"time_stamp": 21029.8330359, "action": "move", "x": 454, "y": 318}
{"time_stamp": 21029.8407804, "action": "move", "x": 454, "y": 318}
{"time_stamp": 21029.8487615, "action": "move", "x": 455, "y": 318}
{"time_stamp": 21029.8648369, "action": "move", "x": 455, "y": 318}
{"time_stamp": 21029.8726477, "action": "move", "x": 456, "y": 318}
{"time_stamp": 21029.8809607, "action": "move", "x": 457, "y": 317}
{"time_stamp": 21029.8888473, "action": "move", "x": 457, "y": 317}
{"time_stamp": 21029.9048933, "action": "move", "x": 458, "y": 317}
{"time_stamp": 21029.9129577, "action": "move", "x": 458, "y": 317}
{"time_stamp": 21029.9208533, "action": "move", "x": 459, "y": 317}
{"time_stamp": 21029.9286645, "action": "move", "x": 459, "y": 317}
{"time_stamp": 21029.9368461, "action": "move", "x": 461, "y": 317}
{"time_stamp": 21029.9448712, "action": "move", "x": 461, "y": 317}
{"time_stamp": 21029.953212, "action": "move", "x": 462, "y": 317}
{"time_stamp": 21029.9608238, "action": "move", "x": 463, "y": 317}
{"time_stamp": 21029.9686821, "action": "move", "x": 463, "y": 317}
{"time_stamp": 21029.9768342, "action": "move", "x": 464, "y": 317}
{"time_stamp": 21030.361149, "action": "move", "x": 464, "y": 317}
{"time_stamp": 21030.3613383, "action": "click", "x": 464, "y": 317, "button": "left", "pressed": false}
{"time_stamp": 21030.9690893, "action": "move", "x": 465, "y": 317}
{"time_stamp": 21030.9770331, "action": "move", "x": 465, "y": 317}
{"time_stamp": 21030.9933165, "action": "move", "x": 466, "y": 317}
{"time_stamp": 21031.8410512, "action": "press", "name": "alt_l"}
{"time_stamp": 21032.1375784, "action": "press", "name": "="}
{"time_stamp": 21032.2331653, "action": "release", "name": "="}
{"time_stamp": 21032.4009051, "action": "release", "name": "alt_l"}
{"time_stamp": 21033.1212821, "action": "move", "x": 466, "y": 317}
{"time_stamp": 21033.1289659, "action": "move", "x": 467, "y": 320}
{"time_stamp": 21033.1370348, "action": "move", "x": 471, "y": 325}
{"time_stamp": 21033.1456134, "action": "move", "x": 475, "y": 332}
{"time_stamp": 21033.1531721, "action": "move", "x": 482, "y": 340}
{"time_stamp": 21033.1605014, "action": "move", "x": 490, "y": 349}
{"time_stamp": 21033.1692663, "action": "move", "x": 498, "y": 359}
{"time_stamp": 21033.1771117, "action": "move", "x": 508, "y": 371}
{"time_stamp": 21033.1850449, "action": "move", "x": 521, "y": 383}
{"time_stamp": 21033.1929826, "action": "move", "x": 535, "y": 399}
{"time_stamp": 21033.201192, "action": "move", "x": 546, "y": 415}
{"time_stamp": 21033.2089185, "action": "move", "x": 555, "y": 434}
{"time_stamp": 21033.216848, "action": "move", "x": 563, "y": 452}
{"time_stamp": 21033.2246769, "action": "move", "x": 570, "y": 469}
{"time_stamp": 21033.2328685, "action": "move", "x": 574, "y": 485}
{"time_stamp": 21033.2407514, "action": "move", "x": 577, "y": 503}
{"time_stamp": 21033.2488102, "action": "move", "x": 578, "y": 518}
{"time_stamp": 21033.2569003, "action": "move", "x": 578, "y": 534}
{"time_stamp": 21033.2654896, "action": "move", "x": 580, "y": 552}
{"time_stamp": 21033.2730147, "action": "move", "x": 580, "y": 571}
{"time_stamp": 21033.2808888, "action": "move", "x": 582, "y": 592}
{"time_stamp": 21033.2890461, "action": "move", "x": 583, "y": 617}
{"time_stamp": 21033.2968868, "action": "move", "x": 586, "y": 643}
{"time_stamp": 21033.3050093, "action": "move", "x": 588, "y": 665}
{"time_stamp": 21033.3129685, "action": "move", "x": 591, "y": 694}
{"time_stamp": 21033.3210515, "action": "move", "x": 592, "y": 716}
{"time_stamp": 21033.3289082, "action": "move", "x": 594, "y": 735}
{"time_stamp": 21033.3368274, "action": "move", "x": 598, "y": 751}
{"time_stamp": 21033.3446464, "action": "move", "x": 601, "y": 761}
{"time_stamp": 21033.3532343, "action": "move", "x": 604, "y": 773}
{"time_stamp": 21033.3607161, "action": "move", "x": 606, "y": 783}
{"time_stamp": 21033.3687129, "action": "move", "x": 608, "y": 794}
{"time_stamp": 21033.3769088, "action": "move", "x": 611, "y": 804}
{"time_stamp": 21033.3846615, "action": "move", "x": 614, "y": 816}
{"time_stamp": 21033.3927661, "action": "move", "x": 617, "y": 826}
{"time_stamp": 21033.4008999, "action": "move", "x": 619, "y": 837}
{"time_stamp": 21033.408732, "action": "move", "x": 621, "y": 846}
{"time_stamp": 21033.4169038, "action": "move", "x": 623, "y": 856}
{"time_stamp": 21033.4250181, "action": "move", "x": 623, "y": 865}
{"time_stamp": 21033.4329144, "action": "move", "x": 624, "y": 875}
{"time_stamp": 21033.4410593, "action": "move", "x": 624, "y": 883}
{"time_stamp": 21033.448994, "action": "move", "x": 626, "y": 891}
{"time_stamp": 21033.4570193, "action": "move", "x": 626, "y": 899}
{"time_stamp": 21033.4648038, "action": "move", "x": 627, "y": 906}
{"time_stamp": 21033.4730101, "action": "move", "x": 628, "y": 913}
{"time_stamp": 21033.4815421, "action": "move", "x": 631, "y": 920}
{"time_stamp": 21033.4891275, "action": "move", "x": 635, "y": 926}
{"time_stamp": 21033.4970011, "action": "move", "x": 639, "y": 930}
{"time_stamp": 21033.5047772, "action": "move", "x": 647, "y": 935}
{"time_stamp": 21033.5132552, "action": "move", "x": 653, "y": 939}
{"time_stamp": 21033.5211245, "action": "move", "x": 659, "y": 943}
{"time_stamp": 21033.5292347, "action": "move", "x": 665, "y": 947}
{"time_stamp": 21033.5373088, "action": "move", "x": 671, "y": 950}
{"time_stamp": 21033.5447875, "action": "move", "x": 677, "y": 955}
{"time_stamp": 21033.5529495, "action": "move", "x": 684, "y": 960}
{"time_stamp": 21033.5609559, "action": "move", "x": 690, "y": 965}
{"time_stamp": 21033.5689335, "action": "move", "x": 696, "y": 971}
{"time_stamp": 21033.5768783, "action": "move", "x": 700, "y": 977}
{"time_stamp": 21033.5846548, "action": "move", "x": 703, "y": 981}
{"time_stamp": 21033.5931357, "action": "move", "x": 705, "y": 985}
{"time_stamp": 21033.6009205, "action": "move", "x": 707, "y": 988}
{"time_stamp": 21033.6088781, "action": "move", "x": 708, "y": 991}
{"time_stamp": 21033.6169713, "action": "move", "x": 709, "y": 994}
{"time_stamp": 21033.6249134, "action": "move", "x": 709, "y": 997}
{"time_stamp": 21033.6328882, "action": "move", "x": 710, "y": 999}
{"time_stamp": 21033.6412016, "action": "move", "x": 711, "y": 1003}
{"time_stamp": 21033.648939, "action": "move", "x": 711, "y": 1007}
{"time_stamp": 21033.6572201, "action": "move", "x": 713, "y": 1010}
{"time_stamp": 21033.6647348, "action": "move", "x": 715, "y": 1013}
{"time_stamp": 21033.6730325, "action": "move", "x": 716, "y": 1017}
{"time_stamp": 21033.6810552, "action": "move", "x": 717, "y": 1021}
{"time_stamp": 21033.6890871, "action": "move", "x": 719, "y": 1024}
{"time_stamp": 21033.6969594, "action": "move", "x": 720, "y": 1026}
{"time_stamp": 21033.7048284, "action": "move", "x": 720, "y": 1028}
{"time_stamp": 21033.7126425, "action": "move", "x": 720, "y": 1028}
{"time_stamp": 21033.7610156, "action": "move", "x": 720, "y": 1029}
{"time_stamp": 21033.7693689, "action": "move", "x": 720, "y": 1029}
{"time_stamp": 21033.7772628, "action": "move", "x": 720, "y": 1030}
{"time_stamp": 21033.7847737, "action": "move", "x": 720, "y": 1031}
{"time_stamp": 21033.7929223, "action": "move", "x": 719, "y": 1031}
{"time_stamp": 21033.801029, "action": "move", "x": 719, "y": 1032}
{"time_stamp": 21033.808944, "action": "move", "x": 718, "y": 1033}
{"time_stamp": 21033.8169394, "action": "move", "x": 717, "y": 1035}
{"time_stamp": 21033.8248771, "action": "move", "x": 716, "y": 1035}
{"time_stamp": 21033.8334548, "action": "move", "x": 716, "y": 1036}
{"time_stamp": 21033.8410779, "action": "move", "x": 715, "y": 1037}
{"time_stamp": 21033.8486117, "action": "move", "x": 715, "y": 1039}
{"time_stamp": 21033.8568906, "action": "move", "x": 713, "y": 1039}
{"time_stamp": 21033.8649249, "action": "move", "x": 712, "y": 1040}
{"time_stamp": 21033.8729566, "action": "move", "x": 712, "y": 1042}
{"time_stamp": 21033.8810286, "action": "move", "x": 711, "y": 1043}
{"time_stamp": 21033.8888454, "action": "move", "x": 711, "y": 1044}
{"time_stamp": 21033.8970736, "action": "move", "x": 709, "y": 1045}
{"time_stamp": 21033.9051884, "action": "move", "x": 709, "y": 1046}
{"time_stamp": 21033.91297, "action": "move", "x": 709, "y": 1047}
{"time_stamp": 21033.9210518, "action": "move", "x": 709, "y": 1047}
{"time_stamp": 21033.9770341, "action": "move", "x": 709, "y": 1047}
{"time_stamp": 21033.9932821, "action": "move", "x": 709, "y": 1047}
{"time_stamp": 21033.9933595, "action": "click", "x": 709, "y": 1047, "button": "left", "pressed": true}
{"time_stamp": 21034.0734669, "action": "move", "x": 709, "y": 1047}
{"time_stamp": 21034.0737272, "action": "click", "x": 709, "y": 1047, "button": "left", "pressed": false}
{"time_stamp": 21034.1450402, "action": "move", "x": 709, "y": 1047}
{"time_stamp": 21034.1608305, "action": "move", "x": 709, "y": 1047}
{"time_stamp": 21034.1690642, "action": "move", "x": 709, "y": 1046}
{"time_stamp": 21034.1770086, "action": "move", "x": 709, "y": 1045}
{"time_stamp": 21034.1849649, "action": "move", "x": 709, "y": 1044}
{"time_stamp": 21034.1927171, "action": "move", "x": 709, "y": 1043}
{"time_stamp": 21034.2008052, "action": "move", "x": 709, "y": 1040}
{"time_stamp": 21034.2088854, "action": "move", "x": 709, "y": 1038}
{"time_stamp": 21034.2167939, "action": "move", "x": 709, "y": 1034}
{"time_stamp": 21034.224882, "action": "move", "x": 709, "y": 1029}
{"time_stamp": 21034.2327267, "action": "move", "x": 711, "y": 1023}
{"time_stamp": 21034.2408131, "action": "move", "x": 711, "y": 1016}
{"time_stamp": 21034.2502186, "action": "move", "x": 712, "y": 1005}
{"time_stamp": 21034.256732, "action": "move", "x": 713, "y": 991}
{"time_stamp": 21034.2646169, "action": "move", "x": 716, "y": 976}
{"time_stamp": 21034.2729272, "action": "move", "x": 719, "y": 955}
{"time_stamp": 21034.2813953, "action": "move", "x": 722, "y": 929}
{"time_stamp": 21034.2889074, "action": "move", "x": 723, "y": 899}
{"time_stamp": 21034.2971538, "action": "move", "x": 725, "y": 871}
{"time_stamp": 21034.3049341, "action": "move", "x": 727, "y": 838}
{"time_stamp": 21034.3130394, "action": "move", "x": 727, "y": 805}
{"time_stamp": 21034.3208269, "action": "move", "x": 728, "y": 771}
{"time_stamp": 21034.3289492, "action": "move", "x": 728, "y": 742}
{"time_stamp": 21034.3367866, "action": "move", "x": 728, "y": 714}
{"time_stamp": 21034.3446895, "action": "move", "x": 728, "y": 686}
{"time_stamp": 21034.3528319, "action": "move", "x": 728, "y": 662}
{"time_stamp": 21034.3606113, "action": "move", "x": 728, "y": 643}
{"time_stamp": 21034.3686987, "action": "move", "x": 727, "y": 620}
{"time_stamp": 21034.3766536, "action": "move", "x": 725, "y": 605}
{"time_stamp": 21034.3847084, "action": "move", "x": 722, "y": 589}
{"time_stamp": 21034.3930586, "action": "move", "x": 719, "y": 576}
{"time_stamp": 21034.4009346, "action": "move", "x": 716, "y": 565}
{"time_stamp": 21034.4090089, "action": "move", "x": 712, "y": 554}
{"time_stamp": 21034.416996, "action": "move", "x": 710, "y": 544}
{"time_stamp": 21034.4246653, "action": "move", "x": 708, "y": 536}
{"time_stamp": 21034.4331124, "action": "move", "x": 707, "y": 527}
{"time_stamp": 21034.4410156, "action": "move", "x": 706, "y": 519}
{"time_stamp": 21034.4488925, "action": "move", "x": 705, "y": 509}
{"time_stamp": 21034.4568042, "action": "move", "x": 705, "y": 500}
{"time_stamp": 21034.4650783, "action": "move", "x": 704, "y": 492}
{"time_stamp": 21034.472962, "action": "move", "x": 703, "y": 483}
{"time_stamp": 21034.4809251, "action": "move", "x": 703, "y": 475}
{"time_stamp": 21034.4889399, "action": "move", "x": 703, "y": 467}
{"time_stamp": 21034.4968154, "action": "move", "x": 703, "y": 460}
{"time_stamp": 21034.505111, "action": "move", "x": 703, "y": 454}
{"time_stamp": 21034.5128327, "action": "move", "x": 703, "y": 446}
{"time_stamp": 21034.5211697, "action": "move", "x": 704, "y": 439}
{"time_stamp": 21034.5291453, "action": "move", "x": 704, "y": 432}
{"time_stamp": 21034.53683, "action": "move", "x": 704, "y": 428}
{"time_stamp": 21034.5453754, "action": "move", "x": 705, "y": 423}
{"time_stamp": 21034.5531997, "action": "move", "x": 705, "y": 419}
{"time_stamp": 21034.5610828, "action": "move", "x": 705, "y": 417}
{"time_stamp": 21034.568917, "action": "move", "x": 705, "y": 414}
{"time_stamp": 21034.5768693, "action": "move", "x": 705, "y": 412}
{"time_stamp": 21034.5849601, "action": "move", "x": 706, "y": 409}
{"time_stamp": 21034.5930116, "action": "move", "x": 706, "y": 406}
{"time_stamp": 21034.6006017, "action": "move", "x": 706, "y": 404}
{"time_stamp": 21034.6086777, "action": "move", "x": 706, "y": 402}
{"time_stamp": 21034.6167229, "action": "move", "x": 706, "y": 400}
{"time_stamp": 21034.6251342, "action": "move", "x": 706, "y": 398}
{"time_stamp": 21034.6325694, "action": "move", "x": 706, "y": 396}
{"time_stamp": 21034.6407476, "action": "move", "x": 706, "y": 393}
{"time_stamp": 21034.6489079, "action": "move", "x": 707, "y": 390}
{"time_stamp": 21034.6567719, "action": "move", "x": 707, "y": 388}
{"time_stamp": 21034.6648437, "action": "move", "x": 707, "y": 386}
{"time_stamp": 21034.6735978, "action": "move", "x": 707, "y": 383}
{"time_stamp": 21034.6808034, "action": "move", "x": 707, "y": 381}
{"time_stamp": 21034.6887831, "action": "move", "x": 707, "y": 379}
{"time_stamp": 21034.6968931, "action": "move", "x": 707, "y": 377}
{"time_stamp": 21034.7048123, "action": "move", "x": 707, "y": 375}
{"time_stamp": 21034.7127621, "action": "move", "x": 706, "y": 373}
{"time_stamp": 21034.7208214, "action": "move", "x": 706, "y": 372}
{"time_stamp": 21034.7289712, "action": "move", "x": 705, "y": 371}
{"time_stamp": 21034.7366015, "action": "move", "x": 705, "y": 370}
{"time_stamp": 21034.7449792, "action": "move", "x": 705, "y": 369}
{"time_stamp": 21034.7528215, "action": "move", "x": 705, "y": 368}
{"time_stamp": 21034.7611243, "action": "move", "x": 705, "y": 367}
{"time_stamp": 21034.7689338, "action": "move", "x": 705, "y": 366}
{"time_stamp": 21034.7768638, "action": "move", "x": 705, "y": 365}
{"time_stamp": 21034.7849091, "action": "move", "x": 705, "y": 364}
{"time_stamp": 21034.792848, "action": "move", "x": 705, "y": 363}
{"time_stamp": 21034.8010344, "action": "move", "x": 705, "y": 362}
{"time_stamp": 21034.809155, "action": "move", "x": 704, "y": 362}
{"time_stamp": 21034.8166183, "action": "move", "x": 704, "y": 359}
{"time_stamp": 21034.8249556, "action": "move", "x": 704, "y": 358}
{"time_stamp": 21034.8333238, "action": "move", "x": 704, "y": 356}
{"time_stamp": 21034.8410045, "action": "move", "x": 703, "y": 354}
{"time_stamp": 21034.8486685, "action": "move", "x": 703, "y": 352}
{"time_stamp": 21034.857368, "action": "move", "x": 703, "y": 350}
{"time_stamp": 21034.8647224, "action": "move", "x": 703, "y": 347}
{"time_stamp": 21034.8730798, "action": "move", "x": 703, "y": 346}
{"time_stamp": 21034.8809692, "action": "move", "x": 703, "y": 342}
{"time_stamp": 21034.8889165, "action": "move", "x": 703, "y": 341}
{"time_stamp": 21034.8969094, "action": "move", "x": 704, "y": 339}
{"time_stamp": 21034.9052672, "action": "move", "x": 704, "y": 337}
{"time_stamp": 21034.9145868, "action": "move", "x": 704, "y": 335}
{"time_stamp": 21034.9208561, "action": "move", "x": 704, "y": 334}
{"time_stamp": 21034.928931, "action": "move", "x": 704, "y": 333}
{"time_stamp": 21034.9374176, "action": "move", "x": 704, "y": 332}
{"time_stamp": 21034.9451258, "action": "move", "x": 704, "y": 330}
{"time_stamp": 21034.9528709, "action": "move", "x": 704, "y": 329}
{"time_stamp": 21034.9611476, "action": "move", "x": 704, "y": 328}
{"time_stamp": 21034.968991, "action": "move", "x": 704, "y": 327}
{"time_stamp": 21034.9768394, "action": "move", "x": 705, "y": 325}
{"time_stamp": 21034.9848553, "action": "move", "x": 705, "y": 324}
{"time_stamp": 21034.993121, "action": "move", "x": 705, "y": 323}
{"time_stamp": 21035.0007992, "action": "move", "x": 706, "y": 322}
{"time_stamp": 21035.0088762, "action": "move", "x": 707, "y": 320}
{"time_stamp": 21035.0166123, "action": "move", "x": 707, "y": 320}
{"time_stamp": 21035.0247724, "action": "move", "x": 708, "y": 318}
{"time_stamp": 21035.0335071, "action": "move", "x": 708, "y": 317}
{"time_stamp": 21035.0411458, "action": "move", "x": 709, "y": 317}
{"time_stamp": 21035.0491997, "action": "move", "x": 709, "y": 316}
{"time_stamp": 21035.0569637, "action": "move", "x": 711, "y": 314}
{"time_stamp": 21035.06496, "action": "move", "x": 711, "y": 313}
{"time_stamp": 21035.0726588, "action": "move", "x": 712, "y": 312}
{"time_stamp": 21035.0807214, "action": "move", "x": 713, "y": 311}
{"time_stamp": 21035.0888078, "action": "move", "x": 713, "y": 309}
{"time_stamp": 21035.0972443, "action": "move", "x": 713, "y": 309}
{"time_stamp": 21035.1048868, "action": "move", "x": 714, "y": 308}
{"time_stamp": 21035.1127551, "action": "move", "x": 715, "y": 307}
{"time_stamp": 21035.1208842, "action": "move", "x": 715, "y": 306}
{"time_stamp": 21035.1285261, "action": "move", "x": 715, "y": 306}
{"time_stamp": 21035.1366862, "action": "move", "x": 715, "y": 305}
{"time_stamp": 21035.1446592, "action": "move", "x": 716, "y": 305}
{"time_stamp": 21035.1528109, "action": "move", "x": 716, "y": 305}
{"time_stamp": 21035.1848109, "action": "move", "x": 716, "y": 304}
{"time_stamp": 21035.208994, "action": "move", "x": 717, "y": 304}
{"time_stamp": 21035.2571327, "action": "move", "x": 717, "y": 304}
{"time_stamp": 21035.2573543, "action": "click", "x": 717, "y": 304, "button": "left", "pressed": true}
{"time_stamp": 21035.3377191, "action": "move", "x": 717, "y": 304}
{"time_stamp": 21035.3379572, "action": "click", "x": 717, "y": 304, "button": "left", "pressed": false}

View File

@@ -0,0 +1,34 @@
import cv2
from matplotlib import pyplot as plt
# Find cell-sized contours in a screenshot and display them as an overlay.

# Load the image
image_path = '../../mm_agents/stackoverflow.png'
image = cv2.imread(image_path)
# cv2.imread does not raise when the file is missing or unreadable; it returns
# None. Fail here with a clear message instead of a cryptic error in cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Apply adaptive thresholding to get a binary image (inverted, so dark
# structures on a light background become foreground)
thresh = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
)

# Find contours (outermost contours only)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Filter out contours that are not of cell size
# This is done by assuming that cells will have a relatively standard size
# The size filter is just a placeholder, real values depend on the actual image size
min_cell_size = 500
max_cell_size = 5000
cell_contours = [
    cnt for cnt in contours
    if min_cell_size < cv2.contourArea(cnt) < max_cell_size
]

# Draw contours on a copy so the loaded image itself is left unmodified
contour_output = image.copy()
cv2.drawContours(contour_output, cell_contours, -1, (0, 255, 0), 2)

# Display the image with cell contours
# (OpenCV images are BGR; matplotlib expects RGB, hence the conversion)
plt.figure(figsize=(12, 6))
plt.imshow(cv2.cvtColor(contour_output, cv2.COLOR_BGR2RGB))
plt.title('Spreadsheet with Cell Contours')
plt.axis('off')
plt.show()

View File

@@ -0,0 +1,32 @@
from PIL import Image, ImageDraw
def mark_point(image_path: str, x: int, y: int, radius: int = 5, color: str = 'red') -> str:
    """
    Mark a point on an image and save the image.

    A filled circle is drawn centred on ``(x, y)`` and the result is written
    to a new file whose name is the input name with ``_marked`` inserted
    before the file extension (``shot.png`` -> ``shot_marked.png``).

    Args:
        image_path: Path of the image to annotate.
        x: Horizontal pixel coordinate of the point.
        y: Vertical pixel coordinate of the point.
        radius: Radius of the marker circle, in pixels.
        color: Fill and outline color of the marker.

    Returns:
        The path the marked image was saved to.
    """
    # Local import keeps this function self-contained without touching the
    # module's import block.
    import os

    # Split on the real extension rather than assuming it is exactly four
    # characters long ('.png'); '.jpeg', '.tiff' etc. are handled correctly.
    stem, extension = os.path.splitext(image_path)
    marked_image_path = f"{stem}_marked{extension}"

    # Use the image as a context manager so the underlying file is closed.
    with Image.open(image_path) as image:
        # Create a draw object
        draw = ImageDraw.Draw(image)

        # Draw a small circle to mark the point; the ellipse is specified by
        # its bounding box.
        draw.ellipse(
            (x - radius, y - radius, x + radius, y + radius),
            fill=color,
            outline=color,
        )

        # Save the image with the point marked
        image.save(marked_image_path)

    return marked_image_path
if __name__ == '__main__':
    # Demo run: mark a point on a sample screenshot and report where the
    # annotated copy was written.
    image_path, color = 'chrome_start.png', 'red'
    x, y, radius = 100, 200, 30

    marked_image_path = mark_point(image_path, x, y, radius=radius, color=color)
    print(f"Marked image saved to {marked_image_path}")