Fix conflicts
This commit is contained in:
@@ -1,7 +1,8 @@
|
|||||||
# DesktopEnv: A Learning Environment for Human-like Computer Task Mastery
|
# DesktopEnv: An Environment towards Human-like Computer Task Mastery
|
||||||
|
|
||||||
## Setup guide
|
## Setup guide
|
||||||
|
|
||||||
|
### For members of the team
|
||||||
1. Download OS image
|
1. Download OS image
|
||||||
1. Download kubuntu from <https://kubuntu.org/getkubuntu/>
|
1. Download kubuntu from <https://kubuntu.org/getkubuntu/>
|
||||||
2. Download ubuntu from <https://ubuntu.com/download/desktop>
|
2. Download ubuntu from <https://ubuntu.com/download/desktop>
|
||||||
@@ -22,7 +23,8 @@
|
|||||||
2. `rm -rf ~/screenshot.png`
|
2. `rm -rf ~/screenshot.png`
|
||||||
7. Set up python and install [mouse](https://github.com/boppreh/mouse/) and [keyboard](https://github.com/jordansissel/xdotool)
|
7. Set up python and install [mouse](https://github.com/boppreh/mouse/) and [keyboard](https://github.com/jordansissel/xdotool)
|
||||||
|
|
||||||
|
### For users of the environment
|
||||||
|
todo
|
||||||
|
|
||||||
## Road map (Proposed)
|
## Road map (Proposed)
|
||||||
|
|
||||||
|
|||||||
@@ -1,23 +1,6 @@
|
|||||||
# Server Setup Guide
|
# Server Setup Guide
|
||||||
|
|
||||||
- [Linux](#linux)
|
1. Copy the file `server/main.py` to the Windows VM
|
||||||
- [Windows](#windows)
|
2. Install the requirements `pip install -r requirements.txt`
|
||||||
|
|
||||||
## Linux
|
|
||||||
|
|
||||||
<https://averagelinuxuser.com/ssh-into-virtualbox/>
|
|
||||||
|
|
||||||
1. `sudo apt install openssh-server`
|
|
||||||
2. `sudo systemctl enable ssh --now`
|
|
||||||
3. `sudo ufw disable` (disable firewall - safe for local network, otherwise `sudo ufw allow ssh`)
|
|
||||||
4. `ip a` - find ip address
|
|
||||||
5. `ssh <username>@<ip_address>`
|
|
||||||
6. On host, run `ssh-copy-id <username>@<ip_address>`
|
|
||||||
|
|
||||||
|
|
||||||
## Windows
|
|
||||||
|
|
||||||
1. Copy the file `windows_server/main.py` to the Windows VM
|
|
||||||
2. Make sure `mouse` and `keyboard` are installed
|
|
||||||
3. Run the file `python main.py`
|
3. Run the file `python main.py`
|
||||||
4. `ipconfig /all` and find the ip address
|
4. `ipconfig /all` and find the ip address
|
||||||
BIN
desktop_env/assets/cursor.png
Normal file
BIN
desktop_env/assets/cursor.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 4.7 KiB |
@@ -1,56 +0,0 @@
|
|||||||
from abc import ABC, abstractmethod
|
|
||||||
from fabric import Connection
|
|
||||||
|
|
||||||
from .xdotool import XDoToolController
|
|
||||||
from .python import PythonController
|
|
||||||
|
|
||||||
class AbstractKeyboardController(ABC):
    """Interface every keyboard backend has to implement.

    All four operations are abstract, so the class cannot be instantiated
    directly. The bodies still raise ``NotImplementedError`` so that a subclass
    delegating to ``super()`` fails loudly instead of silently doing nothing.
    """

    @abstractmethod
    def type(self, text: str):
        """Send ``text`` to the target machine as keyboard input."""
        raise NotImplementedError

    @abstractmethod
    def key(self, key: str):
        """Send a single ``key`` to the target machine."""
        raise NotImplementedError

    @abstractmethod
    def key_down(self, key: str):
        """Hold ``key`` down without releasing it."""
        raise NotImplementedError

    @abstractmethod
    def key_up(self, key: str):
        """Release a previously held ``key``."""
        raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
class XDoToolKeyboardController(AbstractKeyboardController, XDoToolController):
    """Keyboard backend that issues ``xdotool`` sub-commands over SSH.

    Every sub-command is handed to the inherited ``_execute_xdotool_command``,
    which splices it into a shell command line. The argument is therefore
    shell-quoted here so that spaces, quotes and shell metacharacters in the
    text or key name reach ``xdotool`` as one literal argument.
    """

    def __init__(self, ssh_connection: Connection):
        """Store the SSH connection used to reach the machine running xdotool."""
        super().__init__(ssh_connection=ssh_connection)

    def _run(self, subcommand: str, argument: str):
        """Execute ``xdotool <subcommand> <argument>`` with the argument shell-quoted."""
        import shlex  # local import: keeps this block independent of the file's import list

        self._execute_xdotool_command(f"{subcommand} {shlex.quote(argument)}")

    def type(self, text: str):
        """Type ``text`` via ``xdotool type``."""
        self._run("type", text)

    def key(self, key: str):
        """Send ``key`` via ``xdotool key``."""
        self._run("key", key)

    def key_down(self, key: str):
        """Hold ``key`` down via ``xdotool keydown``."""
        self._run("keydown", key)

    def key_up(self, key: str):
        """Release ``key`` via ``xdotool keyup``."""
        self._run("keyup", key)
|
|
||||||
|
|
||||||
class PythonKeyboardController(AbstractKeyboardController, PythonController):
    """Keyboard backend that runs ``keyboard`` library snippets through the HTTP server.

    Each operation is rendered as a one-line Python snippet, wrapped in the
    ``python -c`` template stored in ``self.command`` and passed to the
    inherited ``_execute_python_command``.

    NOTE(review): ``text`` and ``key`` are interpolated unescaped into a
    single-quoted Python literal; values containing quotes or backslashes will
    corrupt the generated command - confirm and escape at the call sites.
    """

    def __init__(self, http_server: str):
        super().__init__(http_server=http_server)
        # Template that wraps a snippet so it runs with ``keyboard`` imported.
        self.command = "python -c \"import keyboard; {command}\""

    def _send(self, snippet: str):
        """Wrap ``snippet`` in the ``python -c`` template and execute it."""
        wrapped = self.command.format(command=snippet)
        self._execute_python_command(wrapped)

    def type(self, text: str):
        """Write ``text`` with ``keyboard.write``."""
        self._send(f"keyboard.write('{text}')")

    def key(self, key: str):
        """Press and release ``key`` with ``keyboard.press_and_release``."""
        self._send(f"keyboard.press_and_release('{key}')")

    def key_down(self, key: str):
        """Press ``key`` with ``keyboard.press``."""
        self._send(f"keyboard.press('{key}')")

    def key_up(self, key: str):
        """Release ``key`` with ``keyboard.release``."""
        self._send(f"keyboard.release('{key}')")
|
|
||||||
@@ -1,144 +0,0 @@
|
|||||||
from enum import Enum
|
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from fabric import Connection
|
|
||||||
|
|
||||||
from .xdotool import XDoToolController
|
|
||||||
from .python import PythonController
|
|
||||||
class MouseClick(Enum):
    """Mouse button identifiers.

    The values 1-5 are the same numbers the xdotool mouse controller in this
    file passes to ``mousedown`` / ``mouseup`` / ``click``.
    """

    # Physical buttons.
    LEFT = 1
    MIDDLE = 2
    RIGHT = 3
    # Wheel movements are reported as extra "buttons".
    WHEEL_UP = 4
    WHEEL_DOWN = 5
|
|
||||||
|
|
||||||
class AbstractMouseController(ABC):
    """Interface every mouse backend has to implement.

    All operations are abstract, so the class cannot be instantiated directly.
    The bodies raise ``NotImplementedError`` so that a subclass delegating to
    ``super()`` fails loudly.
    """

    @abstractmethod
    def mouse_move(self, x: int, y: int):
        """Move the pointer to the coordinates ``(x, y)``."""
        raise NotImplementedError

    # --- left button -----------------------------------------------------

    @abstractmethod
    def left_down(self):
        """Press the left button without releasing it."""
        raise NotImplementedError

    @abstractmethod
    def left_up(self):
        """Release the left button."""
        raise NotImplementedError

    @abstractmethod
    def left_click(self):
        """Click the left button."""
        raise NotImplementedError

    # --- middle button ---------------------------------------------------

    @abstractmethod
    def middle_down(self):
        """Press the middle button without releasing it."""
        raise NotImplementedError

    @abstractmethod
    def middle_up(self):
        """Release the middle button."""
        raise NotImplementedError

    @abstractmethod
    def middle_click(self):
        """Click the middle button."""
        raise NotImplementedError

    # --- right button ----------------------------------------------------

    @abstractmethod
    def right_down(self):
        """Press the right button without releasing it."""
        raise NotImplementedError

    @abstractmethod
    def right_up(self):
        """Release the right button."""
        raise NotImplementedError

    @abstractmethod
    def right_click(self):
        """Click the right button."""
        raise NotImplementedError

    # --- wheel -----------------------------------------------------------

    @abstractmethod
    def scroll_up(self):
        """Scroll the wheel up."""
        raise NotImplementedError

    @abstractmethod
    def scroll_down(self):
        """Scroll the wheel down."""
        raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
class XDoToolMouseController(AbstractMouseController, XDoToolController):
    """Mouse backend that issues ``xdotool`` sub-commands over SSH.

    Button numbers come from ``MouseClick`` instead of being repeated as
    literals; the command strings sent are ``mousemove <x> <y>`` and
    ``mousedown`` / ``mouseup`` / ``click`` followed by the button number.
    """

    def __init__(self, ssh_connection: Connection):
        """Store the SSH connection used to reach the machine running xdotool."""
        super().__init__(ssh_connection=ssh_connection)

    def _button(self, subcommand: str, button: MouseClick):
        """Execute ``xdotool <subcommand> <button number>``."""
        self._execute_xdotool_command(f"{subcommand} {button.value}")

    def mouse_move(self, x: int, y: int):
        """Move the pointer to ``(x, y)``."""
        self._execute_xdotool_command(f"mousemove {x} {y}")

    def left_down(self):
        """Press the left button."""
        self._button("mousedown", MouseClick.LEFT)

    def left_up(self):
        """Release the left button."""
        self._button("mouseup", MouseClick.LEFT)

    def left_click(self):
        """Click the left button."""
        self._button("click", MouseClick.LEFT)

    def middle_down(self):
        """Press the middle button."""
        self._button("mousedown", MouseClick.MIDDLE)

    def middle_up(self):
        """Release the middle button."""
        self._button("mouseup", MouseClick.MIDDLE)

    def middle_click(self):
        """Click the middle button."""
        self._button("click", MouseClick.MIDDLE)

    def right_down(self):
        """Press the right button."""
        self._button("mousedown", MouseClick.RIGHT)

    def right_up(self):
        """Release the right button."""
        self._button("mouseup", MouseClick.RIGHT)

    def right_click(self):
        """Click the right button."""
        self._button("click", MouseClick.RIGHT)

    def scroll_up(self):
        """Scroll up by clicking wheel "button" 4."""
        self._button("click", MouseClick.WHEEL_UP)

    def scroll_down(self):
        """Scroll down by clicking wheel "button" 5."""
        self._button("click", MouseClick.WHEEL_DOWN)
|
|
||||||
|
|
||||||
class PythonMouseController(AbstractMouseController, PythonController):
    """Mouse backend that runs ``mouse`` library snippets through the HTTP server.

    Each operation is rendered as a one-line Python snippet, wrapped in the
    ``python -c`` template stored in ``self.command`` and passed to the
    inherited ``_execute_python_command``.
    """

    def __init__(self, http_server: str):
        super().__init__(http_server=http_server)
        # Template that wraps a snippet so it runs with ``mouse`` imported.
        self.command = "python -c \"import mouse; {command}\""

    def _send(self, snippet: str):
        """Wrap ``snippet`` in the ``python -c`` template and execute it."""
        wrapped = self.command.format(command=snippet)
        self._execute_python_command(wrapped)

    def _press(self, button: str):
        self._send(f"mouse.press(button='{button}')")

    def _release(self, button: str):
        self._send(f"mouse.release(button='{button}')")

    def _click(self, button: str):
        self._send(f"mouse.click(button='{button}')")

    def mouse_move(self, x: int, y: int):
        """Move the pointer to ``(x, y)`` with ``mouse.move``."""
        self._send(f"mouse.move({x}, {y})")

    def left_down(self):
        """Press the left button."""
        self._press("left")

    def left_up(self):
        """Release the left button."""
        self._release("left")

    def left_click(self):
        """Click the left button."""
        self._click("left")

    def middle_down(self):
        """Press the middle button."""
        self._press("middle")

    def middle_up(self):
        """Release the middle button."""
        self._release("middle")

    def middle_click(self):
        """Click the middle button."""
        self._click("middle")

    def right_down(self):
        """Press the right button."""
        self._press("right")

    def right_up(self):
        """Release the right button."""
        self._release("right")

    def right_click(self):
        """Click the right button."""
        self._click("right")

    def scroll_up(self):
        """Scroll up with ``mouse.wheel(10)``."""
        self._send("mouse.wheel(10)")

    def scroll_down(self):
        """Scroll down with ``mouse.wheel(-10)``."""
        self._send("mouse.wheel(-10)")
|
|
||||||
@@ -1,34 +1,208 @@
|
|||||||
import requests
|
|
||||||
import json
|
import json
|
||||||
|
from typing import Any, Dict
|
||||||
|
import requests
|
||||||
|
from desktop_env.envs.actions import KEYBOARD_KEYS
|
||||||
|
|
||||||
|
|
||||||
class PythonController:
    """HTTP client for the control server running on the target machine.

    It can fetch a screenshot or a file from the server, run a Python snippet
    there, and translate action dictionaries (``{"action_type": ...,
    "parameters": {...}}``) into ``pyautogui`` calls.
    """

    def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""):
        """
        Args:
            http_server: Base URL of the server; endpoint paths are appended to it.
            pkgs_prefix: Template with a ``{command}`` placeholder that wraps
                every snippet passed to ``execute_python_command``.
        """
        self.http_server = http_server
        # FIXME: this is a hacky way to execute python commands; fix it and
        # combine it with installation of packages.
        self.pkgs_prefix = pkgs_prefix

    def get_screenshot(self):
        """Return the bytes served by ``/screenshot``, or ``None`` on a non-200 reply.

        The original note on this method says the screenshot includes the cursor.
        """
        response = requests.get(self.http_server + "/screenshot")
        if response.status_code == 200:
            return response.content
        print("Failed to get screenshot. Status code:", response.status_code)
        return None

    def get_file(self, file_path: str):
        """Return the bytes of ``file_path`` fetched via ``/file``, or ``None`` on a non-200 reply."""
        response = requests.post(self.http_server + "/file", data={"file_path": file_path})
        if response.status_code == 200:
            print("File downloaded successfully")
            return response.content
        print("Failed to get file. Status code:", response.status_code)
        return None

    def execute_python_command(self, command: str) -> Any:
        """Wrap ``command`` in ``pkgs_prefix`` and POST it to ``/execute``.

        Returns:
            The decoded JSON body of the reply, or ``None`` when the request
            fails or the body is not valid JSON.
        """
        command = self.pkgs_prefix.format(command=command)
        payload = json.dumps({"command": command})
        headers = {
            'Content-Type': 'application/json'
        }

        try:
            response = requests.post(self.http_server + "/execute", headers=headers, data=payload)
        except requests.exceptions.RequestException as e:
            print("An error occurred while trying to execute the command:", e)
            return None

        if response.status_code == 200:
            print("Command executed successfully:", response.text)
        else:
            print("Failed to execute command. Status code:", response.status_code)

        try:
            return response.json()
        except ValueError as e:
            # A reply without a JSON body (typical for error pages) must not
            # crash the caller; every JSON decode error is a ValueError.
            print("An error occurred while trying to execute the command:", e)
            return None

    def execute_action(self, action: Dict[str, Any]):
        """Execute one action dictionary on the server computer.

        Args:
            action: Mapping with an ``"action_type"`` key and an optional
                ``"parameters"`` mapping.

        Raises:
            ValueError: If the action type is unknown, the parameters do not
                fit the action type, or a key name is not in ``KEYBOARD_KEYS``.
        """
        action_type = action["action_type"]
        # A missing entry and an explicit None both mean "no parameters".
        parameters = action.get("parameters") or {}

        if action_type == "MOVE_TO":
            self._pointer_call("moveTo", parameters, "{x}, {y}")

        elif action_type == "CLICK":
            self._click(parameters)

        elif action_type == "MOUSE_DOWN":
            self._button_call("mouseDown", parameters)

        elif action_type == "MOUSE_UP":
            self._button_call("mouseUp", parameters)

        elif action_type == "RIGHT_CLICK":
            self._pointer_call("rightClick", parameters, "x={x}, y={y}")

        elif action_type == "DOUBLE_CLICK":
            self._pointer_call("doubleClick", parameters, "x={x}, y={y}")

        elif action_type == "DRAG_TO":
            # Both coordinates are mandatory; fail like the other branches
            # instead of silently doing nothing.
            if "x" not in parameters or "y" not in parameters:
                raise ValueError(f"Unknown parameters: {parameters}")
            x = parameters["x"]
            y = parameters["y"]
            self.execute_python_command(f"pyautogui.dragTo({x}, {y}, duration=1.0, button='left', mouseDownUp=True)")

        elif action_type == "SCROLL":
            # todo: check if it is related to the operating system, as
            # https://github.com/TheDuckAI/DuckTrack/blob/main/ducktrack/playback.py pointed out
            if "dx" not in parameters and "dy" not in parameters:
                raise ValueError(f"Unknown parameters: {parameters}")
            if "dx" in parameters:
                dx = parameters["dx"]
                self.execute_python_command(f"pyautogui.hscroll({dx})")
            if "dy" in parameters:
                dy = parameters["dy"]
                self.execute_python_command(f"pyautogui.vscroll({dy})")

        elif action_type == "TYPING":
            if "text" not in parameters:
                raise ValueError(f"Unknown parameters: {parameters}")
            text = parameters["text"]
            # NOTE(review): text is interpolated unescaped into a single-quoted
            # Python literal; quotes or backslashes in it will corrupt the
            # generated command - confirm how the server executes commands
            # before choosing an escaping scheme.
            self.execute_python_command(f"pyautogui.typewrite('{text}')")

        elif action_type == "PRESS":
            self._key_call("press", parameters)

        elif action_type == "KEY_DOWN":
            self._key_call("keyDown", parameters)

        elif action_type == "KEY_UP":
            self._key_call("keyUp", parameters)

        elif action_type == "HOTKEY":
            if "keys" not in parameters:
                raise ValueError(f"Unknown parameters: {parameters}")
            keys = parameters["keys"]
            if not isinstance(keys, list):
                raise ValueError("Keys must be a list of keys")
            for key in keys:
                if key.lower() not in KEYBOARD_KEYS:
                    raise ValueError(f"Key must be one of {KEYBOARD_KEYS}")
            keys_para_rep = "', '".join(keys)
            self.execute_python_command(f"pyautogui.hotkey('{keys_para_rep}')")

        else:
            raise ValueError(f"Unknown action type: {action_type}")

    def _pointer_call(self, function: str, parameters: Dict[str, Any], arg_template: str) -> None:
        """Call ``pyautogui.<function>`` with no arguments or with both coordinates."""
        if not parameters:
            self.execute_python_command(f"pyautogui.{function}()")
        elif "x" in parameters and "y" in parameters:
            args = arg_template.format(x=parameters["x"], y=parameters["y"])
            self.execute_python_command(f"pyautogui.{function}({args})")
        else:
            raise ValueError(f"Unknown parameters: {parameters}")

    def _button_call(self, function: str, parameters: Dict[str, Any]) -> None:
        """Call ``pyautogui.<function>`` with no arguments or with a ``button``."""
        if not parameters:
            self.execute_python_command(f"pyautogui.{function}()")
        elif "button" in parameters:
            button = parameters["button"]
            self.execute_python_command(f"pyautogui.{function}(button='{button}')")
        else:
            raise ValueError(f"Unknown parameters: {parameters}")

    def _click(self, parameters: Dict[str, Any]) -> None:
        """Call ``pyautogui.click`` with whichever of button / x, y / clicks is given."""
        has_button = "button" in parameters
        has_x = "x" in parameters
        has_y = "y" in parameters
        # Coordinates must come as a pair, and non-empty parameters must name
        # a button or a position.
        if parameters and (has_x != has_y or not (has_button or has_x)):
            raise ValueError(f"Unknown parameters: {parameters}")

        args = []
        if has_button:
            button = parameters["button"]
            args.append(f"button='{button}'")
        if has_x:
            x = parameters["x"]
            y = parameters["y"]
            args.append(f"x={x}, y={y}")
        if "num_clicks" in parameters:
            num_clicks = parameters["num_clicks"]
            args.append(f"clicks={num_clicks}")
        self.execute_python_command(f"pyautogui.click({', '.join(args)})")

    def _key_call(self, function: str, parameters: Dict[str, Any]) -> None:
        """Validate ``parameters["key"]`` and call ``pyautogui.<function>`` with it."""
        if "key" not in parameters:
            raise ValueError(f"Unknown parameters: {parameters}")
        key = parameters["key"]
        if key.lower() not in KEYBOARD_KEYS:
            raise ValueError(f"Key must be one of {KEYBOARD_KEYS}")
        self.execute_python_command(f"pyautogui.{function}('{key}')")
|
||||||
|
|||||||
96
desktop_env/controllers/setup.py
Normal file
96
desktop_env/controllers/setup.py
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
import requests
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
class SetupController:
    """Client for the server's ``/setup`` endpoints, used to prepare a task environment."""

    def __init__(self, http_server: str):
        """
        Args:
            http_server: Base URL of the server; ``/setup`` is appended to it.
        """
        self.http_server = http_server + "/setup"

    def setup(self, config):
        """
        Run every setup step that ``config`` asks for.

        Setup Config:
        {
            download: list[tuple[string]], # a list of tuples of url of file to download and the save path
            wallpaper: string,             # path passed to the change_wallpaper endpoint
            open: list[string],            # paths passed to the open_file endpoint
            ...
        }

        A falsy ``config`` or a missing key skips the corresponding step.

        Raises:
            ValueError: If a requested step contains an empty URL or path.
        """
        self._download_setup(config)
        self._change_wallpaper(config)
        # self._tidy_desktop(config) todo: implement this
        self._open_setup(config)
        # can add other setup steps

    def _post(self, endpoint: str, body: dict, failure_message: str) -> None:
        """POST ``body`` as JSON to a setup endpoint and report the outcome on stdout."""
        payload = json.dumps(body)
        headers = {
            'Content-Type': 'application/json'
        }
        try:
            response = requests.post(self.http_server + endpoint, headers=headers, data=payload)
        except requests.exceptions.RequestException as e:
            print("An error occurred while trying to send the request:", e)
            return

        if response.status_code == 200:
            print("Command executed successfully:", response.text)
        else:
            # Report the actual status code (the label promised it) plus the body.
            print(failure_message, "Status code:", response.status_code, response.text)

    def _download_setup(self, config):
        """Ask the server to download every ``(url, path)`` pair in ``config['download']``."""
        if not config or 'download' not in config:
            return
        for url, path in config['download']:
            if not url or not path:
                raise ValueError(f"Setup Download - Invalid URL ({url}) or path ({path}).")
            self._post("/download_file", {"url": url, "path": path}, "Failed to download file.")

    def _change_wallpaper(self, config):
        """Ask the server to use ``config['wallpaper']`` as the wallpaper."""
        if not config or 'wallpaper' not in config:
            return
        path = config['wallpaper']
        if not path:
            raise ValueError(f"Setup Wallpaper - Invalid path ({path}).")
        self._post("/change_wallpaper", {"path": path}, "Failed to change wallpaper.")

    def _tidy_desktop(self, config):
        """Placeholder for a future desktop clean-up step."""
        raise NotImplementedError

    def _open_setup(self, config):
        """Ask the server to open every path in ``config['open']``."""
        if not config or 'open' not in config:
            return
        for path in config['open']:
            if not path:
                raise ValueError(f"Setup Open - Invalid path ({path}).")
            self._post("/open_file", {"path": path}, "Failed to open file.")
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
from fabric import Connection
|
|
||||||
from typing import List
|
|
||||||
|
|
||||||
|
|
||||||
class XDoToolController:
    """Base class that runs ``xdotool`` commands on a remote machine over SSH."""

    def __init__(self, ssh_connection: "Connection"):
        """
        Args:
            ssh_connection: Connection object whose ``run`` method executes a
                command line on the remote machine.
        """
        self.ssh_connection = ssh_connection

    def _execute_xdotool_command(self, command: str) -> str:
        """Run ``xdotool <command>`` against display ``:0`` and return its stripped stdout.

        Args:
            command: The xdotool sub-command and its arguments as one string;
                it is spliced into a shell command line, so callers must quote
                any untrusted argument themselves.
        """
        result = self.ssh_connection.run(f"DISPLAY=:0 xdotool {command}", hide=True)
        return result.stdout.strip()
|
|
||||||
190
desktop_env/envs/actions.py
Normal file
190
desktop_env/envs/actions.py
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
X_MAX = 1920  # TODO: get the screen resolution
Y_MAX = 1080

# Key names accepted by the keyboard actions (PRESS, KEY_DOWN, KEY_UP, HOTKEY).
KEYBOARD_KEYS = [
    '\t', '\n', '\r', ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']',
    '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q',
    'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 'accept', 'add', 'alt', 'altleft',
    'altright', 'apps', 'backspace', 'browserback', 'browserfavorites', 'browserforward', 'browserhome',
    'browserrefresh', 'browsersearch', 'browserstop', 'capslock', 'clear', 'convert', 'ctrl', 'ctrlleft',
    'ctrlright', 'decimal', 'del', 'delete', 'divide', 'down', 'end', 'enter', 'esc', 'escape', 'execute',
    'f1', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f2', 'f20', 'f21', 'f22',
    'f23', 'f24', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'final', 'fn', 'hanguel', 'hangul', 'hanja',
    'help', 'home', 'insert', 'junja', 'kana', 'kanji', 'launchapp1', 'launchapp2', 'launchmail',
    'launchmediaselect', 'left', 'modechange', 'multiply', 'nexttrack', 'nonconvert', 'num0', 'num1',
    'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9', 'numlock', 'pagedown', 'pageup',
    'pause', 'pgdn', 'pgup', 'playpause', 'prevtrack', 'print', 'printscreen', 'prntscrn', 'prtsc',
    'prtscr', 'return', 'right', 'scrolllock', 'select', 'separator', 'shift', 'shiftleft', 'shiftright',
    'sleep', 'stop', 'subtract', 'tab', 'up', 'volumedown', 'volumemute', 'volumeup', 'win', 'winleft',
    'winright', 'yen', 'command', 'option', 'optionleft', 'optionright',
]


def _param(type_, range_, optional):
    """Build one parameter specification."""
    return {"type": type_, "range": range_, "optional": optional}


def _xy(optional):
    """Build the ``x`` / ``y`` screen-coordinate parameter pair."""
    return {
        "x": _param(float, [0, X_MAX], optional),
        "y": _param(float, [0, Y_MAX], optional),
    }


def _button():
    """Build the optional mouse-button parameter."""
    return _param(str, ["left", "right", "middle"], True)


def _key():
    """Build the mandatory single-key parameter."""
    return _param(str, KEYBOARD_KEYS, False)


def _action(action_type, note, parameters):
    """Build one ACTION_SPACE entry."""
    return {"action_type": action_type, "note": note, "parameters": parameters}


# Every action the environment accepts, with a note and the specification
# (type, allowed range, optional flag) of each of its parameters.
ACTION_SPACE = [
    _action(
        "MOVE_TO",
        "move the cursor to the specified position",
        _xy(optional=False),
    ),
    _action(
        "CLICK",
        "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position",
        {
            "button": _button(),
            **_xy(optional=True),
            "num_clicks": _param(int, [1, 2, 3], True),
        },
    ),
    _action(
        "MOUSE_DOWN",
        "press the left button if the button not specified, otherwise press the specified button",
        {"button": _button()},
    ),
    _action(
        "MOUSE_UP",
        "release the left button if the button not specified, otherwise release the specified button",
        {"button": _button()},
    ),
    _action(
        "RIGHT_CLICK",
        "right click at the current position if x and y are not specified, otherwise right click at the specified position",
        _xy(optional=True),
    ),
    _action(
        "DOUBLE_CLICK",
        "double click at the current position if x and y are not specified, otherwise double click at the specified position",
        _xy(optional=True),
    ),
    _action(
        "DRAG_TO",
        "drag the cursor to the specified position with the left button pressed",
        _xy(optional=False),
    ),
    _action(
        "SCROLL",
        "scroll the mouse wheel up or down",
        {
            "dx": _param(int, None, False),
            "dy": _param(int, None, False),
        },
    ),
    _action(
        "TYPING",
        "type the specified text",
        {"text": _param(str, None, False)},
    ),
    _action(
        "PRESS",
        "press the specified key and release it",
        {"key": _key()},
    ),
    _action(
        "KEY_DOWN",
        "press the specified key",
        {"key": _key()},
    ),
    _action(
        "KEY_UP",
        "release the specified key",
        {"key": _key()},
    ),
    _action(
        "HOTKEY",
        "press the specified key combination",
        {"keys": _param(list, [KEYBOARD_KEYS], False)},
    ),
]
|
||||||
@@ -1,78 +1,61 @@
|
|||||||
from enum import Enum
|
from __future__ import annotations
|
||||||
from typing import Literal, List, Tuple
|
|
||||||
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
from fabric import Connection
|
|
||||||
import time
|
import time
|
||||||
|
import uuid
|
||||||
|
import platform
|
||||||
|
from typing import List
|
||||||
|
|
||||||
import gymnasium as gym
|
import gymnasium as gym
|
||||||
from gymnasium import spaces
|
import requests
|
||||||
import numpy as np
|
|
||||||
from PIL import Image
|
|
||||||
|
|
||||||
from desktop_env.controllers.mouse import MouseClick, AbstractMouseController, XDoToolMouseController, PythonMouseController
|
from desktop_env.controllers.python import PythonController
|
||||||
from desktop_env.controllers.keyboard import AbstractKeyboardController, XDoToolKeyboardController, PythonKeyboardController
|
from desktop_env.controllers.setup import SetupController
|
||||||
|
from desktop_env.evaluators import eval_funcs
|
||||||
class Action(Enum):
|
|
||||||
CLICK = 0
|
|
||||||
MOUSE_DOWN = 1
|
|
||||||
MOUSE_UP = 2
|
|
||||||
MOUSE_MOVE = 3
|
|
||||||
KEY = 4
|
|
||||||
KEY_DOWN = 5
|
|
||||||
KEY_UP = 6
|
|
||||||
TYPE = 7
|
|
||||||
|
|
||||||
|
|
||||||
VM_TYPE = Literal['ubuntu', 'windows']
|
def _execute_command(command: List[str]) -> str | None:
    """Run a host-side command (in this module, always a ``vmrun`` call).

    ``vmrun -T ws start ...`` is launched with ``Popen`` and only waited on:
    its output is not captured and its exit status is not checked.  Any other
    command is run with captured text output and a 60 second timeout.

    Args:
        command: Program and arguments, passed to ``subprocess`` as a list.

    Returns:
        The captured stdout of an ordinary command, or ``None`` for the
        ``start`` command.

    Raises:
        Exception: If an ordinary command exits non-zero.  The message is the
            command's stdout + stderr wrapped in ANSI red escape codes.
        subprocess.TimeoutExpired: If an ordinary command runs longer than
            60 seconds.
    """
    if command[:4] == ["vmrun", "-T", "ws", "start"]:
        # Starting the VM is only waited for; there is nothing to return.
        process = subprocess.Popen(command)
        process.wait()
        return None

    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=60, text=True)
    if result.returncode != 0:
        raise Exception("\033[91m" + result.stdout + result.stderr + "\033[0m")
    return result.stdout
|
||||||
|
|
||||||
|
|
||||||
class DesktopEnv(gym.Env):
|
class DesktopEnv(gym.Env):
|
||||||
"""DesktopEnv with OpenAI Gym interface."""
|
"""DesktopEnv with OpenAI Gym interface."""
|
||||||
|
|
||||||
def __init__(self, path_to_vm: str, username: str, password: str,
|
def __init__(
|
||||||
host: str, snapshot_path: str = "some_point_browser", vm_os: VM_TYPE = "ubuntu"):
|
self,
|
||||||
|
path_to_vm: str,
|
||||||
|
snapshot_path: str = "base",
|
||||||
|
instruction: str = None,
|
||||||
|
config: dict = None,
|
||||||
|
evaluator: dict = None,
|
||||||
|
action_space: str = "computer_13",
|
||||||
|
):
|
||||||
|
# Initialize environment variables
|
||||||
self.path_to_vm = path_to_vm
|
self.path_to_vm = path_to_vm
|
||||||
self.username = username
|
|
||||||
self.password = password
|
|
||||||
self.host = host
|
|
||||||
self.snapshot_path = snapshot_path # todo: handling the logic of snapshot directory
|
self.snapshot_path = snapshot_path # todo: handling the logic of snapshot directory
|
||||||
|
|
||||||
self.screen_width = 800
|
# Initialize emulator and controller
|
||||||
self.screen_height = 800
|
|
||||||
# Define the action and observation space
|
|
||||||
self.action_space = spaces.Dict({
|
|
||||||
"action_type": spaces.Discrete(len(Action)),
|
|
||||||
"click_type": spaces.Discrete(len(MouseClick)),
|
|
||||||
"x": spaces.Discrete(self.screen_width),
|
|
||||||
"y": spaces.Discrete(self.screen_height),
|
|
||||||
"key": spaces.MultiDiscrete([128] * 10), # max 10 characters, ASCII
|
|
||||||
"text": spaces.MultiDiscrete([128] * 10) # max 10 characters, ASCII
|
|
||||||
})
|
|
||||||
|
|
||||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3), dtype=np.uint8)
|
|
||||||
|
|
||||||
# Additional setup
|
|
||||||
self.metadata = {'render.modes': ['rgb_array']}
|
|
||||||
|
|
||||||
# Initialize emulator
|
|
||||||
print("Initializing...")
|
print("Initializing...")
|
||||||
self._start_emulator()
|
self._start_emulator()
|
||||||
|
self.host = f"http://{self._get_vm_ip()}:5000"
|
||||||
|
self.controller = PythonController(http_server=self.host)
|
||||||
|
self.setup_controller = SetupController(http_server=self.host)
|
||||||
|
self.instruction = instruction
|
||||||
|
self.config = config
|
||||||
|
self.evaluator = evaluator
|
||||||
|
|
||||||
# set up controllers
|
# mode: human or machine
|
||||||
self.mouse_controller, self.keyboard_controller = self._create_controllers(vm_os)
|
assert action_space in ["computer_13", "pyautogui"]
|
||||||
|
self.action_space = action_space
|
||||||
def _create_controllers(self, vm_os: VM_TYPE) -> Tuple[AbstractMouseController, AbstractKeyboardController]:
|
# todo: define the action space and the observation space as gym did, or extend theirs
|
||||||
if vm_os == "ubuntu":
|
|
||||||
ssh_connection = Connection(host=self.host, user=self.username, connect_kwargs={"password": self.password})
|
|
||||||
mouse_controller = XDoToolMouseController(ssh_connection)
|
|
||||||
keyboard_controller = XDoToolKeyboardController(ssh_connection)
|
|
||||||
elif vm_os == "windows":
|
|
||||||
mouse_controller = PythonMouseController(http_server=self.host)
|
|
||||||
keyboard_controller = PythonKeyboardController(http_server=self.host)
|
|
||||||
else:
|
|
||||||
raise NotImplementedError(vm_os)
|
|
||||||
|
|
||||||
return mouse_controller, keyboard_controller
|
|
||||||
|
|
||||||
def _start_emulator(self):
|
def _start_emulator(self):
|
||||||
while True:
|
while True:
|
||||||
@@ -84,108 +67,120 @@ class DesktopEnv(gym.Env):
|
|||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
print("Starting VM...")
|
print("Starting VM...")
|
||||||
self._execute_command(["vmrun", "-T", "ws", "start", self.path_to_vm])
|
_execute_command(["vmrun", "-T", "ws", "start", self.path_to_vm])
|
||||||
time.sleep(5)
|
time.sleep(3)
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
print(f"Error executing command: {e.output.decode().strip()}")
|
print(f"Error executing command: {e.output.decode().strip()}")
|
||||||
|
|
||||||
def _execute_command(self, command: List[str]) -> None:
|
def _get_vm_ip(self):
    """Ask ``vmrun`` for the guest IP address, retrying on failure.

    Makes up to 10 attempts, sleeping 5 seconds after each failed one.

    Returns:
        The stripped output of ``vmrun -T ws getGuestIPAddress <vm>``.

    Raises:
        Exception: If every attempt fails.
    """
    max_retries = 10
    print("Getting IP Address...")
    for _ in range(max_retries):
        try:
            output = _execute_command(["vmrun", "-T", "ws", "getGuestIPAddress", self.path_to_vm]).strip()
            print(f"IP address: {output}")
            return output
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt / SystemExit
            # must propagate so the user can abort the retry loop.
            time.sleep(5)
            print("Retrying...")
    raise Exception("Failed to get VM IP address!")
|
||||||
|
|
||||||
def _save_state(self):
    """Take a VM snapshot named ``self.snapshot_path`` via ``vmrun``."""
    # "ws" and "snapshot" must be separate list items: without the comma the
    # adjacent literals concatenate into the single bogus argument "wssnapshot".
    _execute_command(["vmrun", "-T", "ws", "snapshot", self.path_to_vm, self.snapshot_path])
|
||||||
|
|
||||||
def _get_screenshot(self):
|
def _get_screenshot(self):
|
||||||
image_path = "./screenshot.png"
|
random_uuid = str(uuid.uuid4())
|
||||||
self._execute_command(
|
os.makedirs(os.path.join("tmp", random_uuid), exist_ok=True)
|
||||||
["vmrun", "-T", "ws", "-gu", self.username, "-gp", self.password, "captureScreen", self.path_to_vm,
|
image_path = os.path.join("tmp", random_uuid, "screenshot.png")
|
||||||
image_path])
|
|
||||||
|
# Get the screenshot and save to the image_path
|
||||||
|
screenshot = self.controller.get_screenshot()
|
||||||
|
with open(image_path, "wb") as f:
|
||||||
|
f.write(screenshot)
|
||||||
|
|
||||||
return image_path
|
return image_path
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
screenshot_image_path = self._get_screenshot()
|
screenshot_image_path = self._get_screenshot()
|
||||||
with Image.open(screenshot_image_path) as img:
|
return screenshot_image_path
|
||||||
return np.array(img)
|
|
||||||
|
|
||||||
def reset(self):
|
def reset(self, seed=None, options=None):
|
||||||
print("Resetting environment...")
|
print("Resetting environment...")
|
||||||
|
|
||||||
print("Reverting to snapshot to {}...".format(self.snapshot_path))
|
print("Reverting to snapshot to {}...".format(self.snapshot_path))
|
||||||
self._execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path])
|
_execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path])
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
print("Starting emulator...")
|
print("Starting emulator...")
|
||||||
self._start_emulator()
|
self._start_emulator()
|
||||||
print("Emulator started.")
|
print("Emulator started.")
|
||||||
|
|
||||||
|
print("Setting up environment...")
|
||||||
|
self.setup_controller.setup(self.config)
|
||||||
|
|
||||||
|
time.sleep(5)
|
||||||
|
print("Environment setup complete.")
|
||||||
|
|
||||||
observation = self._get_obs()
|
observation = self._get_obs()
|
||||||
return observation
|
return observation
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action, pause=0.5):
|
||||||
action_type = Action(action['action_type'])
|
# fixme: add reminding logic here, decide if the action is valid for the current action_space
|
||||||
if action_type == Action.CLICK:
|
if self.action_space == "computer_13":
|
||||||
click = MouseClick(action['click_type'])
|
# the set of all possible actions defined in the action representation
|
||||||
if click == MouseClick.LEFT:
|
self.controller.execute_action(action)
|
||||||
self.mouse_controller.left_click()
|
elif self.action_space == "pyautogui":
|
||||||
elif click == MouseClick.MIDDLE:
|
# the set of all possible python commands insides `pyautogui`
|
||||||
self.mouse_controller.middle_click()
|
self.controller.execute_python_command(action)
|
||||||
elif click == MouseClick.RIGHT:
|
|
||||||
self.mouse_controller.right_click()
|
|
||||||
elif click == MouseClick.WHEEL_UP:
|
|
||||||
self.mouse_controller.scroll_up()
|
|
||||||
elif click == MouseClick.WHEEL_DOWN:
|
|
||||||
self.mouse_controller.scroll_down()
|
|
||||||
elif action_type == Action.MOUSE_DOWN:
|
|
||||||
click = MouseClick(action['click_type'])
|
|
||||||
if click == MouseClick.LEFT:
|
|
||||||
self.mouse_controller.left_down()
|
|
||||||
elif click == MouseClick.MIDDLE:
|
|
||||||
self.mouse_controller.middle_down()
|
|
||||||
elif click == MouseClick.RIGHT:
|
|
||||||
self.mouse_controller.right_down()
|
|
||||||
elif click == MouseClick.WHEEL_UP:
|
|
||||||
self.mouse_controller.scroll_up()
|
|
||||||
elif click == MouseClick.WHEEL_DOWN:
|
|
||||||
self.mouse_controller.scroll_down()
|
|
||||||
elif action_type == Action.MOUSE_UP:
|
|
||||||
click = MouseClick(action['click_type'])
|
|
||||||
if click == MouseClick.LEFT:
|
|
||||||
self.mouse_controller.left_up()
|
|
||||||
elif click == MouseClick.MIDDLE:
|
|
||||||
self.mouse_controller.middle_up()
|
|
||||||
elif click == MouseClick.RIGHT:
|
|
||||||
self.mouse_controller.right_up()
|
|
||||||
elif click == MouseClick.WHEEL_UP:
|
|
||||||
self.mouse_controller.scroll_up()
|
|
||||||
elif click == MouseClick.WHEEL_DOWN:
|
|
||||||
self.mouse_controller.scroll_down()
|
|
||||||
elif action_type == Action.MOUSE_MOVE:
|
|
||||||
self.mouse_controller.mouse_move(x = action['x'], y = action['y'])
|
|
||||||
elif action_type == Action.KEY:
|
|
||||||
key_sequence = ''.join(map(chr, action['key'])) # Convert integer array to string
|
|
||||||
self.keyboard_controller.key(key_sequence)
|
|
||||||
elif action_type == Action.KEY_DOWN:
|
|
||||||
key_sequence = ''.join(map(chr, action['key'])) # Convert integer array to string
|
|
||||||
self.keyboard_controller.key_down(key_sequence)
|
|
||||||
elif action_type == Action.KEY_UP:
|
|
||||||
key_sequence = ''.join(map(chr, action['key'])) # Convert integer array to string
|
|
||||||
self.keyboard_controller.key_up(key_sequence)
|
|
||||||
elif action_type == Action.TYPE:
|
|
||||||
text = ''.join(map(chr, action['text'])) # Convert integer array to string
|
|
||||||
self.keyboard_controller.type(text)
|
|
||||||
|
|
||||||
# Capture new state
|
# todo: maybe for the better here we need to add a logic to wait until the rendering is done
|
||||||
observation = self._get_obs()
|
time.sleep(pause)
|
||||||
reward = 0 # Define reward calculation
|
observation = {
|
||||||
done = False # Define episode termination condition
|
"screenshot": self._get_obs(),
|
||||||
|
"instruction": self.instruction
|
||||||
|
}
|
||||||
|
reward = 0 # todo: Define reward calculation for each example
|
||||||
|
done = False # todo: Define episode termination condition for each example
|
||||||
info = {}
|
info = {}
|
||||||
return observation, reward, done, info
|
return observation, reward, done, info
|
||||||
|
|
||||||
|
def evaluate(self):
    """
    Evaluate whether the task is successfully completed.

    Looks up ``self.evaluator["func"]`` in ``eval_funcs``, copies every file
    listed in ``self.evaluator["paths"]`` into a fresh local ``tmp/<uuid>/``
    directory and calls the evaluation function with those local paths as
    keyword arguments (one per entry of ``paths``).

    Returns:
        Whatever the selected evaluation function returns.

    Raises:
        NotImplementedError: If a file entry has a ``type`` other than
            ``"cloud_file"`` or ``"vm_file"``.
    """

    def copy_file_to_local(_file_info):
        """Fetch the file described by ``_file_info``; return its local path."""
        random_uuid = str(uuid.uuid4())
        os.makedirs(os.path.join("tmp", random_uuid), exist_ok=True)
        _path = os.path.join("tmp", random_uuid, "tmp.xlsx")

        file_type = _file_info["type"]
        if file_type == "cloud_file":
            url = _file_info["path"]
            # A timeout keeps a stalled server from hanging the evaluation
            # forever; the context manager releases the streamed connection
            # even when the download fails part-way.
            with requests.get(url, stream=True, timeout=60) as response:
                response.raise_for_status()

                with open(_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
        elif file_type == "vm_file":
            # fixme: stream this part maybe as well
            file = self.controller.get_file(_file_info["path"])
            with open(_path, "wb") as f:
                f.write(file)
        else:
            raise NotImplementedError(file_type)

        return _path

    # todo: make this more flexible by refactoring
    eval_func = eval_funcs[self.evaluator["func"]]
    eval_func_vars = {}

    for var_name, file_info in self.evaluator["paths"].items():
        path = copy_file_to_local(file_info)
        eval_func_vars[var_name] = path

    return eval_func(**eval_func_vars)
|
||||||
|
|
||||||
def render(self, mode='rgb_array'):
|
def render(self, mode='rgb_array'):
|
||||||
if mode == 'rgb_array':
|
if mode == 'rgb_array':
|
||||||
return self._get_obs()
|
return self._get_obs()
|
||||||
@@ -193,4 +188,4 @@ class DesktopEnv(gym.Env):
|
|||||||
raise ValueError('Unsupported render mode: {}'.format(mode))
|
raise ValueError('Unsupported render mode: {}'.format(mode))
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
self._execute_command(["vmrun", "stop", self.path_to_vm])
|
_execute_command(["vmrun", "stop", self.path_to_vm])
|
||||||
|
|||||||
5
desktop_env/evaluators/__init__.py
Normal file
5
desktop_env/evaluators/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
from .table import compare_table
|
||||||
|
|
||||||
|
# Registry of evaluation functions, keyed by the exact string an example puts
# in its evaluator "func" field.
eval_funcs = {"compare_table(expected, actual)": compare_table}
|
||||||
0
desktop_env/evaluators/replay.py
Normal file
0
desktop_env/evaluators/replay.py
Normal file
14
desktop_env/evaluators/table.py
Normal file
14
desktop_env/evaluators/table.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
def compare_table(expected, actual):
    """Return 1 if the two Excel files load to equal DataFrames, else 0.

    Args:
        expected: Path of the reference workbook.
        actual: Path of the workbook to check.
    """
    import pandas as pd

    expected_df, actual_df = pd.read_excel(expected), pd.read_excel(actual)

    # Compare the DataFrames
    if expected_df.equals(actual_df):
        return 1
    return 0


if __name__ == '__main__':
    path1 = ""
    path2 = ""

    print(compare_table(path1, path2))
|
||||||
184
desktop_env/server/main.py
Normal file
184
desktop_env/server/main.py
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import platform
|
||||||
|
import subprocess
|
||||||
|
import requests
|
||||||
|
|
||||||
|
import Xlib.display
|
||||||
|
import pyautogui
|
||||||
|
from PIL import ImageGrab, Image
|
||||||
|
from flask import Flask, request, jsonify, send_file
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
pyautogui.PAUSE = 0
|
||||||
|
pyautogui.DARWIN_CATCH_UP_TIME = 0
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/execute', methods=['POST'])
def execute_command():
    """Run the shell command given in the JSON body and report its output.

    Request JSON: ``{"command": "<shell command>"}`` (defaults to ``''``).

    Returns:
        JSON with ``status``, ``output`` (stdout), ``error`` (stderr) and
        ``returncode``.  ``status`` is ``'success'`` whenever the command could
        be launched, so callers must check ``returncode`` to learn whether the
        command itself succeeded.  If launching raises, a 500 response with
        ``status: 'error'`` and the exception text is returned.
    """
    data = request.json
    # The 'command' key in the JSON request should contain the command to be executed.
    command = data.get('command', '')

    # SECURITY NOTE: the command is executed through the shell without any
    # safety checks; anyone who can reach this endpoint can run arbitrary code.
    try:
        result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        return jsonify({
            'status': 'success',
            'output': result.stdout,
            'error': result.stderr,
            # Exposed so callers can tell a failing command from a passing one.
            'returncode': result.returncode
        })
    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/screenshot', methods=['GET'])
def capture_screen_with_cursor():
    """Capture the screen to ``screenshots/screenshot.png`` and send it as PNG.

    * Windows: ``pyautogui`` screenshot with a cursor image pasted at the
      current pointer position (the cursor image is downloaded once and cached
      as ``screenshots/cursor.png``).
    * Linux: full-screen ``ImageGrab`` sized from the X display; no cursor is
      pasted in this branch.
    * macOS: ``screencapture -C``.

    Returns:
        The PNG file, or a 501 JSON error when the platform is not supported.
    """
    file_path = os.path.join("screenshots", "screenshot.png")
    user_platform = platform.system()

    # Ensure the screenshots directory exists
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    if user_platform == "Windows":
        def _download_image(url, path):
            response = requests.get(url, timeout=30)
            # Fail here rather than caching an HTTP error page as cursor.png,
            # which would break every later screenshot.
            response.raise_for_status()
            with open(path, 'wb') as file:
                file.write(response.content)

        cursor_path = os.path.join("screenshots", "cursor.png")
        if not os.path.exists(cursor_path):
            cursor_url = "https://vip.helloimg.com/images/2023/12/02/oQPzmt.png"
            _download_image(cursor_url, cursor_path)
        screenshot = pyautogui.screenshot()
        cursor_x, cursor_y = pyautogui.position()
        cursor = Image.open(cursor_path)
        # The cursor image doubles as its own mask for the paste.
        screenshot.paste(cursor, (cursor_x, cursor_y), cursor)
        screenshot.save(file_path)
    elif user_platform == "Linux":
        # Use xlib to prevent scrot dependency for Linux
        screen = Xlib.display.Display().screen()
        size = screen.width_in_pixels, screen.height_in_pixels
        screenshot = ImageGrab.grab(bbox=(0, 0, size[0], size[1]))
        screenshot.save(file_path)
    elif user_platform == "Darwin":  # (Mac OS)
        # Use the screencapture utility to capture the screen with the cursor.
        # check=True: a failed capture must not fall through to sending a
        # missing or stale file.
        subprocess.run(["screencapture", "-C", file_path], check=True)
    else:
        message = f"The platform you're using ({user_platform}) is not currently supported"
        print(message)
        # Nothing was captured, so there is no file to send.
        return jsonify({"error": message}), 501

    return send_file(file_path, mimetype='image/png')
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/file', methods=['POST'])
def get_file():
    """Send back, as an attachment, the file named by form field ``file_path``.

    Returns:
        The file on success, a 400 JSON error when ``file_path`` is missing
        from the form, or a 404 JSON error when the file is not found.
    """
    # The filename arrives as a form field of the POST request
    if 'file_path' not in request.form:
        return jsonify({"error": "file_path is required"}), 400

    requested_path = request.form['file_path']
    try:
        # Send the file to the user if it exists
        return send_file(requested_path, as_attachment=True)
    except FileNotFoundError:
        # A missing file becomes a 404 error
        return jsonify({"error": "File not found"}), 404
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/platform', methods=['GET'])
def get_platform():
    """Return ``platform.system()`` of the machine running this server."""
    system_name = platform.system()
    return system_name
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/cursor_position', methods=['GET'])
def get_cursor_position():
    """Return the current pointer position as the JSON array ``[x, y]``."""
    # Read the position once so x and y come from the same sample.
    position = pyautogui.position()
    # A bare ``(x, y)`` tuple cannot be returned: Flask would read it as
    # (body, status) and reject the integer body.
    # NOTE(review): the array shape is chosen here; confirm what clients expect.
    return jsonify([position.x, position.y])
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/setup/change_wallpaper", methods=['POST'])
def change_wallpaper():
    """Set the desktop wallpaper to the image at the JSON field ``path``.

    Returns:
        A success message; 400 when ``path`` is missing; 404 when the file
        does not exist; 500 when the platform is unsupported or the
        platform-specific call fails.
    """
    data = request.json
    path = data.get('path', None)

    if not path:
        return "Path not supplied!", 400

    path = Path(path)

    if not path.exists():
        return f"File not found: {path}", 404

    try:
        user_platform = platform.system()
        if user_platform == "Windows":
            import ctypes
            # 20 is SPI_SETDESKWALLPAPER; 3 is SPIF_UPDATEINIFILE | SPIF_SENDCHANGE.
            ctypes.windll.user32.SystemParametersInfoW(20, 0, str(path), 3)
        elif user_platform == "Linux":
            # check=True: a failing gsettings call must not be reported as success.
            subprocess.run(["gsettings", "set", "org.gnome.desktop.background", "picture-uri", f"file://{path}"],
                           check=True)
        elif user_platform == "Darwin":  # (Mac OS)
            subprocess.run(
                ["osascript", "-e", f'tell application "Finder" to set desktop picture to POSIX file "{path}"'],
                check=True)
        else:
            # Nothing was changed, so do not fall through to the success message.
            raise NotImplementedError(f"unsupported platform: {user_platform}")
        return "Wallpaper changed successfully"
    except Exception as e:
        return f"Failed to change wallpaper. Error: {e}", 500
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/setup/download_file", methods=['POST'])
def download_file():
    """Download JSON field ``url`` to JSON field ``path`` on this machine.

    Parent directories of ``path`` are created as needed.  The download is
    attempted up to three times; only ``requests.RequestException`` errors are
    retried.

    Returns:
        A success message; 400 when ``url`` or ``path`` is missing; 500 with
        the last error once all attempts have failed.
    """
    data = request.json
    url = data.get('url', None)
    path = data.get('path', None)

    if not url or not path:
        return "Path or URL not supplied!", 400

    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    max_retries = 3
    # Kept outside the loop: the name bound by ``except ... as e`` is deleted
    # when the except clause ends, so it cannot be read after the loop.
    last_error = None
    for i in range(max_retries):
        try:
            # The timeout turns a stalled connection into a retryable error.
            response = requests.get(url, stream=True, timeout=60)
            response.raise_for_status()

            with open(path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
            return "File downloaded successfully"

        except requests.RequestException as e:
            last_error = e
            print(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")

    return f"Failed to download {url}. No retries left. Error: {last_error}", 500
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/setup/open_file", methods=['POST'])
def open_file():
    """Open the file at JSON field ``path`` with the desktop's default handler.

    Returns:
        A success message; 400 when ``path`` is missing; 404 when the file
        does not exist; 500 when the opener could not be launched.
    """
    data = request.json
    path = data.get('path', None)

    if not path:
        return "Path not supplied!", 400

    path = Path(path)

    if not path.exists():
        return f"File not found: {path}", 404

    try:
        user_platform = platform.system()
        if user_platform == "Windows":
            # os.startfile exists only on Windows.
            os.startfile(path)
        elif user_platform == "Darwin":  # (Mac OS)
            subprocess.Popen(["open", str(path)])
        else:
            # Popen rather than run, so the request does not wait on the opener.
            subprocess.Popen(["xdg-open", str(path)])
        return "File opened successfully"
    except Exception as e:
        return f"Failed to open {path}. Error: {e}", 500
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
app.run(debug=True, host="0.0.0.0")
|
||||||
5
desktop_env/server/requirements.txt
Normal file
5
desktop_env/server/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
python3-xlib==0.15
|
||||||
|
PyAutoGUI==0.9.54
|
||||||
|
Pillow==10.1.0
|
||||||
|
git+https://github.com/moses-palmer/pynput.git@refs/pull/541/head # to make sure that it works on Apple Silicon
|
||||||
|
requests
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
from flask import Flask, request, jsonify
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
app = Flask(__name__)
|
|
||||||
|
|
||||||
@app.route('/execute', methods=['POST'])
|
|
||||||
def execute_command():
|
|
||||||
data = request.json
|
|
||||||
# The 'command' key in the JSON request should contain the command to be executed.
|
|
||||||
command = data.get('command', '')
|
|
||||||
|
|
||||||
# Execute the command without any safety checks.
|
|
||||||
try:
|
|
||||||
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
||||||
stdout, stderr = process.communicate()
|
|
||||||
|
|
||||||
return jsonify({
|
|
||||||
'status': 'success',
|
|
||||||
'output': stdout.decode(),
|
|
||||||
'error': stderr.decode()
|
|
||||||
})
|
|
||||||
except Exception as e:
|
|
||||||
return jsonify({
|
|
||||||
'status': 'error',
|
|
||||||
'message': str(e)
|
|
||||||
}), 500
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
app.run(debug=True, host="0.0.0.0")
|
|
||||||
24
evaluation_examples/README.md
Normal file
24
evaluation_examples/README.md
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
# Evaluation examples
|
||||||
|
|
||||||
|
Here we put the data examples to benchmark the ability of agents when interacting with GUI.
|
||||||
|
The examples are stored in `./examples`, where each data item is formatted as:
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"id": "uid", # unique id
|
||||||
|
"snapshot": "snapshot_id", # the snapshot id of the environment, with some data already there and apps already opened, or just desktop
|
||||||
|
"instruction": "natural_language_instruction", # the natural language instruction of the task, what we want the agent to do
|
||||||
|
"source": "website_url", # where we know this example, some forum, or some website, or some paper
|
||||||
|
"config": {xxx}, # the scripts that set up the download and open-file actions, forming the initial state of a task
|
||||||
|
"trajectory": "trajectory_directory", # the trajectory directory, which contains the action sequence file, the screenshots and the recording video
|
||||||
|
"related_apps": ["app1", "app2", ...], # the related apps, which are opened during the task
|
||||||
|
"evaluator": "evaluation_dir", # the directory of the evaluator, which contains the evaluation script for this example
|
||||||
|
…
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The `./trajectories` directory contains the annotated trajectories for finishing the task of each data item in `./examples`.
|
||||||
|
|
||||||
|
For now, it is under construction, and only tested on Windows 10. Please:
|
||||||
|
- Modify the path accordingly to run the evaluation;
|
||||||
|
- Let us know if some parts are overfitted to our environment.
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"id": "0bf05a7d-b28b-44d2-955a-50b41e24012a",
|
||||||
|
"snapshot": "libreoffice_calc",
|
||||||
|
"instruction": "I would like to pad all the numbers in the 'Old ID' column with zeros in front, to fill them up to seven digits in the 'New 7 Digit ID' column.",
|
||||||
|
"source": "https://www.youtube.com/shorts/FPAQaDTS8VY",
|
||||||
|
"config": {
|
||||||
|
"download": [
|
||||||
|
[
|
||||||
|
"",
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Customers_New_7digit_Id.xlsx"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"open": [
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Customers_New_7digit_Id.xlsx"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"trajectory": "trajectories/0bf05a7d-b28b-44d2-955a-50b41e24012a",
|
||||||
|
"related_apps": [
|
||||||
|
"libreoffice calc"
|
||||||
|
],
|
||||||
|
"evaluator": "evaluation_dir"
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"id": "2bd59342-0664-4ccb-ba87-79379096cc08",
|
||||||
|
"snapshot": "libreoffice_calc",
|
||||||
|
"instruction": "Make sparkline chart line by line",
|
||||||
|
"source": "https://www.youtube.com/shorts/L3Z-F1QTQFY",
|
||||||
|
"config": {
|
||||||
|
"download": [
|
||||||
|
[
|
||||||
|
"",
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"open": [
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"trajectory": "trajectories/2bd59342-0664-4ccb-ba87-79379096cc08",
|
||||||
|
"related_apps": [
|
||||||
|
"libreoffice calc"
|
||||||
|
],
|
||||||
|
"evaluator": "evaluation_dir"
|
||||||
|
}
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"id": "37608790-6147-45d0-9f20-1137bb35703d",
|
||||||
|
"snapshot": "libreoffice_calc",
|
||||||
|
"instruction": "Help me fill the columns of First Name, Last Name and Rank",
|
||||||
|
"source": "https://www.youtube.com/shorts/uzPo_CPCHH8",
|
||||||
|
"config": {
|
||||||
|
"download": [
|
||||||
|
[
|
||||||
|
"https://drive.usercontent.google.com/download?id=1wDqap5cBfxnlqTNrZG61k_wDWTujl6AU&export=download&authuser=0&confirm=t&uuid=fd183b89-76b7-4dc5-880e-1045ed769562&at=APZUnTWp9RMafMg0xohhBWazN3YD:1701785710674",
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"open": [
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"trajectory": "trajectories/37608790-6147-45d0-9f20-1137bb35703d",
|
||||||
|
"related_apps": [
|
||||||
|
"libreoffice calc"
|
||||||
|
],
|
||||||
|
"evaluator": {
|
||||||
|
"func": "compare_table(expected, actual)",
|
||||||
|
"paths": {
|
||||||
|
"expected": {
|
||||||
|
"type": "cloud_file",
|
||||||
|
"path": "https://drive.usercontent.google.com/download?id=1dxpiUqP_CVvQp5tddxlwO3Cp1BqJ-ZDE&export=download&authuser=0&confirm=t&uuid=ccd204c7-07ce-4fdf-a5d4-a7e4f37b9ce6&at=APZUnTVBs7TgrVrDXpkiU8S7WbQo:1702360836747"
|
||||||
|
},
|
||||||
|
"actual": {
|
||||||
|
"type": "vm_file",
|
||||||
|
"path": "C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"id": "7a4e4bc8-922c-4c84-865c-25ba34136be1",
|
||||||
|
"snapshot": "libreoffice_calc",
|
||||||
|
"instruction": "Reorder the columns to be \"Data\", \"First Name\", \"Last Name\", \"Order ID\", \"Sales\"",
|
||||||
|
"source": "https://www.youtube.com/shorts/bvUhr1AHs44",
|
||||||
|
"config": {
|
||||||
|
"download": [
|
||||||
|
[
|
||||||
|
"",
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Name_Order_Id_move_column.xlsx"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"open": [
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Name_Order_Id_move_column.xlsx"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"trajectory": "trajectories/7a4e4bc8-922c-4c84-865c-25ba34136be1",
|
||||||
|
"related_apps": [
|
||||||
|
"libreoffice calc"
|
||||||
|
],
|
||||||
|
"evaluator": "evaluation_dir"
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"id": "7b802dad-6e0f-4204-9815-d4e3f57627d8",
|
||||||
|
"snapshot": "libreoffice_calc",
|
||||||
|
"instruction": "I would like to sort this table based on cell color, placing all the rows marked with pink at the beginning, while keeping their order among themselves unchanged.",
|
||||||
|
"source": "https://www.youtube.com/shorts/Of-lzeP1usE",
|
||||||
|
"config": {
|
||||||
|
"download": [
|
||||||
|
[
|
||||||
|
"",
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Customer_Sort_by_cell_color.xlsx"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"open": [
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Customer_Sort_by_cell_color.xlsx"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"trajectory": "trajectories/7b802dad-6e0f-4204-9815-d4e3f57627d8",
|
||||||
|
"related_apps": [
|
||||||
|
"libreoffice calc"
|
||||||
|
],
|
||||||
|
"evaluator": "evaluation_dir"
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"id": "7efeb4b1-3d19-4762-b163-63328d66303b",
|
||||||
|
"snapshot": "libreoffice_calc",
|
||||||
|
"instruction": "Fill in the Serial Numbers in \"Serial #\" column",
|
||||||
|
"source": "https://www.youtube.com/shorts/4jzXfZNhfmk",
|
||||||
|
"config": {
|
||||||
|
"download": [
|
||||||
|
[
|
||||||
|
"",
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Order_Sales_Serial#.xlsx"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"open": [
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Order_Sales_Serial#.xlsx"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"trajectory": "trajectories/",
|
||||||
|
"related_apps": [
|
||||||
|
"libreoffice calc"
|
||||||
|
],
|
||||||
|
"evaluator": "evaluation_dir"
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"id": "a9f325aa-8c05-4e4f-8341-9e4358565f4f",
|
||||||
|
"snapshot": "libreoffice_calc",
|
||||||
|
"instruction": "Clean the messy movie titles and put them in the cleaned column",
|
||||||
|
"source": "https://www.youtube.com/shorts/A0gmEBRKXWs",
|
||||||
|
"config": {
|
||||||
|
"download": [
|
||||||
|
[
|
||||||
|
"",
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"open": [
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"trajectory": "trajectories/a9f325aa-8c05-4e4f-8341-9e4358565f4f",
|
||||||
|
"related_apps": [
|
||||||
|
"libreoffice calc"
|
||||||
|
],
|
||||||
|
"evaluator": "evaluation_dir"
|
||||||
|
}
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"id": "d681960f-7bc3-4286-9913-a8812ba3261a",
|
||||||
|
"snapshot": "libreoffice_calc",
|
||||||
|
"instruction": "According to the green table shown above, calculate and give each student a grade",
|
||||||
|
"source": "https://www.youtube.com/shorts/d7U1S_IsTVM",
|
||||||
|
"config": {
|
||||||
|
"download": [
|
||||||
|
[
|
||||||
|
"https://drive.usercontent.google.com/download?id=1wodZjx1KjThUsrtF6ZJaCTy1fQX4E9vA&export=download&authuser=0&confirm=t&uuid=d07ca312-1abc-40f2-81cd-d06e27119854&at=APZUnTWwjnxsHQYapSvpLR8NmlfV:1701785087048",
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Student_Grades_and_Remarks.xlsx"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"open": [
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Student_Grades_and_Remarks.xlsx"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"trajectory": "trajectories/d681960f-7bc3-4286-9913-a8812ba3261a",
|
||||||
|
"related_apps": [
|
||||||
|
"libreoffice calc"
|
||||||
|
],
|
||||||
|
"evaluator": {
|
||||||
|
"func": "compare_table(expected, actual)",
|
||||||
|
"paths": {
|
||||||
|
"expected": {
|
||||||
|
"type": "cloud_file",
|
||||||
|
"path": "https://drive.usercontent.google.com/download?id=1kfEHJH1n0yCsQp443IIFvdD9uWv0DWMr&export=download&authuser=0&confirm=t&uuid=d9907f65-8d39-4ecc-8747-b4ed7e6011f5&at=APZUnTXpPAnlh5sD6q-R8oQtqL6g:1702362952170"
|
||||||
|
},
|
||||||
|
"actual": {
|
||||||
|
"type": "vm_file",
|
||||||
|
"path": "C:\\Users\\tianbaox\\Desktop\\Student_Grades_and_Remarks.xlsx"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"id": "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
|
||||||
|
"snapshot": "libreoffice_calc",
|
||||||
|
"instruction": "Traverse the table and paste it below",
|
||||||
|
"source": "https://www.youtube.com/shorts/t9JLUaT55UQ",
|
||||||
|
"config": {
|
||||||
|
"download": [
|
||||||
|
[
|
||||||
|
"",
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"open": [
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"trajectory": "trajectories/eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
|
||||||
|
"related_apps": [
|
||||||
|
"libreoffice calc"
|
||||||
|
],
|
||||||
|
"evaluator": "evaluation_dir"
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"id": "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
|
||||||
|
"snapshot": "libreoffice_calc",
|
||||||
|
"instruction": "Make each cell in the column \"Pass/Fail/Held\" a drop-down list",
|
||||||
|
"source": "https://www.youtube.com/shorts/tXOovKn0H68",
|
||||||
|
"config": {
|
||||||
|
"download": [
|
||||||
|
[
|
||||||
|
"",
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"open": [
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"trajectory": "trajectories/ecb0df7a-4e8d-4a03-b162-053391d3afaf",
|
||||||
|
"related_apps": [
|
||||||
|
"libreoffice calc"
|
||||||
|
],
|
||||||
|
"evaluator": "evaluation_dir"
|
||||||
|
}
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"id": "f9584479-3d0d-4c79-affa-9ad7afdd8850",
|
||||||
|
"snapshot": "libreoffice_calc",
|
||||||
|
"instruction": "Fill the missing row and column which show the total value",
|
||||||
|
"source": "https://youtube.com/shorts/feldd-Pn48c?si=9xJiem2uAHm6Jshb",
|
||||||
|
"config": {
|
||||||
|
"download": [
|
||||||
|
[
|
||||||
|
"https://drive.usercontent.google.com/download?id=1rwhniaClEkF8XFzdfaNUA6GmAiy4syMZ&export=download&authuser=0&confirm=t&uuid=6fdd5b04-85f4-45e1-ad74-368f8f2a82ab&at=APZUnTUP-JxPxLfNls6jXWghblQ5:1701766091851",
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Quarterly_Product_Sales_by_Zone.xlsx"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"open": [
|
||||||
|
"C:\\Users\\tianbaox\\Desktop\\Quarterly_Product_Sales_by_Zone.xlsx"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"trajectory": "trajectories/f9584479-3d0d-4c79-affa-9ad7afdd8850",
|
||||||
|
"related_apps": [
|
||||||
|
"libreoffice calc"
|
||||||
|
],
|
||||||
|
"evaluator": {
|
||||||
|
"func": "compare_table(expected, actual)",
|
||||||
|
"paths": {
|
||||||
|
"expected": {
|
||||||
|
"type": "cloud_file",
|
||||||
|
"path": "https://drive.usercontent.google.com/download?id=17f1wZuJPvUEc5at_Fy3c18VFdOk0x7xz&export=download&authuser=0&confirm=t&uuid=6d2edffd-0ce0-426e-9820-8af25b4667f3&at=APZUnTVh7JS85dwZBaV2hytWQgDK:1702361510956"
|
||||||
|
},
|
||||||
|
"actual": {
|
||||||
|
"type": "vm_file",
|
||||||
|
"path": "C:\\Users\\tianbaox\\Desktop\\Quarterly_Product_Sales_by_Zone.xlsx"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
13
evaluation_examples/examples/template.json
Normal file
13
evaluation_examples/examples/template.json
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"id": "",
|
||||||
|
"snapshot": "libreoffice_calc",
|
||||||
|
"instruction": "",
|
||||||
|
"source": "",
|
||||||
|
"config": {
|
||||||
|
},
|
||||||
|
"trajectory": "trajectories/",
|
||||||
|
"related_apps": [
|
||||||
|
"libreoffice calc"
|
||||||
|
],
|
||||||
|
"evaluator": "evaluation_dir"
|
||||||
|
}
|
||||||
87
main.py
87
main.py
@@ -1,56 +1,51 @@
|
|||||||
from pprint import pprint
|
import json
|
||||||
from desktop_env.envs.desktop_env import DesktopEnv, Action, MouseClick
|
from desktop_env.envs.desktop_env import DesktopEnv
|
||||||
|
|
||||||
def get_human_action():
|
|
||||||
"""
|
|
||||||
Prompts the human player for an action and returns a structured action.
|
|
||||||
"""
|
|
||||||
print("\nAvailable actions:", [action.name for action in Action])
|
|
||||||
action_type = None
|
|
||||||
while action_type not in [action.value for action in Action]:
|
|
||||||
action_type = Action[input("Enter the type of action: ".strip())].value
|
|
||||||
|
|
||||||
action = {"action_type": action_type}
|
|
||||||
|
|
||||||
if action_type == Action.CLICK.value or action_type == Action.MOUSE_DOWN.value or action_type == Action.MOUSE_UP.value:
|
|
||||||
print("\n Available clicks:", [action.name for action in MouseClick])
|
|
||||||
click_type = input("Enter click type: ")
|
|
||||||
action["click_type"] = MouseClick[click_type].value
|
|
||||||
|
|
||||||
if action_type == Action.MOUSE_MOVE.value:
|
|
||||||
x = int(input("Enter x-coordinate for mouse move: "))
|
|
||||||
y = int(input("Enter y-coordinate for mouse move: "))
|
|
||||||
action["x"] = x
|
|
||||||
action["y"] = y
|
|
||||||
|
|
||||||
if action_type == Action.KEY.value:
|
|
||||||
key = input("Enter the key to press: ")
|
|
||||||
action["key"] = [ord(c) for c in key]
|
|
||||||
|
|
||||||
if action_type == Action.TYPE.value:
|
|
||||||
text = input("Enter the text to type: ")
|
|
||||||
action["text"] = [ord(c) for c in text]
|
|
||||||
|
|
||||||
return action
|
|
||||||
|
|
||||||
|
|
||||||
def human_agent():
|
def human_agent():
|
||||||
"""
|
"""
|
||||||
Runs the Gym environment with human input.
|
Runs the Gym environment with human input.
|
||||||
"""
|
"""
|
||||||
env = DesktopEnv(path_to_vm="/home/yuri/vmware/Windows 10 x64/Windows 10 x64.vmx",
|
|
||||||
# path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
|
with open("evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json", "r") as f:
|
||||||
username="user",
|
example = json.load(f)
|
||||||
password="password",
|
|
||||||
# host="192.168.7.128",
|
env = DesktopEnv(
|
||||||
host="http://192.168.7.129:5000",
|
# path_to_vm=r"""C:\Users\tianbaox\Downloads\Windows 10 x64\Windows 10 x64.vmx""",
|
||||||
vm_os="windows")
|
path_to_vm=r"""C:\Users\tianbaox\Documents\Virtual Machines\Win10\Win10.vmx""",
|
||||||
|
# path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
|
||||||
|
action_space="computer_13",
|
||||||
|
snapshot_path="base_setup3",
|
||||||
|
instruction=example["instruction"],
|
||||||
|
config=example["config"],
|
||||||
|
evaluator=example["evaluator"]
|
||||||
|
)
|
||||||
|
|
||||||
|
# reset the environment to certain snapshot
|
||||||
observation = env.reset()
|
observation = env.reset()
|
||||||
done = False
|
done = False
|
||||||
|
|
||||||
while not done:
|
trajectory = [
|
||||||
action = get_human_action()
|
{
|
||||||
observation, reward, done, info = env.step(action)
|
"action_type": "MOVE_TO",
|
||||||
|
"parameters": {
|
||||||
|
"x": 754,
|
||||||
|
"y": 1057
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{"action_type": "CLICK", "parameters": {"button": "right", "num_clicks": 1}}
|
||||||
|
]
|
||||||
|
|
||||||
|
for i in range(len(trajectory)):
|
||||||
|
# action = get_human_action()
|
||||||
|
|
||||||
|
# action = {
|
||||||
|
# "action_type": 0,
|
||||||
|
# "click_type": 3,
|
||||||
|
# }
|
||||||
|
print(trajectory[i])
|
||||||
|
|
||||||
|
observation, reward, done, info = env.step(trajectory[i], pause=5)
|
||||||
print("Observation:", observation)
|
print("Observation:", observation)
|
||||||
print("Reward:", reward)
|
print("Reward:", reward)
|
||||||
print("Info:", info)
|
print("Info:", info)
|
||||||
@@ -61,8 +56,12 @@ def human_agent():
|
|||||||
print("The episode is done.")
|
print("The episode is done.")
|
||||||
break
|
break
|
||||||
|
|
||||||
|
result = env.evaluate()
|
||||||
|
print("Result:", result)
|
||||||
|
|
||||||
env.close()
|
env.close()
|
||||||
print("Environment closed.")
|
print("Environment closed.")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
human_agent()
|
human_agent()
|
||||||
|
|||||||
@@ -1,8 +1,12 @@
|
|||||||
|
# FIXME: needs to be rewritten for the new action space
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import base64
|
import base64
|
||||||
from desktop_env.envs.desktop_env import Action, MouseClick
|
from desktop_env.envs.desktop_env import Action, MouseClick
|
||||||
import json5
|
import json
|
||||||
import requests
|
import requests
|
||||||
|
from mm_agents.gpt_4v_prompt import SYS_PROMPT
|
||||||
|
|
||||||
|
|
||||||
# Function to encode the image
|
# Function to encode the image
|
||||||
@@ -11,6 +15,38 @@ def encode_image(image_path):
|
|||||||
return base64.b64encode(image_file.read()).decode('utf-8')
|
return base64.b64encode(image_file.read()).decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
def parse_actions_from_string(input_string):
    """Extract the JSON action object(s) contained in a model response.

    Candidates are looked for in this order, and the first form that is
    present decides how the response is parsed:

    1. fenced blocks tagged as JSON (```json ... ```),
    2. untagged fenced blocks (``` ... ```),
    3. the whole string, treated as a single JSON document.

    Args:
        input_string: Raw text of the response.

    Returns:
        A list of parsed JSON values: one per fenced block (every block is
        parsed, not just the first), or a single-element list when the whole
        string is parsed.

    Raises:
        ValueError: If a candidate is not valid JSON. Earlier revisions
            returned an error *string* from the fenced-block branches, which
            callers would then iterate character by character; all failure
            paths now raise consistently.
    """
    fence_patterns = (
        r'```json\s+(.*?)\s+```',  # explicitly tagged JSON blocks
        r'```\s+(.*?)\s+```',      # untagged fenced blocks
    )
    for pattern in fence_patterns:
        matches = re.findall(pattern, input_string, re.DOTALL)
        if not matches:
            continue
        try:
            return [json.loads(match) for match in matches]
        except json.JSONDecodeError as e:
            raise ValueError(f"Failed to parse JSON: {e}") from e

    # No fenced block at all: the response itself must be the JSON document.
    try:
        return [json.loads(input_string)]
    except json.JSONDecodeError as e:
        raise ValueError("Invalid response format: " + input_string) from e
|
||||||
|
|
||||||
|
|
||||||
class GPT4v_Agent:
|
class GPT4v_Agent:
|
||||||
def __init__(self, api_key, instruction, model="gpt-4-vision-preview", max_tokens=300):
|
def __init__(self, api_key, instruction, model="gpt-4-vision-preview", max_tokens=300):
|
||||||
self.instruction = instruction
|
self.instruction = instruction
|
||||||
@@ -22,18 +58,13 @@ class GPT4v_Agent:
|
|||||||
"Authorization": f"Bearer {api_key}"
|
"Authorization": f"Bearer {api_key}"
|
||||||
}
|
}
|
||||||
|
|
||||||
# load prompt from file
|
|
||||||
self.prompt = ""
|
|
||||||
with open("gpt_4v_prompt.txt", "r") as f:
|
|
||||||
self.prompt = f.read()
|
|
||||||
|
|
||||||
self.trajectory = [
|
self.trajectory = [
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"content": [
|
"content": [
|
||||||
{
|
{
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"text": self.prompt
|
"text": SYS_PROMPT
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -56,6 +87,12 @@ class GPT4v_Agent:
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
})
|
})
|
||||||
|
traj_to_show = []
|
||||||
|
for i in range(len(self.trajectory)):
|
||||||
|
traj_to_show.append(self.trajectory[i]["content"][0]["text"])
|
||||||
|
if len(self.trajectory[i]["content"]) > 1:
|
||||||
|
traj_to_show.append("screenshot_obs")
|
||||||
|
print("Trajectory:", traj_to_show)
|
||||||
payload = {
|
payload = {
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
"messages": self.trajectory,
|
"messages": self.trajectory,
|
||||||
@@ -63,11 +100,15 @@ class GPT4v_Agent:
|
|||||||
}
|
}
|
||||||
response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers, json=payload)
|
response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers, json=payload)
|
||||||
|
|
||||||
action = self.parse_action(response.json()['choices'][0]['message']['content'])
|
try:
|
||||||
|
actions = self.parse_actions(response.json()['choices'][0]['message']['content'])
|
||||||
|
except:
|
||||||
|
print("Failed to parse action from response:", response.json()['choices'][0]['message']['content'])
|
||||||
|
actions = None
|
||||||
|
|
||||||
return action
|
return actions
|
||||||
|
|
||||||
def parse_action(self, response: str):
|
def parse_actions(self, response: str):
|
||||||
# response example
|
# response example
|
||||||
"""
|
"""
|
||||||
```json
|
```json
|
||||||
@@ -79,12 +120,7 @@ class GPT4v_Agent:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# parse from the response
|
# parse from the response
|
||||||
if response.startswith("```json"):
|
actions = parse_actions_from_string(response)
|
||||||
action = json5.loads(response[7:-3])
|
|
||||||
elif response.startswith("```"):
|
|
||||||
action = json5.loads(response[3:-3])
|
|
||||||
else:
|
|
||||||
action = json5.loads(response)
|
|
||||||
|
|
||||||
# add action into the trajectory
|
# add action into the trajectory
|
||||||
self.trajectory.append({
|
self.trajectory.append({
|
||||||
@@ -98,25 +134,28 @@ class GPT4v_Agent:
|
|||||||
})
|
})
|
||||||
|
|
||||||
# parse action
|
# parse action
|
||||||
parsed_action = {}
|
parsed_actions = []
|
||||||
action_type = Action[action['action_type']].value
|
for action in actions:
|
||||||
parsed_action["action_type"] = action_type
|
parsed_action = {}
|
||||||
|
action_type = Action[action['action_type']].value
|
||||||
|
parsed_action["action_type"] = action_type
|
||||||
|
|
||||||
if action_type == Action.CLICK.value or action_type == Action.MOUSE_DOWN.value or action_type == Action.MOUSE_UP.value:
|
if action_type == Action.CLICK.value or action_type == Action.MOUSE_DOWN.value or action_type == Action.MOUSE_UP.value:
|
||||||
parsed_action["click_type"] = MouseClick[action['click_type']].value
|
parsed_action["click_type"] = MouseClick[action['click_type']].value
|
||||||
|
|
||||||
if action_type == Action.MOUSE_MOVE.value:
|
if action_type == Action.MOUSE_MOVE.value:
|
||||||
parsed_action["x"] = action["x"]
|
parsed_action["x"] = action["x"]
|
||||||
parsed_action["y"] = action["y"]
|
parsed_action["y"] = action["y"]
|
||||||
|
|
||||||
# fixme: could these two actions be merged??
|
if action_type == Action.KEY.value:
|
||||||
if action_type == Action.KEY.value:
|
parsed_action["key"] = action["key"] # handle the condition of single key and multiple keys
|
||||||
parsed_action["key"] = [ord(c) for c in action["key"]]
|
|
||||||
|
|
||||||
if action_type == Action.TYPE.value:
|
if action_type == Action.TYPE.value:
|
||||||
parsed_action["text"] = [ord(c) for c in action["text"]]
|
parsed_action["text"] = action["text"]
|
||||||
|
|
||||||
return parsed_action
|
parsed_actions.append(parsed_action)
|
||||||
|
|
||||||
|
return parsed_actions
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
@@ -125,4 +164,3 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
agent = GPT4v_Agent(api_key=api_key, instruction="Open Google Sheet")
|
agent = GPT4v_Agent(api_key=api_key, instruction="Open Google Sheet")
|
||||||
print(agent.predict(obs="stackoverflow.png"))
|
print(agent.predict(obs="stackoverflow.png"))
|
||||||
|
|
||||||
|
|||||||
54
mm_agents/gpt_4v_prompt_action.py
Normal file
54
mm_agents/gpt_4v_prompt_action.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
SYS_PROMPT = """
|
||||||
|
You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection.
|
||||||
|
For each step, you will get an observation of an image, which is the screenshot of the computer screen. And you will predict the action of the computer based on the image.
|
||||||
|
Here is the description of the action space:
|
||||||
|
|
||||||
|
Firstly you need to predict the class of your action, select from one below:
|
||||||
|
- **MOUSE_MOVE**: move the mouse to a specific position
|
||||||
|
- **CLICK**: click on the screen
|
||||||
|
- **MOUSE_DOWN**: press the mouse button
|
||||||
|
- **MOUSE_UP**: release the mouse button
|
||||||
|
- **KEY**: press a key on the keyboard
|
||||||
|
- **KEY_DOWN**: press a key on the keyboard
|
||||||
|
- **KEY_UP**: release a key on the keyboard
|
||||||
|
- **TYPE**: type a string on the keyboard
|
||||||
|
|
||||||
|
Then you need to predict the parameters of your action:
|
||||||
|
- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080)
|
||||||
|
for example, format as:
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"action_type": "MOUSE_MOVE",
|
||||||
|
"x": 1319.11,
|
||||||
|
"y": 65.06
|
||||||
|
}
|
||||||
|
```
|
||||||
|
- For [CLICK, MOUSE_DOWN, MOUSE_UP], you need to specify the click_type as well, select from [LEFT, MIDDLE, RIGHT, WHEEL_UP, WHEEL_DOWN], which means you click the left button, middle button, right button, wheel up or wheel down of your mouse:
|
||||||
|
for example, format as:
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"action_type": "CLICK",
|
||||||
|
"click_type": "LEFT"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
- For [KEY, KEY_DOWN, KEY_UP], you need to choose a(multiple) key(s) from the keyboard
|
||||||
|
for example, format as:
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"action_type": "KEY",
|
||||||
|
"key": "ctrl+c"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
- For TYPE, you need to specify the text you want to type
|
||||||
|
for example, format as:
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"action_type": "TYPE",
|
||||||
|
"text": "hello world"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
For every step, you should only return the action_type and the parameters of your action as a dict, without any other things. You MUST wrap the dict with backticks (\`).
|
||||||
|
You can predict multiple actions at one step, but you should only return one action for each step.
|
||||||
|
You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty.
|
||||||
|
"""
|
||||||
8
mm_agents/gpt_4v_prompt_code.py
Normal file
8
mm_agents/gpt_4v_prompt_code.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
SYS_PROMPT = """
|
||||||
|
You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection.
|
||||||
|
For each step, you will get an observation of an image, which is the screenshot of the computer screen. And you will predict the action of the computer based on the image.
|
||||||
|
|
||||||
|
You are required to use `pyautogui` to perform the action.
|
||||||
|
Return one line or multiple lines of python code to perform the action each time, be time efficient.
|
||||||
|
Return `None` if you cannot perform the action.
|
||||||
|
"""
|
||||||
124
mm_agents/sam_test.py
Normal file
124
mm_agents/sam_test.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
import torch
|
||||||
|
from PIL import Image
|
||||||
|
import requests
|
||||||
|
from transformers import SamModel, SamProcessor
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import os
|
||||||
|
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
|
||||||
|
|
||||||
|
def show_mask(mask, ax, random_color=False):
    """Draw *mask* on *ax* as a semi-transparent RGBA overlay.

    Args:
        mask: Array-like whose last two dimensions are (height, width).
        ax: Object exposing ``imshow`` (e.g. a matplotlib Axes).
        random_color: When true, use a random RGB colour; otherwise use the
            fixed colour (30, 144, 255). Alpha is 0.6 in both cases.
    """
    if random_color:
        rgba = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        rgba = np.array([30 / 255, 144 / 255, 255 / 255, 0.6])

    height, width = mask.shape[-2:]
    # Broadcast (h, w, 1) against (1, 1, 4) to get an (h, w, 4) RGBA image.
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)
|
||||||
|
|
||||||
|
|
||||||
|
def show_box(box, ax):
    """Add *box* to *ax* as a green, unfilled rectangle.

    Args:
        box: Indexable as ``(x0, y0, x1, y1)``; width and height are taken
            as ``x1 - x0`` and ``y1 - y0``.
        ax: Object exposing ``add_patch`` (e.g. a matplotlib Axes).
    """
    left, top = box[0], box[1]
    width = box[2] - box[0]
    height = box[3] - box[1]
    outline = plt.Rectangle((left, top), width, height, edgecolor='green', facecolor=(0, 0, 0, 0), lw=2)
    ax.add_patch(outline)
|
||||||
|
|
||||||
|
|
||||||
|
def show_boxes_on_image(raw_image, boxes):
    """Display *raw_image* in a new 10x10 figure with every box outlined.

    Args:
        raw_image: Image accepted by ``plt.imshow``.
        boxes: Iterable of boxes, each passed to ``show_box``.
    """
    plt.figure(figsize=(10, 10))
    plt.imshow(raw_image)
    for rect in boxes:
        current_axes = plt.gca()
        show_box(rect, current_axes)
    plt.axis('on')
    plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
def show_points_on_image(raw_image, input_points, input_labels=None):
    """Display *raw_image* in a new 10x10 figure with prompt points marked.

    Args:
        raw_image: Image accepted by ``plt.imshow``.
        input_points: Sequence of ``(x, y)`` points; converted with
            ``np.array`` and indexed as a 2-D array.
        input_labels: Optional per-point labels. When omitted, every point
            gets label 1.
    """
    plt.figure(figsize=(10, 10))
    plt.imshow(raw_image)

    points = np.array(input_points)
    labels = np.ones_like(points[:, 0]) if input_labels is None else np.array(input_labels)

    show_points(points, labels, plt.gca())
    plt.axis('on')
    plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
def show_points_and_boxes_on_image(raw_image, boxes, input_points, input_labels=None):
    """Display *raw_image* in a new 10x10 figure with points and boxes drawn.

    This function used to be defined twice in a row with identical bodies
    (the second definition silently shadowed the first); a single definition
    is kept.

    Args:
        raw_image: Image accepted by ``plt.imshow``.
        boxes: Iterable of boxes, each passed to ``show_box``.
        input_points: Sequence of ``(x, y)`` points; converted with
            ``np.array`` and indexed as a 2-D array.
        input_labels: Optional per-point labels. When omitted, every point
            gets label 1.
    """
    plt.figure(figsize=(10, 10))
    plt.imshow(raw_image)

    input_points = np.array(input_points)
    if input_labels is None:
        labels = np.ones_like(input_points[:, 0])
    else:
        labels = np.array(input_labels)

    # Points are drawn first, then the box outlines.
    show_points(input_points, labels, plt.gca())
    for box in boxes:
        show_box(box, plt.gca())

    plt.axis('on')
    plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
def show_points(coords, labels, ax, marker_size=375):
    """Scatter labelled points on *ax* as star markers.

    Points whose label equals 1 are drawn in green, then points whose label
    equals 0 are drawn in red. Other label values are not drawn.

    Args:
        coords: 2-D array indexed as ``coords[:, 0]`` (x) and
            ``coords[:, 1]`` (y).
        labels: Array compared element-wise against 1 and 0 to select rows
            of *coords*.
        ax: Object exposing ``scatter`` (e.g. a matplotlib Axes).
        marker_size: Marker size forwarded as ``s``.
    """
    positive = coords[labels == 1]
    negative = coords[labels == 0]
    for selected, colour in ((positive, 'green'), (negative, 'red')):
        ax.scatter(selected[:, 0], selected[:, 1], color=colour, marker='*', s=marker_size, edgecolor='white',
                   linewidth=1.25)
|
||||||
|
|
||||||
|
|
||||||
|
def show_masks_on_image(raw_image, masks, scores):
    """Plot each predicted mask over *raw_image*, one subplot per mask.

    Args:
        raw_image: Image convertible with ``np.array``.
        masks: Tensor of masks; squeezed when it has four dimensions.
            Each mask must support ``.cpu().detach()``.
        scores: Tensor of scores; squeezed when its first dimension is 1.
            The size of its last dimension gives the number of subplots.
    """
    if len(masks.shape) == 4:
        masks = masks.squeeze()
    if scores.shape[0] == 1:
        scores = scores.squeeze()

    num_predictions = scores.shape[-1]
    # NOTE(review): with a single prediction plt.subplots returns a bare Axes
    # rather than an array, so the indexing below would fail - confirm callers
    # always pass several masks.
    _, axes = plt.subplots(1, num_predictions, figsize=(15, 15))

    for idx, (mask, score) in enumerate(zip(masks, scores)):
        panel = axes[idx]
        mask_on_cpu = mask.cpu().detach()
        panel.imshow(np.array(raw_image))
        show_mask(mask_on_cpu, panel)
        panel.title.set_text(f"Mask {idx + 1}, Score: {score.item():.3f}")
        panel.axis("off")

    plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the SAM checkpoint and its matching pre/post-processor.
model = SamModel.from_pretrained("facebook/sam-vit-huge")
model = model.to(device)
processor = SamProcessor.from_pretrained("facebook/sam-vit-huge")

# Fetch the sample image over HTTP and decode it as RGB.
img_url = "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png"
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert("RGB")

plt.imshow(raw_image)

# Forward pass without gradient tracking.
inputs = processor(raw_image, return_tensors="pt").to(device)
with torch.no_grad():
    outputs = model(**inputs)

# Post-process the predicted masks using the original and reshaped input sizes.
masks = processor.image_processor.post_process_masks(
    outputs.pred_masks.cpu(),
    inputs["original_sizes"].cpu(),
    inputs["reshaped_input_sizes"].cpu(),
)

scores = outputs.iou_scores
show_masks_on_image(raw_image, masks[0], scores)
|
||||||
@@ -1,8 +1,16 @@
|
|||||||
numpy
|
numpy~=1.24.3
|
||||||
Pillow
|
Pillow~=10.1.0
|
||||||
fabric
|
fabric
|
||||||
gymnasium
|
gymnasium~=0.28.1
|
||||||
requests
|
requests~=2.31.0
|
||||||
transformers
|
transformers~=4.35.2
|
||||||
torch
|
torch~=2.1.1
|
||||||
accelerate
|
accelerate
|
||||||
|
opencv-python~=4.8.1.78
|
||||||
|
matplotlib~=3.7.4
|
||||||
|
pynput~=1.7.6
|
||||||
|
pyautogui~=0.9.54
|
||||||
|
psutil~=5.9.6
|
||||||
|
tqdm~=4.65.0
|
||||||
|
pandas~=2.0.3
|
||||||
|
flask~=3.0.0
|
||||||
BIN
screenshot.png
BIN
screenshot.png
Binary file not shown.
|
Before Width: | Height: | Size: 356 KiB After Width: | Height: | Size: 826 KiB |
1
utils/complex_clicking.json
Normal file
1
utils/complex_clicking.json
Normal file
File diff suppressed because one or more lines are too long
1788
utils/complex_clicking.jsonl
Normal file
1788
utils/complex_clicking.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
@@ -3,76 +3,97 @@ import sys, pathlib;
|
|||||||
sys.path.append(str(pathlib.Path(__file__).parents[1]))
|
sys.path.append(str(pathlib.Path(__file__).parents[1]))
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import math
|
||||||
import json
|
import json
|
||||||
|
import numpy as np
|
||||||
from typing import List
|
from typing import List
|
||||||
from desktop_env.envs.desktop_env import Action, MouseClick
|
from copy import deepcopy
|
||||||
|
|
||||||
|
pynput2pyautogui_key = {
|
||||||
|
"alt_l": "altleft",
|
||||||
|
"alt_r": "altright",
|
||||||
|
}
|
||||||
|
COMMAND_KEYS = ['accept', 'add', 'alt', 'altleft', 'altright', 'apps', 'backspace', 'browserback', 'browserfavorites', 'browserforward', 'browserhome', 'browserrefresh', 'browsersearch', 'browserstop', 'capslock', 'clear', 'convert', 'ctrl', 'ctrlleft', 'ctrlright', 'decimal', 'del', 'delete', 'divide', 'down', 'end', 'enter', 'esc', 'escape', 'execute', 'f1', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f2', 'f20', 'f21', 'f22', 'f23', 'f24', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'final', 'fn', 'hanguel', 'hangul', 'hanja', 'help', 'home', 'insert', 'junja', 'kana', 'kanji', 'launchapp1', 'launchapp2', 'launchmail', 'launchmediaselect', 'left', 'modechange', 'multiply', 'nexttrack', 'nonconvert', 'num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9', 'numlock', 'pagedown', 'pageup', 'pause', 'pgdn', 'pgup', 'playpause', 'prevtrack', 'print', 'printscreen', 'prntscrn', 'prtsc', 'prtscr', 'return', 'right', 'scrolllock', 'select', 'separator', 'shift', 'shiftleft', 'shiftright', 'sleep', 'stop', 'subtract', 'tab', 'up', 'volumedown', 'volumemute', 'volumeup', 'win', 'winleft', 'winright', 'yen', 'command', 'option', 'optionleft', 'optionright', 'alt_l', 'alt_r']
|
||||||
|
typingkey2str = {
|
||||||
|
"space" : " ",
|
||||||
|
}
|
||||||
|
|
||||||
class DuckTrackEventActionConverter:
|
class DuckTrackEventActionConverter:
|
||||||
def __init__(self, human_readable: str, compress_move: bool = True):
|
def __init__(self, ):
|
||||||
self.human_readable = human_readable
|
""""""
|
||||||
self.compress_move = compress_move
|
|
||||||
|
|
||||||
def enum_to_str(self, enum):
|
### Enumerations ###
|
||||||
"""Converts an enum to its string representation if HUMAN_READABLE is True, otherwise returns its value."""
|
def move_event_to_action(self, event: dict, action_space: str = "computer_13"):
|
||||||
return enum.name if self.human_readable else enum.value
|
"""Converts a mouse move event to its corresponding action."""
|
||||||
|
if action_space == "computer_13":
|
||||||
|
return {
|
||||||
|
"action_type": "MOVE_TO",
|
||||||
|
"parameters": {
|
||||||
|
"x": event["x"],
|
||||||
|
"y": event["y"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
elif action_space == "pyautogui":
|
||||||
|
return "pyautogui.moveTo({}, {})".format(event["x"], event["y"])
|
||||||
|
|
||||||
def compress_mouse_move(self, data: List[dict], index: int):
|
def click_event_to_action(self, event: dict, action_space: str = "computer_13"):
|
||||||
"""Compresses consecutive mouse move events into first and last move events."""
|
"""Converts a mouse click event to its corresponding action."""
|
||||||
first_move, last_move = data[index], data[index]
|
action = {
|
||||||
while index < len(data) and data[index]["action"] == "move":
|
"action_type": None,
|
||||||
last_move = data[index]
|
"parameters": {
|
||||||
index += 1
|
"button": None
|
||||||
return first_move, last_move, index
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def move_event_to_action(self, event: dict):
|
|
||||||
return {"action_type": self.enum_to_str(Action.MOUSE_MOVE),
|
|
||||||
"x": event["x"],
|
|
||||||
"y": event["y"]}
|
|
||||||
|
|
||||||
def click_event_to_action(self, event: dict):
|
|
||||||
action = {}
|
|
||||||
mouse_button = event["button"]
|
mouse_button = event["button"]
|
||||||
mouse_pressed = event["pressed"]
|
mouse_pressed = event["pressed"]
|
||||||
|
|
||||||
if mouse_pressed:
|
if mouse_pressed:
|
||||||
action["action_type"] = self.enum_to_str(Action.MOUSE_DOWN)
|
action["action_type"] = "MOUSE_DOWN"
|
||||||
elif not mouse_pressed:
|
elif not mouse_pressed:
|
||||||
action["action_type"] = self.enum_to_str(Action.MOUSE_UP)
|
action["action_type"] = "MOUSE_UP"
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(mouse_pressed)
|
raise NotImplementedError(mouse_pressed)
|
||||||
|
|
||||||
if mouse_button == "left":
|
if mouse_button in ["left", "right", "middle"]:
|
||||||
action["click_type"] = self.enum_to_str(MouseClick.LEFT)
|
action["parameters"]["button"] = mouse_button
|
||||||
elif mouse_button == "right":
|
|
||||||
action["click_type"] = self.enum_to_str(MouseClick.RIGHT)
|
|
||||||
elif mouse_button == "middle":
|
|
||||||
action["click_type"] = self.enum_to_str(MouseClick.MIDDLE)
|
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(mouse_button)
|
raise NotImplementedError(mouse_button)
|
||||||
|
|
||||||
return action
|
return action
|
||||||
|
|
||||||
def press_event_to_action(self, event: dict):
|
def press_event_to_action(self, event: dict, action_space: str = "computer_13"):
|
||||||
return {"action_type": self.enum_to_str(Action.KEY_DOWN),
|
"""Converts a key down event to its corresponding action."""
|
||||||
"key": [ord(c) for c in event["name"]]}
|
# NOTE: the `key down`, `press` have the same meaning here, while different in pyautogui
|
||||||
|
return {
|
||||||
|
"action_type": "KEY_DOWN",
|
||||||
|
"parameters": {
|
||||||
|
"key": event["name"] if event["name"] not in pynput2pyautogui_key else pynput2pyautogui_key[
|
||||||
|
event["name"]]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def release_event_to_action(self, event: dict):
|
def release_event_to_action(self, event: dict, action_space: str = "computer_13"):
|
||||||
return {"action_type": self.enum_to_str(Action.KEY_UP),
|
"""Converts a key release event to its corresponding action."""
|
||||||
"key": [ord(c) for c in event["name"]]}
|
return {
|
||||||
|
"action_type": "KEY_UP",
|
||||||
|
"parameters": {
|
||||||
|
"key": event["name"] if event["name"] not in pynput2pyautogui_key else pynput2pyautogui_key[
|
||||||
|
event["name"]]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def scroll_event_to_action(self, event: dict):
|
def scroll_event_to_action(self, event: dict, action_space: str = "computer_13"):
|
||||||
# TODO: need to confirm if df < 0 means scroll up or down
|
"""Converts a scroll event to its corresponding action."""
|
||||||
if event["dy"] < 0:
|
return {
|
||||||
down = False
|
"action_type": "SCROLL",
|
||||||
else:
|
"parameters": {
|
||||||
down = True
|
"dx": event["dx"],
|
||||||
|
"dy": event["dy"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return {"action_type": self.enum_to_str(Action.CLICK),
|
def event_to_action(self, event: dict, action_space: str = "computer_13"):
|
||||||
"click_type": self.enum_to_str(MouseClick.WHEEL_DOWN) if down else self.enum_to_str(
|
|
||||||
MouseClick.WHEEL_UP)}
|
|
||||||
|
|
||||||
def event_to_action(self, event: dict):
|
|
||||||
"""Converts an event to its corresponding action based on the event type."""
|
"""Converts an event to its corresponding action based on the event type."""
|
||||||
if event["action"] == "move":
|
if event["action"] == "move":
|
||||||
return self.move_event_to_action(event)
|
return self.move_event_to_action(event)
|
||||||
@@ -87,114 +108,243 @@ class DuckTrackEventActionConverter:
|
|||||||
else:
|
else:
|
||||||
raise NotImplementedError(event["action"])
|
raise NotImplementedError(event["action"])
|
||||||
|
|
||||||
def ducktrack_event_file_to_action(self, ducktrack_event_file: str, out_file: str, compress_move: bool = None):
|
### Compressing ###
|
||||||
|
def compress_mouse_move(self, data: List[dict], index: int):
|
||||||
|
"""Compresses consecutive mouse move events into the last move events."""
|
||||||
|
last_move = data[index]
|
||||||
|
while index < len(data) and data[index]["action"] == "move":
|
||||||
|
last_move = data[index]
|
||||||
|
index += 1
|
||||||
|
return last_move, index
|
||||||
|
|
||||||
|
def compress_scroll(self, data: List[dict], index: int):
|
||||||
|
"""Compresses consecutive scroll events into a single scroll event."""
|
||||||
|
last_scroll = data[index]
|
||||||
|
consecutive_dx, consecutive_dy = data[index]["dx"], data[index]["dy"]
|
||||||
|
while index < len(data) and data[index]["action"] == "scroll" and np.sign(data[index]["dx"]) == np.sign(consecutive_dx) and np.sign(data[index]["dy"]) == np.sign(consecutive_dy):
|
||||||
|
last_scroll = data[index]
|
||||||
|
consecutive_dx += data[index]["dx"]
|
||||||
|
consecutive_dy += data[index]["dy"]
|
||||||
|
index += 1
|
||||||
|
last_scroll["dx"], last_scroll["dy"] = consecutive_dx, consecutive_dy
|
||||||
|
return last_scroll, index
|
||||||
|
|
||||||
|
### Converting ###
|
||||||
|
def ducktrack_event_file_to_action(self, ducktrack_event_file: str, out_file: str, compress_move: bool = True, compress_scroll: bool = True, compress_click: bool = True,compress_drag: bool = True, compress_press_key: bool = True, compress_typing: bool = True):
|
||||||
"""Converts DuckTrack event data to a list of actions and saves them to a file."""
|
"""Converts DuckTrack event data to a list of actions and saves them to a file."""
|
||||||
if not os.path.exists(ducktrack_event_file):
|
if not os.path.exists(ducktrack_event_file):
|
||||||
raise FileNotFoundError(ducktrack_event_file)
|
raise FileNotFoundError(ducktrack_event_file)
|
||||||
|
|
||||||
# set to default
|
|
||||||
if compress_move is None:
|
|
||||||
compress_move = self.compress_move
|
|
||||||
|
|
||||||
with open(ducktrack_event_file, 'r') as file:
|
with open(ducktrack_event_file, 'r') as file:
|
||||||
data = [json.loads(line) for line in file]
|
events = [json.loads(line) for line in file]
|
||||||
|
|
||||||
result = {"action": [], "event": []}
|
# Save the compressed actions in a list
|
||||||
|
result = []
|
||||||
index = 0
|
index = 0
|
||||||
|
presses_to_skip = 0
|
||||||
|
releases_to_skip = 0
|
||||||
|
move_to_skip = 0
|
||||||
|
keys_pressed = []
|
||||||
|
|
||||||
# Compress the mouse move events
|
# Compress the mouse move events
|
||||||
while index < len(data):
|
while index < len(events):
|
||||||
event = data[index]
|
|
||||||
if event["action"] == "move" and compress_move:
|
event = events[index]
|
||||||
first_move, last_move, index = self.compress_mouse_move(data, index)
|
|
||||||
result["action"].extend([self.event_to_action(last_move)])
|
def do_mouse_press(button: str, _index: int):
|
||||||
result["event"].extend([last_move])
|
|
||||||
else:
|
num_clicks = 0
|
||||||
result["action"].append(self.event_to_action(event))
|
mouse_pressed = True
|
||||||
result["event"].append(event)
|
skip_move = 0
|
||||||
|
click_x, click_y = event["x"], event["y"]
|
||||||
|
|
||||||
|
for j, next_event in enumerate(events[index + 1:]):
|
||||||
|
# make sure the time between mouse clicks is less than 500ms
|
||||||
|
if next_event["time_stamp"] - event["time_stamp"] > 0.5:
|
||||||
|
if num_clicks > 0:
|
||||||
|
if result[-1:][0]["action_type"] == "MOVE_TO":
|
||||||
|
result.pop()
|
||||||
|
result.append({
|
||||||
|
"action_type": "CLICK",
|
||||||
|
"parameters": {
|
||||||
|
"button": button,
|
||||||
|
"x" : click_x,
|
||||||
|
"y" : click_y,
|
||||||
|
"num_clicks": num_clicks
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return num_clicks-1, num_clicks, _index, skip_move
|
||||||
|
break
|
||||||
|
|
||||||
|
if "x" in next_event and "y" in next_event:
|
||||||
|
# if the mouse moves out of the click radius/rectangle, it is not a click sequence
|
||||||
|
if math.sqrt((next_event["y"] - event["y"]) ** 2 +
|
||||||
|
(next_event["x"] - event["x"]) ** 2) > 4:
|
||||||
|
if num_clicks > 0:
|
||||||
|
if result[-1:][0]["action_type"] == "MOVE_TO":
|
||||||
|
result.pop()
|
||||||
|
result.append({
|
||||||
|
"action_type": "CLICK",
|
||||||
|
"parameters": {
|
||||||
|
"button": button,
|
||||||
|
"x" : click_x,
|
||||||
|
"y" : click_y,
|
||||||
|
"num_clicks": num_clicks
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return num_clicks-1, num_clicks, _index, skip_move
|
||||||
|
break
|
||||||
|
|
||||||
|
if next_event["action"] == "click" and compress_click:
|
||||||
|
if not next_event["pressed"]:
|
||||||
|
num_clicks += 1
|
||||||
|
mouse_pressed = False
|
||||||
|
if num_clicks == 3:
|
||||||
|
if result[-1:][0]["action_type"] == "MOVE_TO":
|
||||||
|
result.pop()
|
||||||
|
result.append({
|
||||||
|
"action_type": "CLICK",
|
||||||
|
"parameters": {
|
||||||
|
"button": button,
|
||||||
|
"x" : click_x,
|
||||||
|
"y" : click_y,
|
||||||
|
"num_clicks": 3
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return 2, 3, _index, skip_move
|
||||||
|
elif next_event["pressed"]:
|
||||||
|
mouse_pressed = True
|
||||||
|
else:
|
||||||
|
raise NotImplementedError(next_event["pressed"])
|
||||||
|
elif next_event["action"] != "click" and not mouse_pressed:
|
||||||
|
if next_event["action"] == "move":
|
||||||
|
if next_event["x"] == click_x and next_event["y"] == click_y:
|
||||||
|
skip_move += 1
|
||||||
|
continue
|
||||||
|
if result[-1:][0]["action_type"] == "MOVE_TO":
|
||||||
|
result.pop()
|
||||||
|
result.append({
|
||||||
|
"action_type": "CLICK",
|
||||||
|
"parameters": {
|
||||||
|
"button": button,
|
||||||
|
"x" : click_x,
|
||||||
|
"y" : click_y,
|
||||||
|
"num_clicks": num_clicks
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return num_clicks-1, num_clicks, _index, skip_move
|
||||||
|
|
||||||
|
# Compress {MOUSE_DOWN, MOVE, MOUSE_UP} into DRAG_TO event
|
||||||
|
elif next_event["action"] == "move" and compress_drag:
|
||||||
|
if next_event["x"] == click_x and next_event["y"] == click_y:
|
||||||
|
skip_move += 1
|
||||||
|
continue
|
||||||
|
last_move, _index = self.compress_mouse_move(events, _index+1)
|
||||||
|
result.append({
|
||||||
|
"action_type": "DRAG_TO",
|
||||||
|
"parameters": {
|
||||||
|
"x": last_move["x"],
|
||||||
|
"y": last_move["y"]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return 0, 1, _index, skip_move
|
||||||
|
|
||||||
|
result.append({
|
||||||
|
"action_type": "MOUSE_DOWN",
|
||||||
|
"parameters": {
|
||||||
|
"button": button
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return 0, 0, _index, skip_move
|
||||||
|
|
||||||
|
if event["action"] == "move":
|
||||||
|
if move_to_skip > 0:
|
||||||
|
move_to_skip -= 1
|
||||||
|
index += 1
|
||||||
|
continue
|
||||||
|
if compress_move:
|
||||||
|
last_move, index = self.compress_mouse_move(events, index)
|
||||||
|
result.extend([self.event_to_action(last_move)])
|
||||||
|
|
||||||
|
elif event["action"] == "scroll" and compress_scroll:
|
||||||
|
last_scroll, index = self.compress_scroll(events, index)
|
||||||
|
result.extend([self.event_to_action(last_scroll)])
|
||||||
|
|
||||||
|
elif event["action"] == "click":
|
||||||
|
button = event["button"]
|
||||||
|
|
||||||
|
if event["pressed"]:
|
||||||
|
if presses_to_skip == 0:
|
||||||
|
presses, releases, index, moves = do_mouse_press(button, index)
|
||||||
|
presses_to_skip += presses
|
||||||
|
releases_to_skip += releases
|
||||||
|
move_to_skip += moves
|
||||||
|
else:
|
||||||
|
presses_to_skip -= 1
|
||||||
|
else:
|
||||||
|
if releases_to_skip == 0:
|
||||||
|
result.append({
|
||||||
|
"action_type": "MOUSE_UP",
|
||||||
|
"parameters": {
|
||||||
|
"button": button
|
||||||
|
}
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
releases_to_skip -= 1
|
||||||
index += 1
|
index += 1
|
||||||
|
elif event["action"] == "press" and event["name"] not in COMMAND_KEYS and compress_typing:
|
||||||
# Compress the key down and key up actions
|
typing_words = ""
|
||||||
# todo: handling the key down and key up events
|
while index < len(events) and events[index]["action"] in ["press", "release"] and events[index]["name"] not in COMMAND_KEYS:
|
||||||
_new_actions = []
|
if events[index]["action"] == "press":
|
||||||
_action = list(result["action"])
|
keys_pressed.append(events[index]["name"])
|
||||||
idx = 0
|
typing_words += events[index]["name"] if events[index]["name"] not in typingkey2str else typingkey2str[events[index]["name"]]
|
||||||
|
elif events[index]["action"] == "release":
|
||||||
while True:
|
keys_pressed.remove(events[index]["name"])
|
||||||
if idx >= len(_action):
|
index += 1
|
||||||
break
|
if len(typing_words) > 1:
|
||||||
|
result.append({
|
||||||
if _action[idx]["action_type"] == self.enum_to_str(Action.KEY_DOWN):
|
"action_type": "TYPING",
|
||||||
typed_text = []
|
"parameters": {
|
||||||
while idx < len(_action) and _action[idx]["action_type"] in [self.enum_to_str(Action.KEY_DOWN), self.enum_to_str(Action.KEY_UP)] and len(_action[idx]["key"]) == 1:
|
"text": typing_words
|
||||||
if _action[idx]["action_type"] == self.enum_to_str(Action.KEY_DOWN):
|
}
|
||||||
typed_text.append(chr(_action[idx]["key"][0]))
|
})
|
||||||
idx += 1
|
|
||||||
if typed_text:
|
|
||||||
_new_actions.append({"action_type": self.enum_to_str(Action.TYPE), "text": typed_text})
|
|
||||||
else:
|
else:
|
||||||
_new_actions.append(_action[idx])
|
result.append({
|
||||||
idx += 1
|
"action_type": "PRESS",
|
||||||
|
"parameters": {
|
||||||
|
"key": typing_words
|
||||||
|
}
|
||||||
|
})
|
||||||
|
elif event["action"] == "press" and compress_press_key:
|
||||||
|
keys_pressed.append(event["name"])
|
||||||
|
result.append({
|
||||||
|
"action_type": "PRESS",
|
||||||
|
"parameters": {
|
||||||
|
"key": event["name"] if event["name"] not in pynput2pyautogui_key else pynput2pyautogui_key[
|
||||||
|
event["name"]]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
index += 1
|
||||||
|
elif event["action"] == "release" and compress_press_key:
|
||||||
|
keys_pressed.remove(event["name"])
|
||||||
|
index += 1
|
||||||
else:
|
else:
|
||||||
_new_actions.append(_action[idx])
|
result.append(self.event_to_action(event))
|
||||||
idx += 1
|
index += 1
|
||||||
|
|
||||||
result["action"] = _new_actions
|
|
||||||
|
|
||||||
# Compress the scroll up and scroll down events
|
|
||||||
# todo: handling the key down and key up events
|
|
||||||
_new_actions = []
|
|
||||||
_action = list(result["action"])
|
|
||||||
idx = 0
|
|
||||||
|
|
||||||
while True:
|
|
||||||
if idx >= len(_action):
|
|
||||||
break
|
|
||||||
|
|
||||||
if _action[idx]["action_type"] == self.enum_to_str(Action.CLICK) and _action[idx]["click_type"] in [self.enum_to_str(MouseClick.WHEEL_UP), self.enum_to_str(MouseClick.WHEEL_DOWN)]:
|
|
||||||
typed_text = []
|
|
||||||
while idx < len(_action) and _action[idx]["action_type"] == self.enum_to_str(Action.CLICK) and _action[idx]["click_type"] in [self.enum_to_str(MouseClick.WHEEL_UP), self.enum_to_str(MouseClick.WHEEL_DOWN)]:
|
|
||||||
if _action[idx]["click_type"] == self.enum_to_str(MouseClick.WHEEL_UP):
|
|
||||||
typed_text.append("UP")
|
|
||||||
idx += 1
|
|
||||||
elif _action[idx]["click_type"] == self.enum_to_str(MouseClick.WHEEL_DOWN):
|
|
||||||
typed_text.append("DOWN")
|
|
||||||
idx += 1
|
|
||||||
_new_actions.append({"action_type": self.enum_to_str(Action.CLICK), "click_type": "SCROLL", "text": typed_text})
|
|
||||||
else:
|
|
||||||
_new_actions.append(_action[idx])
|
|
||||||
idx += 1
|
|
||||||
|
|
||||||
result["action"] = _new_actions
|
|
||||||
|
|
||||||
# Compress the mouse down and mouse up actions
|
|
||||||
# todo: handling the key down and key up events
|
|
||||||
_new_actions = []
|
|
||||||
_action = list(result["action"])
|
|
||||||
idx = 0
|
|
||||||
|
|
||||||
while True:
|
|
||||||
if idx >= len(_action):
|
|
||||||
break
|
|
||||||
if _action[idx]["action_type"] == self.enum_to_str(Action.MOUSE_DOWN):
|
|
||||||
if idx + 1 < len(_action) and _action[idx+1]["action_type"] == self.enum_to_str(Action.MOUSE_UP):
|
|
||||||
_new_actions.append({"action_type": self.enum_to_str(Action.CLICK), "click_type": _action[idx]["click_type"]})
|
|
||||||
idx += 2
|
|
||||||
else:
|
|
||||||
_new_actions.append(_action[idx])
|
|
||||||
idx += 1
|
|
||||||
else:
|
|
||||||
_new_actions.append(_action[idx])
|
|
||||||
idx += 1
|
|
||||||
|
|
||||||
result["action"] = _new_actions
|
|
||||||
|
|
||||||
with open(out_file, "w") as f:
|
with open(out_file, "w") as f:
|
||||||
json.dump(result, f)
|
json.dump(result, f)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
converter = DuckTrackEventActionConverter(human_readable=True)
|
converter = DuckTrackEventActionConverter()
|
||||||
converter.ducktrack_event_file_to_action(ducktrack_event_file="sample.jsonl",
|
converter.ducktrack_event_file_to_action(
|
||||||
out_file="output.json",
|
ducktrack_event_file="complex_clicking.jsonl",
|
||||||
compress_move=True)
|
out_file="complex_clicking5.json",
|
||||||
|
compress_move=True,
|
||||||
|
compress_scroll=True,
|
||||||
|
compress_click=True,
|
||||||
|
compress_drag=True,
|
||||||
|
compress_press_key=True,
|
||||||
|
compress_typing=True,
|
||||||
|
)
|
||||||
|
|||||||
111
utils/events_calc.json
Normal file
111
utils/events_calc.json
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"action_type": "MOVE_TO",
|
||||||
|
"parameters": {
|
||||||
|
"x": 152,
|
||||||
|
"y": 259
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOUSE_DOWN",
|
||||||
|
"parameters": {
|
||||||
|
"button": "left"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOVE_TO",
|
||||||
|
"parameters": {
|
||||||
|
"x": 464,
|
||||||
|
"y": 317
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOUSE_UP",
|
||||||
|
"parameters": {
|
||||||
|
"button": "left"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOVE_TO",
|
||||||
|
"parameters": {
|
||||||
|
"x": 466,
|
||||||
|
"y": 317
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "KEY_DOWN",
|
||||||
|
"parameters": {
|
||||||
|
"key": "altleft"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "KEY_DOWN",
|
||||||
|
"parameters": {
|
||||||
|
"key": "="
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "KEY_UP",
|
||||||
|
"parameters": {
|
||||||
|
"key": "="
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "KEY_UP",
|
||||||
|
"parameters": {
|
||||||
|
"key": "altleft"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOVE_TO",
|
||||||
|
"parameters": {
|
||||||
|
"x": 709,
|
||||||
|
"y": 1047
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOUSE_DOWN",
|
||||||
|
"parameters": {
|
||||||
|
"button": "left"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOVE_TO",
|
||||||
|
"parameters": {
|
||||||
|
"x": 709,
|
||||||
|
"y": 1047
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOUSE_UP",
|
||||||
|
"parameters": {
|
||||||
|
"button": "left"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOVE_TO",
|
||||||
|
"parameters": {
|
||||||
|
"x": 717,
|
||||||
|
"y": 304
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOUSE_DOWN",
|
||||||
|
"parameters": {
|
||||||
|
"button": "left"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOVE_TO",
|
||||||
|
"parameters": {
|
||||||
|
"x": 717,
|
||||||
|
"y": 304
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOUSE_UP",
|
||||||
|
"parameters": {
|
||||||
|
"button": "left"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
423
utils/events_calc.jsonl
Normal file
423
utils/events_calc.jsonl
Normal file
@@ -0,0 +1,423 @@
|
|||||||
|
{"time_stamp": 21028.2899763, "action": "move", "x": 686, "y": 306}
|
||||||
|
{"time_stamp": 21028.2965794, "action": "move", "x": 684, "y": 306}
|
||||||
|
{"time_stamp": 21028.3046644, "action": "move", "x": 678, "y": 306}
|
||||||
|
{"time_stamp": 21028.3126807, "action": "move", "x": 670, "y": 306}
|
||||||
|
{"time_stamp": 21028.3208329, "action": "move", "x": 661, "y": 306}
|
||||||
|
{"time_stamp": 21028.3288313, "action": "move", "x": 645, "y": 306}
|
||||||
|
{"time_stamp": 21028.336626, "action": "move", "x": 625, "y": 306}
|
||||||
|
{"time_stamp": 21028.3445457, "action": "move", "x": 603, "y": 305}
|
||||||
|
{"time_stamp": 21028.3527487, "action": "move", "x": 574, "y": 303}
|
||||||
|
{"time_stamp": 21028.3606394, "action": "move", "x": 544, "y": 301}
|
||||||
|
{"time_stamp": 21028.3688565, "action": "move", "x": 508, "y": 300}
|
||||||
|
{"time_stamp": 21028.3768381, "action": "move", "x": 471, "y": 298}
|
||||||
|
{"time_stamp": 21028.3848709, "action": "move", "x": 430, "y": 296}
|
||||||
|
{"time_stamp": 21028.3926563, "action": "move", "x": 389, "y": 296}
|
||||||
|
{"time_stamp": 21028.4009164, "action": "move", "x": 348, "y": 296}
|
||||||
|
{"time_stamp": 21028.4089388, "action": "move", "x": 313, "y": 296}
|
||||||
|
{"time_stamp": 21028.4171707, "action": "move", "x": 280, "y": 296}
|
||||||
|
{"time_stamp": 21028.4245847, "action": "move", "x": 252, "y": 294}
|
||||||
|
{"time_stamp": 21028.4328148, "action": "move", "x": 225, "y": 294}
|
||||||
|
{"time_stamp": 21028.4406678, "action": "move", "x": 208, "y": 294}
|
||||||
|
{"time_stamp": 21028.4486998, "action": "move", "x": 192, "y": 294}
|
||||||
|
{"time_stamp": 21028.4568529, "action": "move", "x": 177, "y": 294}
|
||||||
|
{"time_stamp": 21028.4647334, "action": "move", "x": 163, "y": 293}
|
||||||
|
{"time_stamp": 21028.4729702, "action": "move", "x": 153, "y": 293}
|
||||||
|
{"time_stamp": 21028.4808044, "action": "move", "x": 143, "y": 293}
|
||||||
|
{"time_stamp": 21028.4889062, "action": "move", "x": 135, "y": 293}
|
||||||
|
{"time_stamp": 21028.4967676, "action": "move", "x": 130, "y": 293}
|
||||||
|
{"time_stamp": 21028.5050544, "action": "move", "x": 124, "y": 293}
|
||||||
|
{"time_stamp": 21028.5127317, "action": "move", "x": 120, "y": 293}
|
||||||
|
{"time_stamp": 21028.520827, "action": "move", "x": 117, "y": 293}
|
||||||
|
{"time_stamp": 21028.5289378, "action": "move", "x": 114, "y": 293}
|
||||||
|
{"time_stamp": 21028.5371078, "action": "move", "x": 111, "y": 293}
|
||||||
|
{"time_stamp": 21028.545514, "action": "move", "x": 107, "y": 293}
|
||||||
|
{"time_stamp": 21028.5527022, "action": "move", "x": 104, "y": 292}
|
||||||
|
{"time_stamp": 21028.5605384, "action": "move", "x": 100, "y": 292}
|
||||||
|
{"time_stamp": 21028.5686583, "action": "move", "x": 96, "y": 291}
|
||||||
|
{"time_stamp": 21028.5766951, "action": "move", "x": 90, "y": 291}
|
||||||
|
{"time_stamp": 21028.5847502, "action": "move", "x": 85, "y": 291}
|
||||||
|
{"time_stamp": 21028.5926223, "action": "move", "x": 79, "y": 290}
|
||||||
|
{"time_stamp": 21028.6007454, "action": "move", "x": 74, "y": 290}
|
||||||
|
{"time_stamp": 21028.6088707, "action": "move", "x": 70, "y": 289}
|
||||||
|
{"time_stamp": 21028.6166501, "action": "move", "x": 67, "y": 289}
|
||||||
|
{"time_stamp": 21028.6249259, "action": "move", "x": 66, "y": 289}
|
||||||
|
{"time_stamp": 21028.6647889, "action": "move", "x": 66, "y": 289}
|
||||||
|
{"time_stamp": 21028.6728642, "action": "move", "x": 68, "y": 288}
|
||||||
|
{"time_stamp": 21028.6807781, "action": "move", "x": 70, "y": 286}
|
||||||
|
{"time_stamp": 21028.6888295, "action": "move", "x": 74, "y": 285}
|
||||||
|
{"time_stamp": 21028.6971027, "action": "move", "x": 77, "y": 284}
|
||||||
|
{"time_stamp": 21028.7046499, "action": "move", "x": 81, "y": 282}
|
||||||
|
{"time_stamp": 21028.7129405, "action": "move", "x": 86, "y": 281}
|
||||||
|
{"time_stamp": 21028.7205325, "action": "move", "x": 91, "y": 279}
|
||||||
|
{"time_stamp": 21028.7285422, "action": "move", "x": 98, "y": 278}
|
||||||
|
{"time_stamp": 21028.7366509, "action": "move", "x": 104, "y": 275}
|
||||||
|
{"time_stamp": 21028.7448279, "action": "move", "x": 110, "y": 275}
|
||||||
|
{"time_stamp": 21028.7527897, "action": "move", "x": 116, "y": 273}
|
||||||
|
{"time_stamp": 21028.7609718, "action": "move", "x": 120, "y": 272}
|
||||||
|
{"time_stamp": 21028.7688693, "action": "move", "x": 124, "y": 271}
|
||||||
|
{"time_stamp": 21028.7766846, "action": "move", "x": 128, "y": 270}
|
||||||
|
{"time_stamp": 21028.7848371, "action": "move", "x": 131, "y": 270}
|
||||||
|
{"time_stamp": 21028.7927773, "action": "move", "x": 133, "y": 268}
|
||||||
|
{"time_stamp": 21028.8007498, "action": "move", "x": 134, "y": 268}
|
||||||
|
{"time_stamp": 21028.8088143, "action": "move", "x": 136, "y": 268}
|
||||||
|
{"time_stamp": 21028.8168157, "action": "move", "x": 137, "y": 268}
|
||||||
|
{"time_stamp": 21028.8246469, "action": "move", "x": 139, "y": 268}
|
||||||
|
{"time_stamp": 21028.8327817, "action": "move", "x": 140, "y": 268}
|
||||||
|
{"time_stamp": 21028.8408239, "action": "move", "x": 141, "y": 268}
|
||||||
|
{"time_stamp": 21028.8488115, "action": "move", "x": 142, "y": 267}
|
||||||
|
{"time_stamp": 21028.8571578, "action": "move", "x": 143, "y": 267}
|
||||||
|
{"time_stamp": 21028.8646641, "action": "move", "x": 144, "y": 267}
|
||||||
|
{"time_stamp": 21028.8741985, "action": "move", "x": 145, "y": 267}
|
||||||
|
{"time_stamp": 21028.8809717, "action": "move", "x": 146, "y": 267}
|
||||||
|
{"time_stamp": 21028.8888646, "action": "move", "x": 146, "y": 267}
|
||||||
|
{"time_stamp": 21028.961049, "action": "move", "x": 146, "y": 266}
|
||||||
|
{"time_stamp": 21029.0249854, "action": "move", "x": 147, "y": 265}
|
||||||
|
{"time_stamp": 21029.0328138, "action": "move", "x": 147, "y": 264}
|
||||||
|
{"time_stamp": 21029.0407582, "action": "move", "x": 147, "y": 264}
|
||||||
|
{"time_stamp": 21029.0487772, "action": "move", "x": 148, "y": 263}
|
||||||
|
{"time_stamp": 21029.0569372, "action": "move", "x": 148, "y": 263}
|
||||||
|
{"time_stamp": 21029.065073, "action": "move", "x": 149, "y": 262}
|
||||||
|
{"time_stamp": 21029.0729933, "action": "move", "x": 150, "y": 262}
|
||||||
|
{"time_stamp": 21029.0888149, "action": "move", "x": 150, "y": 261}
|
||||||
|
{"time_stamp": 21029.0971595, "action": "move", "x": 151, "y": 260}
|
||||||
|
{"time_stamp": 21029.10458, "action": "move", "x": 151, "y": 260}
|
||||||
|
{"time_stamp": 21029.1126284, "action": "move", "x": 151, "y": 260}
|
||||||
|
{"time_stamp": 21029.1208764, "action": "move", "x": 151, "y": 259}
|
||||||
|
{"time_stamp": 21029.1287413, "action": "move", "x": 152, "y": 259}
|
||||||
|
{"time_stamp": 21029.1611214, "action": "move", "x": 152, "y": 259}
|
||||||
|
{"time_stamp": 21029.1614723, "action": "click", "x": 152, "y": 259, "button": "left", "pressed": true}
|
||||||
|
{"time_stamp": 21029.2168134, "action": "move", "x": 152, "y": 259}
|
||||||
|
{"time_stamp": 21029.2248681, "action": "move", "x": 154, "y": 259}
|
||||||
|
{"time_stamp": 21029.2327317, "action": "move", "x": 156, "y": 260}
|
||||||
|
{"time_stamp": 21029.2408222, "action": "move", "x": 158, "y": 262}
|
||||||
|
{"time_stamp": 21029.2487515, "action": "move", "x": 163, "y": 263}
|
||||||
|
{"time_stamp": 21029.2568152, "action": "move", "x": 169, "y": 266}
|
||||||
|
{"time_stamp": 21029.2649126, "action": "move", "x": 174, "y": 270}
|
||||||
|
{"time_stamp": 21029.2727425, "action": "move", "x": 183, "y": 273}
|
||||||
|
{"time_stamp": 21029.2807226, "action": "move", "x": 190, "y": 276}
|
||||||
|
{"time_stamp": 21029.2887741, "action": "move", "x": 200, "y": 279}
|
||||||
|
{"time_stamp": 21029.296883, "action": "move", "x": 209, "y": 282}
|
||||||
|
{"time_stamp": 21029.304834, "action": "move", "x": 220, "y": 285}
|
||||||
|
{"time_stamp": 21029.3131548, "action": "move", "x": 233, "y": 287}
|
||||||
|
{"time_stamp": 21029.3207916, "action": "move", "x": 244, "y": 290}
|
||||||
|
{"time_stamp": 21029.3290871, "action": "move", "x": 256, "y": 292}
|
||||||
|
{"time_stamp": 21029.3366508, "action": "move", "x": 268, "y": 293}
|
||||||
|
{"time_stamp": 21029.3445108, "action": "move", "x": 279, "y": 294}
|
||||||
|
{"time_stamp": 21029.3529213, "action": "move", "x": 288, "y": 297}
|
||||||
|
{"time_stamp": 21029.3607282, "action": "move", "x": 298, "y": 297}
|
||||||
|
{"time_stamp": 21029.3691604, "action": "move", "x": 307, "y": 297}
|
||||||
|
{"time_stamp": 21029.3769931, "action": "move", "x": 316, "y": 298}
|
||||||
|
{"time_stamp": 21029.3850192, "action": "move", "x": 324, "y": 300}
|
||||||
|
{"time_stamp": 21029.3927881, "action": "move", "x": 331, "y": 301}
|
||||||
|
{"time_stamp": 21029.4007925, "action": "move", "x": 336, "y": 302}
|
||||||
|
{"time_stamp": 21029.4088638, "action": "move", "x": 342, "y": 304}
|
||||||
|
{"time_stamp": 21029.4167924, "action": "move", "x": 346, "y": 304}
|
||||||
|
{"time_stamp": 21029.4251047, "action": "move", "x": 349, "y": 304}
|
||||||
|
{"time_stamp": 21029.4328699, "action": "move", "x": 352, "y": 306}
|
||||||
|
{"time_stamp": 21029.4409293, "action": "move", "x": 355, "y": 306}
|
||||||
|
{"time_stamp": 21029.4487136, "action": "move", "x": 356, "y": 307}
|
||||||
|
{"time_stamp": 21029.4568755, "action": "move", "x": 358, "y": 308}
|
||||||
|
{"time_stamp": 21029.4647053, "action": "move", "x": 361, "y": 309}
|
||||||
|
{"time_stamp": 21029.4728173, "action": "move", "x": 363, "y": 310}
|
||||||
|
{"time_stamp": 21029.4806011, "action": "move", "x": 365, "y": 311}
|
||||||
|
{"time_stamp": 21029.4889321, "action": "move", "x": 367, "y": 312}
|
||||||
|
{"time_stamp": 21029.4967544, "action": "move", "x": 370, "y": 313}
|
||||||
|
{"time_stamp": 21029.5049087, "action": "move", "x": 374, "y": 314}
|
||||||
|
{"time_stamp": 21029.5129759, "action": "move", "x": 377, "y": 316}
|
||||||
|
{"time_stamp": 21029.5210278, "action": "move", "x": 381, "y": 317}
|
||||||
|
{"time_stamp": 21029.5286154, "action": "move", "x": 386, "y": 317}
|
||||||
|
{"time_stamp": 21029.5371491, "action": "move", "x": 390, "y": 318}
|
||||||
|
{"time_stamp": 21029.5449815, "action": "move", "x": 393, "y": 319}
|
||||||
|
{"time_stamp": 21029.5526305, "action": "move", "x": 397, "y": 319}
|
||||||
|
{"time_stamp": 21029.5604721, "action": "move", "x": 400, "y": 319}
|
||||||
|
{"time_stamp": 21029.5690371, "action": "move", "x": 402, "y": 319}
|
||||||
|
{"time_stamp": 21029.5772927, "action": "move", "x": 405, "y": 319}
|
||||||
|
{"time_stamp": 21029.5846161, "action": "move", "x": 406, "y": 319}
|
||||||
|
{"time_stamp": 21029.5928399, "action": "move", "x": 407, "y": 319}
|
||||||
|
{"time_stamp": 21029.6007032, "action": "move", "x": 408, "y": 319}
|
||||||
|
{"time_stamp": 21029.609118, "action": "move", "x": 409, "y": 319}
|
||||||
|
{"time_stamp": 21029.6166036, "action": "move", "x": 411, "y": 320}
|
||||||
|
{"time_stamp": 21029.6249215, "action": "move", "x": 412, "y": 320}
|
||||||
|
{"time_stamp": 21029.6327262, "action": "move", "x": 414, "y": 320}
|
||||||
|
{"time_stamp": 21029.6408018, "action": "move", "x": 415, "y": 320}
|
||||||
|
{"time_stamp": 21029.649463, "action": "move", "x": 418, "y": 320}
|
||||||
|
{"time_stamp": 21029.6575693, "action": "move", "x": 420, "y": 320}
|
||||||
|
{"time_stamp": 21029.6650956, "action": "move", "x": 423, "y": 320}
|
||||||
|
{"time_stamp": 21029.6729346, "action": "move", "x": 426, "y": 320}
|
||||||
|
{"time_stamp": 21029.6808747, "action": "move", "x": 429, "y": 320}
|
||||||
|
{"time_stamp": 21029.688616, "action": "move", "x": 432, "y": 320}
|
||||||
|
{"time_stamp": 21029.6970675, "action": "move", "x": 435, "y": 320}
|
||||||
|
{"time_stamp": 21029.7049324, "action": "move", "x": 438, "y": 320}
|
||||||
|
{"time_stamp": 21029.7130458, "action": "move", "x": 439, "y": 320}
|
||||||
|
{"time_stamp": 21029.7207522, "action": "move", "x": 440, "y": 320}
|
||||||
|
{"time_stamp": 21029.7289775, "action": "move", "x": 442, "y": 320}
|
||||||
|
{"time_stamp": 21029.7366577, "action": "move", "x": 443, "y": 320}
|
||||||
|
{"time_stamp": 21029.7444825, "action": "move", "x": 445, "y": 320}
|
||||||
|
{"time_stamp": 21029.7526551, "action": "move", "x": 447, "y": 320}
|
||||||
|
{"time_stamp": 21029.7604951, "action": "move", "x": 448, "y": 320}
|
||||||
|
{"time_stamp": 21029.7686569, "action": "move", "x": 450, "y": 319}
|
||||||
|
{"time_stamp": 21029.7775496, "action": "move", "x": 451, "y": 319}
|
||||||
|
{"time_stamp": 21029.7849685, "action": "move", "x": 451, "y": 319}
|
||||||
|
{"time_stamp": 21029.7929356, "action": "move", "x": 452, "y": 319}
|
||||||
|
{"time_stamp": 21029.8007005, "action": "move", "x": 452, "y": 319}
|
||||||
|
{"time_stamp": 21029.8170717, "action": "move", "x": 453, "y": 319}
|
||||||
|
{"time_stamp": 21029.8248574, "action": "move", "x": 453, "y": 318}
|
||||||
|
{"time_stamp": 21029.8330359, "action": "move", "x": 454, "y": 318}
|
||||||
|
{"time_stamp": 21029.8407804, "action": "move", "x": 454, "y": 318}
|
||||||
|
{"time_stamp": 21029.8487615, "action": "move", "x": 455, "y": 318}
|
||||||
|
{"time_stamp": 21029.8648369, "action": "move", "x": 455, "y": 318}
|
||||||
|
{"time_stamp": 21029.8726477, "action": "move", "x": 456, "y": 318}
|
||||||
|
{"time_stamp": 21029.8809607, "action": "move", "x": 457, "y": 317}
|
||||||
|
{"time_stamp": 21029.8888473, "action": "move", "x": 457, "y": 317}
|
||||||
|
{"time_stamp": 21029.9048933, "action": "move", "x": 458, "y": 317}
|
||||||
|
{"time_stamp": 21029.9129577, "action": "move", "x": 458, "y": 317}
|
||||||
|
{"time_stamp": 21029.9208533, "action": "move", "x": 459, "y": 317}
|
||||||
|
{"time_stamp": 21029.9286645, "action": "move", "x": 459, "y": 317}
|
||||||
|
{"time_stamp": 21029.9368461, "action": "move", "x": 461, "y": 317}
|
||||||
|
{"time_stamp": 21029.9448712, "action": "move", "x": 461, "y": 317}
|
||||||
|
{"time_stamp": 21029.953212, "action": "move", "x": 462, "y": 317}
|
||||||
|
{"time_stamp": 21029.9608238, "action": "move", "x": 463, "y": 317}
|
||||||
|
{"time_stamp": 21029.9686821, "action": "move", "x": 463, "y": 317}
|
||||||
|
{"time_stamp": 21029.9768342, "action": "move", "x": 464, "y": 317}
|
||||||
|
{"time_stamp": 21030.361149, "action": "move", "x": 464, "y": 317}
|
||||||
|
{"time_stamp": 21030.3613383, "action": "click", "x": 464, "y": 317, "button": "left", "pressed": false}
|
||||||
|
{"time_stamp": 21030.9690893, "action": "move", "x": 465, "y": 317}
|
||||||
|
{"time_stamp": 21030.9770331, "action": "move", "x": 465, "y": 317}
|
||||||
|
{"time_stamp": 21030.9933165, "action": "move", "x": 466, "y": 317}
|
||||||
|
{"time_stamp": 21031.8410512, "action": "press", "name": "alt_l"}
|
||||||
|
{"time_stamp": 21032.1375784, "action": "press", "name": "="}
|
||||||
|
{"time_stamp": 21032.2331653, "action": "release", "name": "="}
|
||||||
|
{"time_stamp": 21032.4009051, "action": "release", "name": "alt_l"}
|
||||||
|
{"time_stamp": 21033.1212821, "action": "move", "x": 466, "y": 317}
|
||||||
|
{"time_stamp": 21033.1289659, "action": "move", "x": 467, "y": 320}
|
||||||
|
{"time_stamp": 21033.1370348, "action": "move", "x": 471, "y": 325}
|
||||||
|
{"time_stamp": 21033.1456134, "action": "move", "x": 475, "y": 332}
|
||||||
|
{"time_stamp": 21033.1531721, "action": "move", "x": 482, "y": 340}
|
||||||
|
{"time_stamp": 21033.1605014, "action": "move", "x": 490, "y": 349}
|
||||||
|
{"time_stamp": 21033.1692663, "action": "move", "x": 498, "y": 359}
|
||||||
|
{"time_stamp": 21033.1771117, "action": "move", "x": 508, "y": 371}
|
||||||
|
{"time_stamp": 21033.1850449, "action": "move", "x": 521, "y": 383}
|
||||||
|
{"time_stamp": 21033.1929826, "action": "move", "x": 535, "y": 399}
|
||||||
|
{"time_stamp": 21033.201192, "action": "move", "x": 546, "y": 415}
|
||||||
|
{"time_stamp": 21033.2089185, "action": "move", "x": 555, "y": 434}
|
||||||
|
{"time_stamp": 21033.216848, "action": "move", "x": 563, "y": 452}
|
||||||
|
{"time_stamp": 21033.2246769, "action": "move", "x": 570, "y": 469}
|
||||||
|
{"time_stamp": 21033.2328685, "action": "move", "x": 574, "y": 485}
|
||||||
|
{"time_stamp": 21033.2407514, "action": "move", "x": 577, "y": 503}
|
||||||
|
{"time_stamp": 21033.2488102, "action": "move", "x": 578, "y": 518}
|
||||||
|
{"time_stamp": 21033.2569003, "action": "move", "x": 578, "y": 534}
|
||||||
|
{"time_stamp": 21033.2654896, "action": "move", "x": 580, "y": 552}
|
||||||
|
{"time_stamp": 21033.2730147, "action": "move", "x": 580, "y": 571}
|
||||||
|
{"time_stamp": 21033.2808888, "action": "move", "x": 582, "y": 592}
|
||||||
|
{"time_stamp": 21033.2890461, "action": "move", "x": 583, "y": 617}
|
||||||
|
{"time_stamp": 21033.2968868, "action": "move", "x": 586, "y": 643}
|
||||||
|
{"time_stamp": 21033.3050093, "action": "move", "x": 588, "y": 665}
|
||||||
|
{"time_stamp": 21033.3129685, "action": "move", "x": 591, "y": 694}
|
||||||
|
{"time_stamp": 21033.3210515, "action": "move", "x": 592, "y": 716}
|
||||||
|
{"time_stamp": 21033.3289082, "action": "move", "x": 594, "y": 735}
|
||||||
|
{"time_stamp": 21033.3368274, "action": "move", "x": 598, "y": 751}
|
||||||
|
{"time_stamp": 21033.3446464, "action": "move", "x": 601, "y": 761}
|
||||||
|
{"time_stamp": 21033.3532343, "action": "move", "x": 604, "y": 773}
|
||||||
|
{"time_stamp": 21033.3607161, "action": "move", "x": 606, "y": 783}
|
||||||
|
{"time_stamp": 21033.3687129, "action": "move", "x": 608, "y": 794}
|
||||||
|
{"time_stamp": 21033.3769088, "action": "move", "x": 611, "y": 804}
|
||||||
|
{"time_stamp": 21033.3846615, "action": "move", "x": 614, "y": 816}
|
||||||
|
{"time_stamp": 21033.3927661, "action": "move", "x": 617, "y": 826}
|
||||||
|
{"time_stamp": 21033.4008999, "action": "move", "x": 619, "y": 837}
|
||||||
|
{"time_stamp": 21033.408732, "action": "move", "x": 621, "y": 846}
|
||||||
|
{"time_stamp": 21033.4169038, "action": "move", "x": 623, "y": 856}
|
||||||
|
{"time_stamp": 21033.4250181, "action": "move", "x": 623, "y": 865}
|
||||||
|
{"time_stamp": 21033.4329144, "action": "move", "x": 624, "y": 875}
|
||||||
|
{"time_stamp": 21033.4410593, "action": "move", "x": 624, "y": 883}
|
||||||
|
{"time_stamp": 21033.448994, "action": "move", "x": 626, "y": 891}
|
||||||
|
{"time_stamp": 21033.4570193, "action": "move", "x": 626, "y": 899}
|
||||||
|
{"time_stamp": 21033.4648038, "action": "move", "x": 627, "y": 906}
|
||||||
|
{"time_stamp": 21033.4730101, "action": "move", "x": 628, "y": 913}
|
||||||
|
{"time_stamp": 21033.4815421, "action": "move", "x": 631, "y": 920}
|
||||||
|
{"time_stamp": 21033.4891275, "action": "move", "x": 635, "y": 926}
|
||||||
|
{"time_stamp": 21033.4970011, "action": "move", "x": 639, "y": 930}
|
||||||
|
{"time_stamp": 21033.5047772, "action": "move", "x": 647, "y": 935}
|
||||||
|
{"time_stamp": 21033.5132552, "action": "move", "x": 653, "y": 939}
|
||||||
|
{"time_stamp": 21033.5211245, "action": "move", "x": 659, "y": 943}
|
||||||
|
{"time_stamp": 21033.5292347, "action": "move", "x": 665, "y": 947}
|
||||||
|
{"time_stamp": 21033.5373088, "action": "move", "x": 671, "y": 950}
|
||||||
|
{"time_stamp": 21033.5447875, "action": "move", "x": 677, "y": 955}
|
||||||
|
{"time_stamp": 21033.5529495, "action": "move", "x": 684, "y": 960}
|
||||||
|
{"time_stamp": 21033.5609559, "action": "move", "x": 690, "y": 965}
|
||||||
|
{"time_stamp": 21033.5689335, "action": "move", "x": 696, "y": 971}
|
||||||
|
{"time_stamp": 21033.5768783, "action": "move", "x": 700, "y": 977}
|
||||||
|
{"time_stamp": 21033.5846548, "action": "move", "x": 703, "y": 981}
|
||||||
|
{"time_stamp": 21033.5931357, "action": "move", "x": 705, "y": 985}
|
||||||
|
{"time_stamp": 21033.6009205, "action": "move", "x": 707, "y": 988}
|
||||||
|
{"time_stamp": 21033.6088781, "action": "move", "x": 708, "y": 991}
|
||||||
|
{"time_stamp": 21033.6169713, "action": "move", "x": 709, "y": 994}
|
||||||
|
{"time_stamp": 21033.6249134, "action": "move", "x": 709, "y": 997}
|
||||||
|
{"time_stamp": 21033.6328882, "action": "move", "x": 710, "y": 999}
|
||||||
|
{"time_stamp": 21033.6412016, "action": "move", "x": 711, "y": 1003}
|
||||||
|
{"time_stamp": 21033.648939, "action": "move", "x": 711, "y": 1007}
|
||||||
|
{"time_stamp": 21033.6572201, "action": "move", "x": 713, "y": 1010}
|
||||||
|
{"time_stamp": 21033.6647348, "action": "move", "x": 715, "y": 1013}
|
||||||
|
{"time_stamp": 21033.6730325, "action": "move", "x": 716, "y": 1017}
|
||||||
|
{"time_stamp": 21033.6810552, "action": "move", "x": 717, "y": 1021}
|
||||||
|
{"time_stamp": 21033.6890871, "action": "move", "x": 719, "y": 1024}
|
||||||
|
{"time_stamp": 21033.6969594, "action": "move", "x": 720, "y": 1026}
|
||||||
|
{"time_stamp": 21033.7048284, "action": "move", "x": 720, "y": 1028}
|
||||||
|
{"time_stamp": 21033.7126425, "action": "move", "x": 720, "y": 1028}
|
||||||
|
{"time_stamp": 21033.7610156, "action": "move", "x": 720, "y": 1029}
|
||||||
|
{"time_stamp": 21033.7693689, "action": "move", "x": 720, "y": 1029}
|
||||||
|
{"time_stamp": 21033.7772628, "action": "move", "x": 720, "y": 1030}
|
||||||
|
{"time_stamp": 21033.7847737, "action": "move", "x": 720, "y": 1031}
|
||||||
|
{"time_stamp": 21033.7929223, "action": "move", "x": 719, "y": 1031}
|
||||||
|
{"time_stamp": 21033.801029, "action": "move", "x": 719, "y": 1032}
|
||||||
|
{"time_stamp": 21033.808944, "action": "move", "x": 718, "y": 1033}
|
||||||
|
{"time_stamp": 21033.8169394, "action": "move", "x": 717, "y": 1035}
|
||||||
|
{"time_stamp": 21033.8248771, "action": "move", "x": 716, "y": 1035}
|
||||||
|
{"time_stamp": 21033.8334548, "action": "move", "x": 716, "y": 1036}
|
||||||
|
{"time_stamp": 21033.8410779, "action": "move", "x": 715, "y": 1037}
|
||||||
|
{"time_stamp": 21033.8486117, "action": "move", "x": 715, "y": 1039}
|
||||||
|
{"time_stamp": 21033.8568906, "action": "move", "x": 713, "y": 1039}
|
||||||
|
{"time_stamp": 21033.8649249, "action": "move", "x": 712, "y": 1040}
|
||||||
|
{"time_stamp": 21033.8729566, "action": "move", "x": 712, "y": 1042}
|
||||||
|
{"time_stamp": 21033.8810286, "action": "move", "x": 711, "y": 1043}
|
||||||
|
{"time_stamp": 21033.8888454, "action": "move", "x": 711, "y": 1044}
|
||||||
|
{"time_stamp": 21033.8970736, "action": "move", "x": 709, "y": 1045}
|
||||||
|
{"time_stamp": 21033.9051884, "action": "move", "x": 709, "y": 1046}
|
||||||
|
{"time_stamp": 21033.91297, "action": "move", "x": 709, "y": 1047}
|
||||||
|
{"time_stamp": 21033.9210518, "action": "move", "x": 709, "y": 1047}
|
||||||
|
{"time_stamp": 21033.9770341, "action": "move", "x": 709, "y": 1047}
|
||||||
|
{"time_stamp": 21033.9932821, "action": "move", "x": 709, "y": 1047}
|
||||||
|
{"time_stamp": 21033.9933595, "action": "click", "x": 709, "y": 1047, "button": "left", "pressed": true}
|
||||||
|
{"time_stamp": 21034.0734669, "action": "move", "x": 709, "y": 1047}
|
||||||
|
{"time_stamp": 21034.0737272, "action": "click", "x": 709, "y": 1047, "button": "left", "pressed": false}
|
||||||
|
{"time_stamp": 21034.1450402, "action": "move", "x": 709, "y": 1047}
|
||||||
|
{"time_stamp": 21034.1608305, "action": "move", "x": 709, "y": 1047}
|
||||||
|
{"time_stamp": 21034.1690642, "action": "move", "x": 709, "y": 1046}
|
||||||
|
{"time_stamp": 21034.1770086, "action": "move", "x": 709, "y": 1045}
|
||||||
|
{"time_stamp": 21034.1849649, "action": "move", "x": 709, "y": 1044}
|
||||||
|
{"time_stamp": 21034.1927171, "action": "move", "x": 709, "y": 1043}
|
||||||
|
{"time_stamp": 21034.2008052, "action": "move", "x": 709, "y": 1040}
|
||||||
|
{"time_stamp": 21034.2088854, "action": "move", "x": 709, "y": 1038}
|
||||||
|
{"time_stamp": 21034.2167939, "action": "move", "x": 709, "y": 1034}
|
||||||
|
{"time_stamp": 21034.224882, "action": "move", "x": 709, "y": 1029}
|
||||||
|
{"time_stamp": 21034.2327267, "action": "move", "x": 711, "y": 1023}
|
||||||
|
{"time_stamp": 21034.2408131, "action": "move", "x": 711, "y": 1016}
|
||||||
|
{"time_stamp": 21034.2502186, "action": "move", "x": 712, "y": 1005}
|
||||||
|
{"time_stamp": 21034.256732, "action": "move", "x": 713, "y": 991}
|
||||||
|
{"time_stamp": 21034.2646169, "action": "move", "x": 716, "y": 976}
|
||||||
|
{"time_stamp": 21034.2729272, "action": "move", "x": 719, "y": 955}
|
||||||
|
{"time_stamp": 21034.2813953, "action": "move", "x": 722, "y": 929}
|
||||||
|
{"time_stamp": 21034.2889074, "action": "move", "x": 723, "y": 899}
|
||||||
|
{"time_stamp": 21034.2971538, "action": "move", "x": 725, "y": 871}
|
||||||
|
{"time_stamp": 21034.3049341, "action": "move", "x": 727, "y": 838}
|
||||||
|
{"time_stamp": 21034.3130394, "action": "move", "x": 727, "y": 805}
|
||||||
|
{"time_stamp": 21034.3208269, "action": "move", "x": 728, "y": 771}
|
||||||
|
{"time_stamp": 21034.3289492, "action": "move", "x": 728, "y": 742}
|
||||||
|
{"time_stamp": 21034.3367866, "action": "move", "x": 728, "y": 714}
|
||||||
|
{"time_stamp": 21034.3446895, "action": "move", "x": 728, "y": 686}
|
||||||
|
{"time_stamp": 21034.3528319, "action": "move", "x": 728, "y": 662}
|
||||||
|
{"time_stamp": 21034.3606113, "action": "move", "x": 728, "y": 643}
|
||||||
|
{"time_stamp": 21034.3686987, "action": "move", "x": 727, "y": 620}
|
||||||
|
{"time_stamp": 21034.3766536, "action": "move", "x": 725, "y": 605}
|
||||||
|
{"time_stamp": 21034.3847084, "action": "move", "x": 722, "y": 589}
|
||||||
|
{"time_stamp": 21034.3930586, "action": "move", "x": 719, "y": 576}
|
||||||
|
{"time_stamp": 21034.4009346, "action": "move", "x": 716, "y": 565}
|
||||||
|
{"time_stamp": 21034.4090089, "action": "move", "x": 712, "y": 554}
|
||||||
|
{"time_stamp": 21034.416996, "action": "move", "x": 710, "y": 544}
|
||||||
|
{"time_stamp": 21034.4246653, "action": "move", "x": 708, "y": 536}
|
||||||
|
{"time_stamp": 21034.4331124, "action": "move", "x": 707, "y": 527}
|
||||||
|
{"time_stamp": 21034.4410156, "action": "move", "x": 706, "y": 519}
|
||||||
|
{"time_stamp": 21034.4488925, "action": "move", "x": 705, "y": 509}
|
||||||
|
{"time_stamp": 21034.4568042, "action": "move", "x": 705, "y": 500}
|
||||||
|
{"time_stamp": 21034.4650783, "action": "move", "x": 704, "y": 492}
|
||||||
|
{"time_stamp": 21034.472962, "action": "move", "x": 703, "y": 483}
|
||||||
|
{"time_stamp": 21034.4809251, "action": "move", "x": 703, "y": 475}
|
||||||
|
{"time_stamp": 21034.4889399, "action": "move", "x": 703, "y": 467}
|
||||||
|
{"time_stamp": 21034.4968154, "action": "move", "x": 703, "y": 460}
|
||||||
|
{"time_stamp": 21034.505111, "action": "move", "x": 703, "y": 454}
|
||||||
|
{"time_stamp": 21034.5128327, "action": "move", "x": 703, "y": 446}
|
||||||
|
{"time_stamp": 21034.5211697, "action": "move", "x": 704, "y": 439}
|
||||||
|
{"time_stamp": 21034.5291453, "action": "move", "x": 704, "y": 432}
|
||||||
|
{"time_stamp": 21034.53683, "action": "move", "x": 704, "y": 428}
|
||||||
|
{"time_stamp": 21034.5453754, "action": "move", "x": 705, "y": 423}
|
||||||
|
{"time_stamp": 21034.5531997, "action": "move", "x": 705, "y": 419}
|
||||||
|
{"time_stamp": 21034.5610828, "action": "move", "x": 705, "y": 417}
|
||||||
|
{"time_stamp": 21034.568917, "action": "move", "x": 705, "y": 414}
|
||||||
|
{"time_stamp": 21034.5768693, "action": "move", "x": 705, "y": 412}
|
||||||
|
{"time_stamp": 21034.5849601, "action": "move", "x": 706, "y": 409}
|
||||||
|
{"time_stamp": 21034.5930116, "action": "move", "x": 706, "y": 406}
|
||||||
|
{"time_stamp": 21034.6006017, "action": "move", "x": 706, "y": 404}
|
||||||
|
{"time_stamp": 21034.6086777, "action": "move", "x": 706, "y": 402}
|
||||||
|
{"time_stamp": 21034.6167229, "action": "move", "x": 706, "y": 400}
|
||||||
|
{"time_stamp": 21034.6251342, "action": "move", "x": 706, "y": 398}
|
||||||
|
{"time_stamp": 21034.6325694, "action": "move", "x": 706, "y": 396}
|
||||||
|
{"time_stamp": 21034.6407476, "action": "move", "x": 706, "y": 393}
|
||||||
|
{"time_stamp": 21034.6489079, "action": "move", "x": 707, "y": 390}
|
||||||
|
{"time_stamp": 21034.6567719, "action": "move", "x": 707, "y": 388}
|
||||||
|
{"time_stamp": 21034.6648437, "action": "move", "x": 707, "y": 386}
|
||||||
|
{"time_stamp": 21034.6735978, "action": "move", "x": 707, "y": 383}
|
||||||
|
{"time_stamp": 21034.6808034, "action": "move", "x": 707, "y": 381}
|
||||||
|
{"time_stamp": 21034.6887831, "action": "move", "x": 707, "y": 379}
|
||||||
|
{"time_stamp": 21034.6968931, "action": "move", "x": 707, "y": 377}
|
||||||
|
{"time_stamp": 21034.7048123, "action": "move", "x": 707, "y": 375}
|
||||||
|
{"time_stamp": 21034.7127621, "action": "move", "x": 706, "y": 373}
|
||||||
|
{"time_stamp": 21034.7208214, "action": "move", "x": 706, "y": 372}
|
||||||
|
{"time_stamp": 21034.7289712, "action": "move", "x": 705, "y": 371}
|
||||||
|
{"time_stamp": 21034.7366015, "action": "move", "x": 705, "y": 370}
|
||||||
|
{"time_stamp": 21034.7449792, "action": "move", "x": 705, "y": 369}
|
||||||
|
{"time_stamp": 21034.7528215, "action": "move", "x": 705, "y": 368}
|
||||||
|
{"time_stamp": 21034.7611243, "action": "move", "x": 705, "y": 367}
|
||||||
|
{"time_stamp": 21034.7689338, "action": "move", "x": 705, "y": 366}
|
||||||
|
{"time_stamp": 21034.7768638, "action": "move", "x": 705, "y": 365}
|
||||||
|
{"time_stamp": 21034.7849091, "action": "move", "x": 705, "y": 364}
|
||||||
|
{"time_stamp": 21034.792848, "action": "move", "x": 705, "y": 363}
|
||||||
|
{"time_stamp": 21034.8010344, "action": "move", "x": 705, "y": 362}
|
||||||
|
{"time_stamp": 21034.809155, "action": "move", "x": 704, "y": 362}
|
||||||
|
{"time_stamp": 21034.8166183, "action": "move", "x": 704, "y": 359}
|
||||||
|
{"time_stamp": 21034.8249556, "action": "move", "x": 704, "y": 358}
|
||||||
|
{"time_stamp": 21034.8333238, "action": "move", "x": 704, "y": 356}
|
||||||
|
{"time_stamp": 21034.8410045, "action": "move", "x": 703, "y": 354}
|
||||||
|
{"time_stamp": 21034.8486685, "action": "move", "x": 703, "y": 352}
|
||||||
|
{"time_stamp": 21034.857368, "action": "move", "x": 703, "y": 350}
|
||||||
|
{"time_stamp": 21034.8647224, "action": "move", "x": 703, "y": 347}
|
||||||
|
{"time_stamp": 21034.8730798, "action": "move", "x": 703, "y": 346}
|
||||||
|
{"time_stamp": 21034.8809692, "action": "move", "x": 703, "y": 342}
|
||||||
|
{"time_stamp": 21034.8889165, "action": "move", "x": 703, "y": 341}
|
||||||
|
{"time_stamp": 21034.8969094, "action": "move", "x": 704, "y": 339}
|
||||||
|
{"time_stamp": 21034.9052672, "action": "move", "x": 704, "y": 337}
|
||||||
|
{"time_stamp": 21034.9145868, "action": "move", "x": 704, "y": 335}
|
||||||
|
{"time_stamp": 21034.9208561, "action": "move", "x": 704, "y": 334}
|
||||||
|
{"time_stamp": 21034.928931, "action": "move", "x": 704, "y": 333}
|
||||||
|
{"time_stamp": 21034.9374176, "action": "move", "x": 704, "y": 332}
|
||||||
|
{"time_stamp": 21034.9451258, "action": "move", "x": 704, "y": 330}
|
||||||
|
{"time_stamp": 21034.9528709, "action": "move", "x": 704, "y": 329}
|
||||||
|
{"time_stamp": 21034.9611476, "action": "move", "x": 704, "y": 328}
|
||||||
|
{"time_stamp": 21034.968991, "action": "move", "x": 704, "y": 327}
|
||||||
|
{"time_stamp": 21034.9768394, "action": "move", "x": 705, "y": 325}
|
||||||
|
{"time_stamp": 21034.9848553, "action": "move", "x": 705, "y": 324}
|
||||||
|
{"time_stamp": 21034.993121, "action": "move", "x": 705, "y": 323}
|
||||||
|
{"time_stamp": 21035.0007992, "action": "move", "x": 706, "y": 322}
|
||||||
|
{"time_stamp": 21035.0088762, "action": "move", "x": 707, "y": 320}
|
||||||
|
{"time_stamp": 21035.0166123, "action": "move", "x": 707, "y": 320}
|
||||||
|
{"time_stamp": 21035.0247724, "action": "move", "x": 708, "y": 318}
|
||||||
|
{"time_stamp": 21035.0335071, "action": "move", "x": 708, "y": 317}
|
||||||
|
{"time_stamp": 21035.0411458, "action": "move", "x": 709, "y": 317}
|
||||||
|
{"time_stamp": 21035.0491997, "action": "move", "x": 709, "y": 316}
|
||||||
|
{"time_stamp": 21035.0569637, "action": "move", "x": 711, "y": 314}
|
||||||
|
{"time_stamp": 21035.06496, "action": "move", "x": 711, "y": 313}
|
||||||
|
{"time_stamp": 21035.0726588, "action": "move", "x": 712, "y": 312}
|
||||||
|
{"time_stamp": 21035.0807214, "action": "move", "x": 713, "y": 311}
|
||||||
|
{"time_stamp": 21035.0888078, "action": "move", "x": 713, "y": 309}
|
||||||
|
{"time_stamp": 21035.0972443, "action": "move", "x": 713, "y": 309}
|
||||||
|
{"time_stamp": 21035.1048868, "action": "move", "x": 714, "y": 308}
|
||||||
|
{"time_stamp": 21035.1127551, "action": "move", "x": 715, "y": 307}
|
||||||
|
{"time_stamp": 21035.1208842, "action": "move", "x": 715, "y": 306}
|
||||||
|
{"time_stamp": 21035.1285261, "action": "move", "x": 715, "y": 306}
|
||||||
|
{"time_stamp": 21035.1366862, "action": "move", "x": 715, "y": 305}
|
||||||
|
{"time_stamp": 21035.1446592, "action": "move", "x": 716, "y": 305}
|
||||||
|
{"time_stamp": 21035.1528109, "action": "move", "x": 716, "y": 305}
|
||||||
|
{"time_stamp": 21035.1848109, "action": "move", "x": 716, "y": 304}
|
||||||
|
{"time_stamp": 21035.208994, "action": "move", "x": 717, "y": 304}
|
||||||
|
{"time_stamp": 21035.2571327, "action": "move", "x": 717, "y": 304}
|
||||||
|
{"time_stamp": 21035.2573543, "action": "click", "x": 717, "y": 304, "button": "left", "pressed": true}
|
||||||
|
{"time_stamp": 21035.3377191, "action": "move", "x": 717, "y": 304}
|
||||||
|
{"time_stamp": 21035.3379572, "action": "click", "x": 717, "y": 304, "button": "left", "pressed": false}
|
||||||
34
utils/image_processing/contour.py
Normal file
34
utils/image_processing/contour.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import cv2
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
# Demo script: outline cell-sized regions of a screenshot and display them.

# Load the image. The path is relative, so it is resolved against the
# current working directory of the process running this script.
IMAGE_PATH = '../../mm_agents/stackoverflow.png'
image = cv2.imread(IMAGE_PATH)
if image is None:
    # cv2.imread reports a missing or undecodable file by returning None
    # rather than raising; fail here with a clear message instead of letting
    # cvtColor abort later with an opaque OpenCV assertion error.
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

# Convert to grayscale (adaptiveThreshold needs a single-channel image)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Apply adaptive thresholding to get a binary image.
# Positional arguments: max value 255, Gaussian-weighted neighbourhood,
# inverted binary output, block size 11, constant C = 2.
thresh = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
)

# Find contours: outermost contours only, with straight segments compressed
# to their end points. The second return value (hierarchy) is not needed.
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Filter out contours that are not of cell size
# This is done by assuming that cells will have a relatively standard size
# The size filter is just a placeholder, real values depend on the actual image size
min_cell_size = 500
max_cell_size = 5000
cell_contours = [cnt for cnt in contours if min_cell_size < cv2.contourArea(cnt) < max_cell_size]

# Draw contours on a copy so the loaded image stays untouched.
# -1 draws every contour in the list; colour is green in BGR, 2 px thick.
contour_output = image.copy()
cv2.drawContours(contour_output, cell_contours, -1, (0, 255, 0), 2)

# Display the image with cell contours.
# OpenCV stores pixels as BGR while matplotlib expects RGB, hence the conversion.
plt.figure(figsize=(12, 6))
plt.imshow(cv2.cvtColor(contour_output, cv2.COLOR_BGR2RGB))
plt.title('Spreadsheet with Cell Contours')
plt.axis('off')
plt.show()
|
||||||
32
utils/image_processing/point_marking.py
Normal file
32
utils/image_processing/point_marking.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
from PIL import Image, ImageDraw
|
||||||
|
|
||||||
|
|
||||||
|
def mark_point(image_path: str, x: int, y: int, radius: int = 5, color: str = 'red') -> str:
    """
    Mark a point on an image and save the result as a new file.

    A filled circle is drawn on the image loaded from ``image_path`` and the
    result is written to a path with ``_marked`` inserted before the file
    extension (``shot.png`` -> ``shot_marked.png``).

    Args:
        image_path: Path of the image to load.
        x: Horizontal pixel coordinate of the circle centre.
        y: Vertical pixel coordinate of the circle centre.
        radius: Radius of the circle in pixels.
        color: Fill and outline colour, passed straight to Pillow.

    Returns:
        The path the marked image was saved to.
    """
    # Function-scope import keeps this block self-contained.
    import os

    # Split on the real extension rather than assuming it is exactly four
    # characters long, so '.jpeg', '.webp' or '.tiff' inputs are named correctly.
    root, ext = os.path.splitext(image_path)
    marked_image_path = root + '_marked' + ext

    # The context manager closes the underlying file handle even if drawing
    # or saving raises.
    with Image.open(image_path) as image:
        # Create a draw object
        draw = ImageDraw.Draw(image)

        # Draw a small circle to mark the point; the ellipse is specified by
        # its bounding box (left, top, right, bottom).
        draw.ellipse((x - radius, y - radius, x + radius, y + radius), fill=color, outline=color)

        # Save the image with the point marked
        image.save(marked_image_path)

    return marked_image_path
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Demo run: draw a 30 px red dot at (100, 200) on a sample screenshot
    # and report where the annotated copy was written.
    marked_image_path = mark_point(
        image_path='chrome_start.png',
        x=100,
        y=200,
        radius=30,
        color='red',
    )
    print(f"Marked image saved to {marked_image_path}")
|
||||||
Reference in New Issue
Block a user