Files
sci-gui-agent-benchmark/utils/ducktrack.py
2023-12-04 00:51:33 +08:00

225 lines
8.3 KiB
Python

import sys, pathlib;
sys.path.append(str(pathlib.Path(__file__).parents[1]))
import os
import math
import json
from typing import List
from copy import deepcopy
# Key-name translation table applied to the "name" field of key press/release
# events: names found here are replaced by the mapped spelling, every other
# name is passed through unchanged. (Judging by the identifier, the keys are
# pynput names and the values are the pyautogui equivalents.)
pynput2pyautogui_key = dict(
    alt_l="altleft",
    alt_r="altright",
)
class DuckTrackEventActionConverter:
    """Translate DuckTrack-recorded input events into action descriptions.

    Every event is a dictionary with an ``"action"`` key (``"move"``,
    ``"click"``, ``"press"``, ``"release"`` or ``"scroll"``) plus the fields
    that kind of event needs (``"x"``/``"y"``, ``"button"``/``"pressed"``,
    ``"name"``, ``"dx"``/``"dy"``, ``"time_stamp"``).

    Actions are dictionaries of the form
    ``{"action_type": ..., "parameters": {...}}``. Only mouse moves
    additionally support the ``"pyautogui"`` action space, in which case the
    action is a string of pyautogui source code.
    """

    # Multi-click detection thresholds. The gap is expressed in the unit of
    # the events' "time_stamp" field (0.5 corresponds to 500 ms); the radii
    # bound how far the pointer may drift from the FIRST press before the
    # second / third press stops counting as part of the same sequence.
    _MAX_CLICK_GAP = 0.5
    _SECOND_CLICK_RADIUS = 4
    _THIRD_CLICK_RADIUS = 5

    def __init__(self):
        """Create a converter; it keeps no state between conversions."""

    ### Enumerations ###
    def move_event_to_action(self, event: dict, action_space: str = "computer_13"):
        """Convert a mouse move event to its corresponding action.

        Args:
            event: Move event providing ``"x"`` and ``"y"``.
            action_space: ``"computer_13"`` (dictionary action) or
                ``"pyautogui"`` (source-code string).

        Returns:
            A ``MOVE_TO`` action dictionary, or a ``pyautogui.moveTo(...)``
            string for the ``"pyautogui"`` action space.

        Raises:
            NotImplementedError: If ``action_space`` is not supported.
        """
        if action_space == "computer_13":
            return {
                "action_type": "MOVE_TO",
                "parameters": {
                    "x": event["x"],
                    "y": event["y"],
                },
            }
        if action_space == "pyautogui":
            return "pyautogui.moveTo({}, {})".format(event["x"], event["y"])
        # Fail loudly instead of silently handing back None.
        raise NotImplementedError(action_space)

    def click_event_to_action(self, event: dict, action_space: str = "computer_13"):
        """Convert a mouse button event to a ``MOUSE_DOWN``/``MOUSE_UP`` action.

        Args:
            event: Click event providing ``"button"`` and ``"pressed"``.
            action_space: Accepted for signature symmetry; not used here.

        Raises:
            NotImplementedError: If the button is not ``"left"``,
                ``"right"`` or ``"middle"``.
        """
        mouse_button = event["button"]
        mouse_pressed = event["pressed"]
        if mouse_button not in ("left", "right", "middle"):
            raise NotImplementedError(mouse_button)
        return {
            "action_type": "MOUSE_DOWN" if mouse_pressed else "MOUSE_UP",
            "parameters": {
                "button": mouse_button,
            },
        }

    @staticmethod
    def _translate_key(name: str) -> str:
        """Return the mapped spelling of a key name, or the name itself."""
        return pynput2pyautogui_key.get(name, name)

    def press_event_to_action(self, event: dict, action_space: str = "computer_13"):
        """Convert a key down event (``event["name"]``) to a ``KEY_DOWN`` action."""
        # NOTE: the `key down`, `press` have the same meaning here, while different in pyautogui
        return {
            "action_type": "KEY_DOWN",
            "parameters": {
                "key": self._translate_key(event["name"]),
            },
        }

    def release_event_to_action(self, event: dict, action_space: str = "computer_13"):
        """Convert a key release event (``event["name"]``) to a ``KEY_UP`` action."""
        return {
            "action_type": "KEY_UP",
            "parameters": {
                "key": self._translate_key(event["name"]),
            },
        }

    def scroll_event_to_action(self, event: dict, action_space: str = "computer_13"):
        """Convert a scroll event (``"dx"``, ``"dy"``) to a ``SCROLL`` action."""
        return {
            "action_type": "SCROLL",
            "parameters": {
                "dx": event["dx"],
                "dy": event["dy"],
            },
        }

    def event_to_action(self, event: dict, action_space: str = "computer_13"):
        """Convert an event to its action based on ``event["action"]``.

        ``action_space`` is forwarded to the specific converter.

        Raises:
            NotImplementedError: If the event kind is unknown.
        """
        converters = {
            "move": self.move_event_to_action,
            "click": self.click_event_to_action,
            "press": self.press_event_to_action,
            "release": self.release_event_to_action,
            "scroll": self.scroll_event_to_action,
        }
        kind = event["action"]
        if kind not in converters:
            raise NotImplementedError(kind)
        return converters[kind](event, action_space)

    ### Compressing ###
    def compress_mouse_move(self, data: List[dict], index: int):
        """Collapse the run of consecutive move events starting at ``index``.

        Returns:
            ``(first_move, last_move, next_index)`` where ``next_index`` is
            the position of the first event after the run.
        """
        first_move, last_move = data[index], data[index]
        while index < len(data) and data[index]["action"] == "move":
            last_move = data[index]
            index += 1
        return first_move, last_move, index

    @staticmethod
    def _moved_beyond(origin: dict, other: dict, radius: float) -> bool:
        """Tell whether ``other`` lies more than ``radius`` away from ``origin``.

        Events without coordinates (e.g. key events) never count as movement.
        """
        if "x" not in other or "y" not in other:
            return False
        distance = math.sqrt((other["y"] - origin["y"]) ** 2 +
                             (other["x"] - origin["x"]) ** 2)
        return distance > radius

    def _count_clicks(self, events: List[dict], index: int) -> int:
        """Return how many presses (1, 2 or 3) form the click sequence at ``index``.

        ``events[index]`` must be a button press. A following press extends
        the sequence only if it uses the same button, happens within
        ``_MAX_CLICK_GAP`` of the previous press and the pointer stayed
        within the click radius of the first press.
        """
        first = events[index]
        button = first["button"]
        for second_pos in range(index + 1, len(events)):
            second = events[second_pos]
            # make sure the time between mouse clicks is less than 500ms
            if second["time_stamp"] - first["time_stamp"] > self._MAX_CLICK_GAP:
                break
            # if the mouse moves out of the click radius, it is not a click sequence
            if self._moved_beyond(first, second, self._SECOND_CLICK_RADIUS):
                break
            if second["action"] == "click" and second["pressed"]:
                if second["button"] != button:
                    # A press of another button starts its own sequence.
                    break
                for third_pos in range(second_pos + 1, len(events)):
                    third = events[third_pos]
                    if third["time_stamp"] - second["time_stamp"] > self._MAX_CLICK_GAP:
                        break
                    if self._moved_beyond(first, third, self._THIRD_CLICK_RADIUS):
                        break
                    if third["action"] == "click" and third["pressed"]:
                        if third["button"] != button:
                            break
                        return 3
                return 2
        return 1

    ### Converting ###
    def ducktrack_event_file_to_action(self, ducktrack_event_file: str, out_file: str, compress_move: bool = True):
        """Convert a DuckTrack event file to a list of actions saved as JSON.

        The input holds one JSON event per line (blank lines are ignored).
        Runs of move events are reduced to a single ``MOVE_TO`` of the last
        position when ``compress_move`` is true. Rapid same-button presses
        are folded into one ``CLICK`` action with ``num_clicks`` 2 or 3; the
        presses and releases belonging to that sequence are then dropped so
        no stray ``MOUSE_DOWN``/``MOUSE_UP`` follows the ``CLICK``.

        Args:
            ducktrack_event_file: Path of the JSON-lines event recording.
            out_file: Path the JSON list of actions is written to.
            compress_move: Whether consecutive moves are collapsed.

        Raises:
            FileNotFoundError: If ``ducktrack_event_file`` does not exist.
        """
        if not os.path.exists(ducktrack_event_file):
            raise FileNotFoundError(ducktrack_event_file)

        with open(ducktrack_event_file, "r", encoding="utf-8") as file:
            events = [json.loads(line) for line in file if line.strip()]

        # Save the compressed actions in a list
        result = []
        index = 0
        presses_to_skip = 0
        releases_to_skip = 0

        while index < len(events):
            event = events[index]

            if event["action"] == "move" and compress_move:
                # compress_mouse_move already advances index past the run.
                _, last_move, index = self.compress_mouse_move(events, index)
                result.append(self.event_to_action(last_move))
                continue

            if event["action"] == "click":
                button = event["button"]
                if event["pressed"]:
                    if presses_to_skip:
                        presses_to_skip -= 1
                    else:
                        num_clicks = self._count_clicks(events, index)
                        if num_clicks == 1:
                            result.append({
                                "action_type": "MOUSE_DOWN",
                                "parameters": {
                                    "button": button,
                                },
                            })
                        else:
                            result.append({
                                "action_type": "CLICK",
                                "parameters": {
                                    "button": button,
                                    "num_clicks": num_clicks,
                                },
                            })
                            # CLICK covers every press AND every release of
                            # the sequence: the current press is consumed
                            # now, the remaining presses and all releases
                            # must be dropped when they are reached.
                            presses_to_skip += num_clicks - 1
                            releases_to_skip += num_clicks
                elif releases_to_skip:
                    releases_to_skip -= 1
                else:
                    result.append({
                        "action_type": "MOUSE_UP",
                        "parameters": {
                            "button": button,
                        },
                    })
            else:
                result.append(self.event_to_action(event))
            index += 1

        with open(out_file, "w", encoding="utf-8") as f:
            json.dump(result, f)
if __name__ == "__main__":
    # Script entry point: convert the recording "events_calc.jsonl" from the
    # current working directory into the action list "events_calc.json",
    # collapsing consecutive mouse moves.
    DuckTrackEventActionConverter().ducktrack_event_file_to_action(
        "events_calc.jsonl",
        "events_calc.json",
        compress_move=True,
    )