feat: enhance AnthropicAgent with start_coordinate handling and modifier key support
- Added support for an optional start_coordinate parameter to facilitate drag actions from a specified starting point.
- Implemented validation for start_coordinate to ensure it is a list or tuple of two integers.
- Enhanced click actions to handle modifier keys, allowing for more complex interactions.
- Ensured existing code logic remains unchanged while improving functionality and usability.
This commit is contained in:
@@ -101,6 +101,7 @@ class AnthropicAgent:
|
||||
|
||||
text = function_args.get("text")
|
||||
coordinate = function_args.get("coordinate")
|
||||
start_coordinate = function_args.get("start_coordinate")
|
||||
scroll_direction = function_args.get("scroll_direction")
|
||||
scroll_amount = function_args.get("scroll_amount")
|
||||
duration = function_args.get("duration")
|
||||
@@ -111,6 +112,11 @@ class AnthropicAgent:
|
||||
int(coordinate[0] * self.resize_factor[0]),
|
||||
int(coordinate[1] * self.resize_factor[1])
|
||||
)
|
||||
if start_coordinate and self.resize_factor:
|
||||
start_coordinate = (
|
||||
int(start_coordinate[0] * self.resize_factor[0]),
|
||||
int(start_coordinate[1] * self.resize_factor[1])
|
||||
)
|
||||
|
||||
if action == "left_mouse_down":
|
||||
result += "pyautogui.mouseDown()\n"
|
||||
@@ -145,6 +151,16 @@ class AnthropicAgent:
|
||||
)
|
||||
expected_outcome = f"Mouse moved to ({x},{y})."
|
||||
elif action == "left_click_drag":
|
||||
# If start_coordinate is provided, validate and move to start before dragging
|
||||
if start_coordinate:
|
||||
if not isinstance(start_coordinate, (list, tuple)) or len(start_coordinate) != 2:
|
||||
raise ValueError(f"{start_coordinate} must be a tuple of length 2")
|
||||
if not all(isinstance(i, int) for i in start_coordinate):
|
||||
raise ValueError(f"{start_coordinate} must be a tuple of ints")
|
||||
start_x, start_y = start_coordinate[0], start_coordinate[1]
|
||||
result += (
|
||||
f"pyautogui.moveTo({start_x}, {start_y}, duration={duration or 0.5})\n"
|
||||
)
|
||||
result += (
|
||||
f"pyautogui.dragTo({x}, {y}, duration={duration or 0.5})\n"
|
||||
)
|
||||
@@ -209,6 +225,12 @@ class AnthropicAgent:
|
||||
|
||||
# Handle click actions
|
||||
elif action in ("left_click", "right_click", "double_click", "middle_click", "left_press", "triple_click"):
|
||||
# Handle modifier keys during click if specified
|
||||
if text:
|
||||
keys = text.split('+')
|
||||
for key in keys:
|
||||
key = key.strip().lower()
|
||||
result += f"pyautogui.keyDown('{key}')\n"
|
||||
if coordinate is not None:
|
||||
x, y = coordinate
|
||||
if action == "left_click":
|
||||
@@ -241,6 +263,12 @@ class AnthropicAgent:
|
||||
result += ("pyautogui.mouseUp()\n")
|
||||
elif action == "triple_click":
|
||||
result += ("pyautogui.tripleClick()\n")
|
||||
# Release modifier keys after click
|
||||
if text:
|
||||
keys = text.split('+')
|
||||
for key in reversed(keys):
|
||||
key = key.strip().lower()
|
||||
result += f"pyautogui.keyUp('{key}')\n"
|
||||
expected_outcome = "Click action finished"
|
||||
|
||||
elif action == "wait":
|
||||
|
||||
Reference in New Issue
Block a user