feat: enhance AnthropicAgent with start_coordinate handling and modifier key support

- Added support for an optional start_coordinate parameter to facilitate drag actions from a specified starting point.
- Implemented validation for start_coordinate to ensure it is a list or tuple of exactly two integers.
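- Enhanced click actions to handle modifier keys: when `text` is given as a '+'-separated key combination (e.g. `ctrl+shift`), each key is pressed down before the click and released in reverse order afterwards, allowing for more complex interactions.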
- Ensured existing behavior remains unchanged when start_coordinate and text are not provided, while improving functionality and usability.
This commit is contained in:
Timothyxxx
2025-08-12 05:34:18 +00:00
parent 7418f5cf2f
commit d2ae0f697d

View File

@@ -101,6 +101,7 @@ class AnthropicAgent:
text = function_args.get("text")
coordinate = function_args.get("coordinate")
start_coordinate = function_args.get("start_coordinate")
scroll_direction = function_args.get("scroll_direction")
scroll_amount = function_args.get("scroll_amount")
duration = function_args.get("duration")
@@ -111,6 +112,11 @@ class AnthropicAgent:
int(coordinate[0] * self.resize_factor[0]),
int(coordinate[1] * self.resize_factor[1])
)
if start_coordinate and self.resize_factor:
start_coordinate = (
int(start_coordinate[0] * self.resize_factor[0]),
int(start_coordinate[1] * self.resize_factor[1])
)
if action == "left_mouse_down":
result += "pyautogui.mouseDown()\n"
@@ -145,6 +151,16 @@ class AnthropicAgent:
)
expected_outcome = f"Mouse moved to ({x},{y})."
elif action == "left_click_drag":
# If start_coordinate is provided, validate and move to start before dragging
if start_coordinate:
if not isinstance(start_coordinate, (list, tuple)) or len(start_coordinate) != 2:
raise ValueError(f"{start_coordinate} must be a tuple of length 2")
if not all(isinstance(i, int) for i in start_coordinate):
raise ValueError(f"{start_coordinate} must be a tuple of ints")
start_x, start_y = start_coordinate[0], start_coordinate[1]
result += (
f"pyautogui.moveTo({start_x}, {start_y}, duration={duration or 0.5})\n"
)
result += (
f"pyautogui.dragTo({x}, {y}, duration={duration or 0.5})\n"
)
@@ -209,6 +225,12 @@ class AnthropicAgent:
# Handle click actions
elif action in ("left_click", "right_click", "double_click", "middle_click", "left_press", "triple_click"):
# Handle modifier keys during click if specified
if text:
keys = text.split('+')
for key in keys:
key = key.strip().lower()
result += f"pyautogui.keyDown('{key}')\n"
if coordinate is not None:
x, y = coordinate
if action == "left_click":
@@ -241,6 +263,12 @@ class AnthropicAgent:
result += ("pyautogui.mouseUp()\n")
elif action == "triple_click":
result += ("pyautogui.tripleClick()\n")
# Release modifier keys after click
if text:
keys = text.split('+')
for key in reversed(keys):
key = key.strip().lower()
result += f"pyautogui.keyUp('{key}')\n"
expected_outcome = "Click action finished"
elif action == "wait":