EvoCUA Update (2025.01.05) (#412)

* evocua init

* setup max_token

* evocua update

---------

Co-authored-by: xuetaofeng <xuetaofeng@meituan.com>
Co-authored-by: Tianbao Xie <47296835+Timothyxxx@users.noreply.github.com>
This commit is contained in:
蘑菇先生
2026-01-05 16:14:53 +08:00
committed by GitHub
parent 439e178a2e
commit 5ef8bdfa35
3 changed files with 110 additions and 24 deletions

View File

@@ -317,7 +317,26 @@ Previous actions:
args = tool_call["arguments"]
action = args["action"]
if action == "left_click":
def _clean_keys(raw_keys):
keys = raw_keys if isinstance(raw_keys, list) else [raw_keys]
cleaned_keys = []
for key in keys:
if isinstance(key, str):
if key.startswith("keys=["):
key = key[6:]
if key.endswith("]"):
key = key[:-1]
if key.startswith("['") or key.startswith('["'):
key = key[2:] if len(key) > 2 else key
if key.endswith("']") or key.endswith('"]'):
key = key[:-2] if len(key) > 2 else key
key = key.strip()
cleaned_keys.append(key)
else:
cleaned_keys.append(key)
return cleaned_keys
if action == "left_click" or action == "click":
if "coordinate" in args:
x, y = args["coordinate"]
adj_x, adj_y = adjust_coordinates(x, y)
@@ -355,6 +374,16 @@ Previous actions:
else:
pyautogui_code.append("pyautogui.doubleClick()")
elif action == "triple_click":
if "coordinate" in args:
x, y = args["coordinate"]
adj_x, adj_y = adjust_coordinates(x, y)
pyautogui_code.append(
f"pyautogui.tripleClick({adj_x}, {adj_y})"
)
else:
pyautogui_code.append("pyautogui.tripleClick()")
elif action == "type":
text = args.get("text", "")
@@ -383,24 +412,7 @@ Previous actions:
elif action == "key":
keys = args.get("keys", [])
if isinstance(keys, list):
cleaned_keys = []
for key in keys:
if isinstance(key, str):
if key.startswith("keys=["):
key = key[6:]
if key.endswith("]"):
key = key[:-1]
if key.startswith("['") or key.startswith('["'):
key = key[2:] if len(key) > 2 else key
if key.endswith("']") or key.endswith('"]'):
key = key[:-2] if len(key) > 2 else key
key = key.strip()
cleaned_keys.append(key)
else:
cleaned_keys.append(key)
keys = cleaned_keys
keys = _clean_keys(args.get("keys", []))
keys_str = ", ".join([f"'{key}'" for key in keys])
if len(keys) > 1:
@@ -408,6 +420,16 @@ Previous actions:
else:
pyautogui_code.append(f"pyautogui.press({keys_str})")
elif action == "key_down":
keys = _clean_keys(args.get("keys", []))
for k in keys:
pyautogui_code.append(f"pyautogui.keyDown('{k}')")
elif action == "key_up":
keys = _clean_keys(args.get("keys", []))
for k in reversed(keys):
pyautogui_code.append(f"pyautogui.keyUp('{k}')")
elif action == "scroll":
pixels = args.get("pixels", 0)
pyautogui_code.append(f"pyautogui.scroll({pixels})")
@@ -416,7 +438,15 @@ Previous actions:
pyautogui_code.append("WAIT")
elif action == "terminate":
pyautogui_code.append("DONE")
# Termination should respect status:
# - success -> DONE
# - failure -> FAIL
# Backward compatible: missing status defaults to success.
status = args.get("status", "success")
if str(status).lower() == "failure":
pyautogui_code.append("FAIL")
else:
pyautogui_code.append("DONE")
elif action == "mouse_move":
if "coordinate" in args:
@@ -481,7 +511,11 @@ Previous actions:
process_tool_call("\n".join(current_tool_call))
if not low_level_instruction and len(pyautogui_code) > 0:
action_type = pyautogui_code[0].split(".", 1)[1].split("(", 1)[0]
first_action = pyautogui_code[0]
if "." in first_action:
action_type = first_action.split(".", 1)[1].split("(", 1)[0]
else:
action_type = first_action.lower()
low_level_instruction = f"Performing {action_type} action"
return low_level_instruction, pyautogui_code