refactor: remove AWSVMManagerWithProxy and integrate proxy support directly into AWSVMManager for streamlined VM allocation;

minor fix on openai_cua_agent
This commit is contained in:
Timothyxxx
2025-06-06 02:55:50 +08:00
parent 8b7727d955
commit 8373f7cff2
4 changed files with 72 additions and 344 deletions

View File

@@ -309,7 +309,21 @@ class OpenAICUAAgent:
logger.error(f"OpenAI API error: {str(e)}")
new_screenshot = self.env._get_obs()
new_screenshot_base64 = base64.b64encode(new_screenshot["screenshot"]).decode('utf-8')
self.cua_messages[-1]["output"]["image_url"] = f"data:image/png;base64,{new_screenshot_base64}"
# Update the image in the last message based on its structure
last_message = self.cua_messages[-1]
if "output" in last_message:
# Computer call output message structure
last_message["output"]["image_url"] = f"data:image/png;base64,{new_screenshot_base64}"
elif "content" in last_message:
# User message structure - find and update the image content
for content_item in last_message["content"]:
if content_item.get("type") == "input_image":
content_item["image_url"] = f"data:image/png;base64,{new_screenshot_base64}"
break
else:
logger.warning("Unknown message structure, cannot update screenshot")
retry_count += 1
time.sleep(1)
raise Exception("Failed to make OpenAI API call after 3 retries")
@@ -452,10 +466,7 @@ class OpenAICUAAgent:
logger.warning("Empty text for type action")
return "import pyautogui\n# Empty text, no action taken"
pattern = r"(?<!\\)'"
text = re.sub(pattern, r"\\'", text)
# 使用三重引号来确保字符串内容不会破坏格式
# Use repr() to properly escape the string content without double-escaping
pyautogui_code = f"""import pyautogui\npyautogui.typewrite({repr(text)})"""
logger.info(f"Pyautogui code: {pyautogui_code}")
return pyautogui_code