refactor: remove AWSVMManagerWithProxy and integrate proxy support directly into AWSVMManager for streamlined VM allocation

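Minor fixes in openai_cua_agent: handle both message structures ("output" and "content") when refreshing the screenshot after an OpenAI API error, and escape typed text with repr() instead of manual quote escaping.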
2025-06-06 02:55:50 +08:00
parent 8b7727d955
commit 8373f7cff2
4 changed files with 72 additions and 344 deletions
--- a/mm_agents/openai_cua_agent.py
+++ b/mm_agents/openai_cua_agent.py
@@ -309,7 +309,21 @@ class OpenAICUAAgent:
                logger.error(f"OpenAI API error: {str(e)}")
                new_screenshot = self.env._get_obs()
                new_screenshot_base64 = base64.b64encode(new_screenshot["screenshot"]).decode('utf-8')
-                self.cua_messages[-1]["output"]["image_url"] = f"data:image/png;base64,{new_screenshot_base64}"
+                
+                # Update the image in the last message based on its structure
+                last_message = self.cua_messages[-1]
+                if "output" in last_message:
+                    # Computer call output message structure
+                    last_message["output"]["image_url"] = f"data:image/png;base64,{new_screenshot_base64}"
+                elif "content" in last_message:
+                    # User message structure - find and update the image content
+                    for content_item in last_message["content"]:
+                        if content_item.get("type") == "input_image":
+                            content_item["image_url"] = f"data:image/png;base64,{new_screenshot_base64}"
+                            break
+                else:
+                    logger.warning("Unknown message structure, cannot update screenshot")
+                
                retry_count += 1
                time.sleep(1)
        raise Exception("Failed to make OpenAI API call after 3 retries")
@@ -452,10 +466,7 @@ class OpenAICUAAgent:
                    logger.warning("Empty text for type action")
                    return "import pyautogui\n# Empty text, no action taken"
                
-                pattern = r"(?<!\\)'"
-                text = re.sub(pattern, r"\\'", text)
-                
-                # 使用三重引号来确保字符串内容不会破坏格式
+                # Use repr() to properly escape the string content without double-escaping
                pyautogui_code = f"""import pyautogui\npyautogui.typewrite({repr(text)})"""
                logger.info(f"Pyautogui code: {pyautogui_code}")
                return pyautogui_code