Dev/uitars (#132)

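* init uitars
* change agent class name
* FIX: return bug in agent predict (every return now yields the 2-tuple `(prediction, actions)`; the third element, `parsed_responses` or `[]`, is dropped)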
2025-02-14 11:17:37 +08:00
parent 75601efc6a
commit 339a13e1d5
1 changed file with 11 additions and 7 deletions
--- a/mm_agents/uitars_agent.py
+++ b/mm_agents/uitars_agent.py
@@ -688,7 +688,8 @@ class UITARSAgent:
                try_times -= 1
                
        if prediction is None:
-            return "client error", ["DONE"], []
+            return "client error", ["DONE"]
+
        
        self.history_responses.append(prediction)
        self.thoughts.append(prediction)
@@ -702,7 +703,7 @@ class UITARSAgent:
            )
        except Exception as e:
            print(f"Parsing action error: {prediction}, with error:\n{e}")
-            return f"Parsing action error: {prediction}, with error:\n{e}", ["DONE"], []
+            return f"Parsing action error: {prediction}, with error:\n{e}", ["DONE"]

        actions = []
        for parsed_response in parsed_responses:
@@ -710,19 +711,21 @@ class UITARSAgent:

                if parsed_response["action_type"] == FINISH_WORD:
                    self.actions.append(actions)
-                    return prediction, ["DONE"], parsed_responses
+
+                    return prediction, ["DONE"]
                
                elif parsed_response["action_type"] == WAIT_WORD:
                    self.actions.append(actions)
-                    return prediction, ["WAIT"], parsed_responses
+                    return prediction, ["WAIT"]
                
                elif parsed_response["action_type"] == ENV_FAIL_WORD:
                    self.actions.append(actions)
-                    return prediction, ["FAIL"], parsed_responses 
+                    return prediction, ["FAIL"]

                elif parsed_response["action_type"] == CALL_USER:
                    self.actions.append(actions)
-                    return prediction, ["FAIL"], parsed_responses 
+                    return prediction, ["FAIL"]
+
            
            pyautogui_code = parsing_response_to_pyautogui_code(
                parsed_response,
@@ -738,7 +741,8 @@ class UITARSAgent:
            # Default to FAIL if exceed max steps
            actions = ["FAIL"]

-        return prediction, actions, parsed_responses
+        return prediction, actions
+

    @backoff.on_exception(
        backoff.constant,