Add Claude Sonnet 4.5 support and improve action handling (#362)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude <noreply@anthropic.com>
2025-11-13 21:54:32 -08:00
parent 3167339e45
commit 903ed36715
8 changed files with 578 additions and 129 deletions
--- a/desktop_env/controllers/python.py
+++ b/desktop_env/controllers/python.py
@@ -238,12 +238,17 @@ class PythonController:
            "returncode": -1
        }

-    def execute_action(self, action: Dict[str, Any]):
+    def execute_action(self, action):
        """
        Executes an action on the server computer.
        """
+        # Handle string actions
        if action in ['WAIT', 'FAIL', 'DONE']:
            return
+        
+        # Handle dictionary actions
+        if type(action) == dict and action.get('action_type') in ['WAIT', 'FAIL', 'DONE']:
+            return

        action_type = action["action_type"]
        parameters = action["parameters"] if "parameters" in action else {param: action[param] for param in action if param != 'action_type'}
--- a/desktop_env/desktop_env.py
+++ b/desktop_env/desktop_env.py
@@ -391,12 +391,12 @@ class DesktopEnv(gym.Env):
        logger.info(f"Step {self._step_no} in trajectory {self._traj_no} with action: {action}")
        # handle the special actions
        if action in ['WAIT', 'FAIL', 'DONE'] or (type(action) == dict and action['action_type'] in ['WAIT', 'FAIL', 'DONE']):
-            if action == 'WAIT':
+            if action == 'WAIT' or (type(action) == dict and action.get('action_type') == 'WAIT'):
                time.sleep(pause)
-            elif action == 'FAIL':
+            elif action == 'FAIL' or (type(action) == dict and action.get('action_type') == 'FAIL'):
                done = True
                info = {"fail": True}
-            elif action == 'DONE':
+            elif action == 'DONE' or (type(action) == dict and action.get('action_type') == 'DONE'):
                done = True
                info = {"done": True}

@@ -404,7 +404,7 @@ class DesktopEnv(gym.Env):
            # the set of all possible actions defined in the action representation
            self.controller.execute_action(action)
        elif self.action_space == "pyautogui" or self.action_space == "claude_computer_use":
-            if action in ['WAIT', 'FAIL', 'DONE']:
+            if action in ['WAIT', 'FAIL', 'DONE'] or (type(action) == dict and action.get('action_type') in ['WAIT', 'FAIL', 'DONE']):
                self.controller.execute_action(action)
            else:
                # the set of all possible python commands insides `pyautogui`
@@ -434,13 +434,16 @@ class DesktopEnv(gym.Env):
            self.is_environment_used = True

        if self.evaluator['func'] == "infeasible":
-            if len(self.action_history) > 0 and self.action_history[-1] == "FAIL":
-                return 1
-            else:
-                return 0
+            if len(self.action_history) > 0:
+                last_action = self.action_history[-1]
+                if last_action == "FAIL" or (type(last_action) == dict and last_action.get('action_type') == 'FAIL'):
+                    return 1
+            return 0
        else:
-            if len(self.action_history) > 0 and self.action_history[-1] == "FAIL":
-                return 0
+            if len(self.action_history) > 0:
+                last_action = self.action_history[-1]
+                if last_action == "FAIL" or (type(last_action) == dict and last_action.get('action_type') == 'FAIL'):
+                    return 0

        if type(self.metric) == list:
            # Multiple metrics to evaluate whether the task is successfully completed
--- a/desktop_env/providers/aws/manager.py
+++ b/desktop_env/providers/aws/manager.py
@@ -10,7 +10,7 @@ from desktop_env.providers.aws.config import ENABLE_TTL, DEFAULT_TTL_MINUTES, AW
 from desktop_env.providers.aws.scheduler_utils import schedule_instance_termination


-INSTANCE_TYPE = "t3.medium" 
+INSTANCE_TYPE = "t3.xlarge" 

 # Load environment variables from .env file
 dotenv.load_dotenv()
@@ -40,9 +40,9 @@ DEFAULT_REGION = "us-east-1"
 # todo: public the AMI images
 IMAGE_ID_MAP = {
    "us-east-1": {
-        # (1920, 1080): "ami-0d23263edb96951d8"
+        (1920, 1080): "ami-0d23263edb96951d8",
        # For CoACT-1, uncomment to use the following AMI
-        (1920, 1080): "ami-0b505e9d0d99ba88c"
+        # (1920, 1080): "ami-0b505e9d0d99ba88c"
    },
    "ap-east-1": {
        (1920, 1080): "ami-06850864d18fad836"