Add Claude Sonnet 4.5 support and improve action handling (#362)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Qichen Fu
2025-11-13 21:54:32 -08:00
committed by GitHub
parent 3167339e45
commit 903ed36715
8 changed files with 578 additions and 129 deletions

View File

@@ -391,12 +391,12 @@ class DesktopEnv(gym.Env):
logger.info(f"Step {self._step_no} in trajectory {self._traj_no} with action: {action}")
# handle the special actions
if action in ['WAIT', 'FAIL', 'DONE'] or (type(action) == dict and action['action_type'] in ['WAIT', 'FAIL', 'DONE']):
if action == 'WAIT':
if action == 'WAIT' or (type(action) == dict and action.get('action_type') == 'WAIT'):
time.sleep(pause)
elif action == 'FAIL':
elif action == 'FAIL' or (type(action) == dict and action.get('action_type') == 'FAIL'):
done = True
info = {"fail": True}
elif action == 'DONE':
elif action == 'DONE' or (type(action) == dict and action.get('action_type') == 'DONE'):
done = True
info = {"done": True}
@@ -404,7 +404,7 @@ class DesktopEnv(gym.Env):
# the set of all possible actions defined in the action representation
self.controller.execute_action(action)
elif self.action_space == "pyautogui" or self.action_space == "claude_computer_use":
if action in ['WAIT', 'FAIL', 'DONE']:
if action in ['WAIT', 'FAIL', 'DONE'] or (type(action) == dict and action.get('action_type') in ['WAIT', 'FAIL', 'DONE']):
self.controller.execute_action(action)
else:
# the set of all possible python commands inside `pyautogui`
@@ -434,13 +434,16 @@ class DesktopEnv(gym.Env):
self.is_environment_used = True
if self.evaluator['func'] == "infeasible":
if len(self.action_history) > 0 and self.action_history[-1] == "FAIL":
return 1
else:
return 0
if len(self.action_history) > 0:
last_action = self.action_history[-1]
if last_action == "FAIL" or (type(last_action) == dict and last_action.get('action_type') == 'FAIL'):
return 1
return 0
else:
if len(self.action_history) > 0 and self.action_history[-1] == "FAIL":
return 0
if len(self.action_history) > 0:
last_action = self.action_history[-1]
if last_action == "FAIL" or (type(last_action) == dict and last_action.get('action_type') == 'FAIL'):
return 0
if type(self.metric) == list:
# Multiple metrics to evaluate whether the task is successfully completed