fix #210: add a11y_tree support to UITARSAgent (#346)

This commit is contained in:
eun2ce
2025-09-26 19:25:28 +09:00
committed by GitHub
parent 6827949418
commit 5eb5417188

View File

@@ -506,17 +506,18 @@ class UITARSAgent:
if last_action_after_obs is not None and self.infer_mode == "double_image":
self.history_images.append(last_action_after_obs["screenshot"])
self.history_images.append(obs["screenshot"])
if self.observation_type in ["screenshot", "screenshot_a11y_tree"]:
base64_image = obs["screenshot"]
self.history_images.append(obs["screenshot"])
if self.observation_type in ["screenshot", "screenshot_a11y_tree", "a11y_tree"]:
base64_image = obs["screenshot"] if self.observation_type in ["screenshot", "screenshot_a11y_tree"] else None
try:
linearized_accessibility_tree = (
linearize_accessibility_tree(
accessibility_tree=obs["accessibility_tree"],
platform=self.platform,
)
if self.observation_type == "screenshot_a11y_tree"
if self.observation_type in ["screenshot_a11y_tree", "a11y_tree"]
else None
)
except:
@@ -535,7 +536,14 @@ class UITARSAgent:
"accessibility_tree": linearized_accessibility_tree,
}
)
else:
elif self.observation_type == "a11y_tree":
self.observations.append(
{
"screenshot": None,
"accessibility_tree": linearized_accessibility_tree,
}
)
else: # screenshot
self.observations.append(
{"screenshot": base64_image, "accessibility_tree": None}
)
@@ -760,4 +768,4 @@ class UITARSAgent:
self.actions = []
self.observations = []
self.history_images = []
self.history_responses = []
self.history_responses = []