fix #210: add a11y_tree support to UITARSAgent (#346)

This commit is contained in:
eun2ce
2025-09-26 19:25:28 +09:00
committed by GitHub
parent 6827949418
commit 5eb5417188

View File

@@ -506,17 +506,18 @@ class UITARSAgent:
if last_action_after_obs is not None and self.infer_mode == "double_image": if last_action_after_obs is not None and self.infer_mode == "double_image":
self.history_images.append(last_action_after_obs["screenshot"]) self.history_images.append(last_action_after_obs["screenshot"])
self.history_images.append(obs["screenshot"])
if self.observation_type in ["screenshot", "screenshot_a11y_tree"]: if self.observation_type in ["screenshot", "screenshot_a11y_tree"]:
base64_image = obs["screenshot"] self.history_images.append(obs["screenshot"])
if self.observation_type in ["screenshot", "screenshot_a11y_tree", "a11y_tree"]:
base64_image = obs["screenshot"] if self.observation_type in ["screenshot", "screenshot_a11y_tree"] else None
try: try:
linearized_accessibility_tree = ( linearized_accessibility_tree = (
linearize_accessibility_tree( linearize_accessibility_tree(
accessibility_tree=obs["accessibility_tree"], accessibility_tree=obs["accessibility_tree"],
platform=self.platform, platform=self.platform,
) )
if self.observation_type == "screenshot_a11y_tree" if self.observation_type in ["screenshot_a11y_tree", "a11y_tree"]
else None else None
) )
except: except:
@@ -535,7 +536,14 @@ class UITARSAgent:
"accessibility_tree": linearized_accessibility_tree, "accessibility_tree": linearized_accessibility_tree,
} }
) )
else: elif self.observation_type == "a11y_tree":
self.observations.append(
{
"screenshot": None,
"accessibility_tree": linearized_accessibility_tree,
}
)
else: # screenshot
self.observations.append( self.observations.append(
{"screenshot": base64_image, "accessibility_tree": None} {"screenshot": base64_image, "accessibility_tree": None}
) )
@@ -760,4 +768,4 @@ class UITARSAgent:
self.actions = [] self.actions = []
self.observations = [] self.observations = []
self.history_images = [] self.history_images = []
self.history_responses = [] self.history_responses = []