Implement heuristic cutting on the accessibility tree to get the important nodes; Finish accessibility tree text agent
This commit is contained in:
@@ -1,10 +1,12 @@
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from typing import Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from mm_agents.accessibility_tree_wrap.heuristic_retrieve import find_leaf_nodes, filter_nodes
|
||||
from mm_agents.gpt_4_prompt_action import SYS_PROMPT as SYS_PROMPT_ACTION
|
||||
from mm_agents.gpt_4_prompt_code import SYS_PROMPT as SYS_PROMPT_CODE
|
||||
|
||||
@@ -81,9 +83,9 @@ class GPT4_Agent:
|
||||
{
|
||||
"type": "text",
|
||||
"text": {
|
||||
"computer_13": SYS_PROMPT_ACTION,
|
||||
"pyautogui": SYS_PROMPT_CODE
|
||||
}[action_space] + "\nHere is the instruction for the task: {}".format(self.instruction)
|
||||
"computer_13": SYS_PROMPT_ACTION,
|
||||
"pyautogui": SYS_PROMPT_CODE
|
||||
}[action_space] + "\nHere is the instruction for the task: {}".format(self.instruction)
|
||||
},
|
||||
]
|
||||
}
|
||||
@@ -94,12 +96,27 @@ class GPT4_Agent:
|
||||
Predict the next action(s) based on the current observation.
|
||||
"""
|
||||
accessibility_tree = obs["accessibility_tree"]
|
||||
|
||||
leaf_nodes = find_leaf_nodes(accessibility_tree)
|
||||
filtered_nodes = filter_nodes(leaf_nodes)
|
||||
|
||||
linearized_accessibility_tree = "tag\ttext\tposition\tsize\n"
|
||||
# Linearize the accessibility tree nodes into a table format
|
||||
|
||||
for node in filtered_nodes:
|
||||
linearized_accessibility_tree += node.tag + "\t"
|
||||
linearized_accessibility_tree += node.attrib.get('name') + "\t"
|
||||
linearized_accessibility_tree += node.attrib.get(
|
||||
'{uri:deskat:component.at-spi.gnome.org}screencoord') + "\t"
|
||||
linearized_accessibility_tree += node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size') + "\n"
|
||||
|
||||
self.trajectory.append({
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Given the XML format of accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(accessibility_tree)
|
||||
"text": "Given the XML format of accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(
|
||||
linearized_accessibility_tree)
|
||||
}
|
||||
]
|
||||
})
|
||||
@@ -117,7 +134,16 @@ class GPT4_Agent:
|
||||
"messages": self.trajectory,
|
||||
"max_tokens": self.max_tokens
|
||||
}
|
||||
response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers, json=payload)
|
||||
|
||||
while True:
|
||||
try:
|
||||
response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers,
|
||||
json=payload)
|
||||
break
|
||||
except:
|
||||
print("Failed to generate response, retrying...")
|
||||
time.sleep(5)
|
||||
pass
|
||||
|
||||
try:
|
||||
actions = self.parse_actions(response.json()['choices'][0]['message']['content'])
|
||||
|
||||
Reference in New Issue
Block a user