* autoglm-os initialize * clean code * chore: use proxy for download setup * feat(autoglm-os): add parameter to toggle images * fix: use temporary directory for files pulled from the vm to prevent potential collision when running multiple instances of the same task in parallel * update * add client_password * update multienv * fix * fix prompt * fix prompt * fix prompt * fix sys prompt * feat: use proxy in file evaluator * fix client_password * fix note_prompt * fix autoglm agent cmd type * fix * revert: fix: use temporary directory for files pulled from the vm to prevent potential collision when running multiple instances of the same task in parallel reverts commit bab5473eea1de0e61b0e1d68b23ce324a5b0ee57 * feat(autoglm): setup tools * fix(autoglm): remove second time of get a11y tree * add osworld server restart * Revert "add osworld server restart" This reverts commit 7bd9d84122e246ce2a26de0e49c25494244c2b3d. * fix _launch_setup * fix autoglm agent tools & xml tree * fix desktop_env * fix bug for tool name capitalization * fix: always use proxy for setup download * add fail after exceeding max turns * fix(autoglm): avoid adding image to message when screenshot is empty * fix maximize_window * fix maximize_window * fix maximize_window * fix import browsertools module bug * fix task proxy config bug * restore setup * refactor desktop env * restore image in provider * restore file.py * refactor desktop_env * quick fix * refactor desktop_env.step * fix our env reset * add max truns constraint * clean run script * clean lib_run_single.py --------- Co-authored-by: hanyullai <hanyullai@outlook.com> Co-authored-by: JingBh <jingbohao@yeah.net>
203 lines
8.1 KiB
Python
203 lines
8.1 KiB
Python
import inspect
|
|
import json
|
|
import os
|
|
import textwrap
|
|
|
|
# Directory one level above the folder that contains this file; used as the
# base for locating bundled resources.
current_dir = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
)
|
|
|
|
|
|
def _render_stub(func_name, func, indent, leading_params=()):
    """Render one tool description as a Python stub (signature + docstring).

    Args:
        func_name: Name to put after ``def``.
        func: The ``"function"`` dict of a tool entry. ``description``,
            ``parameters``, ``properties``, ``required`` and each parameter's
            ``type``/``description`` are all treated as optional.
        indent: Whitespace prefix for the ``def`` line.
        leading_params: Names placed before the declared parameters
            (``("cls",)`` for stubs nested in a class).

    Returns:
        The stub text, terminated by one blank line.
    """
    parameters = func.get("parameters") or {}
    params = parameters.get("properties") or {}
    required = parameters.get("required") or []

    # Required parameters come first (in their declared order), followed by
    # the remaining ones in the order they appear in "properties".
    names = list(required) + [name for name in params if name not in required]

    # NOTE(review): the nesting widths below (4 spaces per level) are a
    # reconstruction; confirm against the prompt layout you expect.
    body_indent = indent + "    "
    arg_indent = body_indent + "    "

    lines = [
        f"{indent}def {func_name}({', '.join([*leading_params, *names])}):",
        f'{body_indent}"""',
        f"{body_indent}{func.get('description', '')}",
        "",
        f"{body_indent}Args:",
    ]
    if not names:
        lines.append(f"{arg_indent}None")
    for name in names:
        spec = params.get(name) or {}
        type_label = spec.get("type", "Any")
        if name not in required:
            type_label += ", optional"
        lines.append(f"{arg_indent}{name} ({type_label}): {spec.get('description', '')}")
    lines.append(f'{body_indent}"""')

    return "\n".join(lines) + "\n\n"


def generate_func(json_data):
    """Turn a list of tool descriptions into Python-style stub source text.

    Entries whose ``"type"`` is ``"function"`` are rendered; everything else is
    ignored. A function named ``"Class.method"`` (exactly one dot) is emitted
    as a method taking ``cls`` inside ``class Class:``; any other name is
    emitted as a module-level function after all classes.

    Args:
        json_data: Iterable of dicts shaped like
            ``{"type": "function", "function": {"name": ..., ...}}``.

    Returns:
        A ``(code, cls_name)`` tuple: the stripped stub text, and the name of
        the last class emitted (``""`` when there is none).
    """
    # Group functions by class name, preserving first-seen order.
    class_funcs = {}
    no_class_funcs = []
    cls_name = ""

    for item in json_data:
        if item.get("type") != "function":
            continue
        name_parts = item["function"]["name"].split(".")
        if len(name_parts) == 2:
            class_funcs.setdefault(name_parts[0], []).append(item)
        else:
            no_class_funcs.append(item)

    chunks = []

    # Functions that belong to a class.
    for class_name, funcs in class_funcs.items():
        cls_name = class_name
        chunks.append(f"class {class_name}:\n")
        for item in funcs:
            func = item["function"]
            method_name = func["name"].split(".")[-1]
            chunks.append(_render_stub(method_name, func, "    ", ("cls",)))
        chunks.append("\n")

    # Functions without a class keep their full name.
    for item in no_class_funcs:
        func = item["function"]
        chunks.append(_render_stub(func["name"], func, ""))

    return "".join(chunks).strip(), cls_name
|
|
|
|
|
|
# Role description and required reply format for the agent. The `{**...**}`
# markers are literal text shown to the model; this string is not passed
# through str.format in this file.
setup_prompt = """You are an agent which follow my instruction and perform desktop computer tasks as instructed.
You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
For each step, you will get an observation of the desktop by 1) screenshot; 2) current application name; 3) accessibility tree, which is based on AT-SPI library; 4) application info; 5) last action result.
You should first generate a plan for completing the task, confirm the previous results, reflect on the current status, then generate operations to complete the task in python-style pseudo code using the predefined functions.

Your output should STRICTLY follow the format:
<think>
{**YOUR-PLAN-AND-THINKING**}
</think>
```python
{**ONE-LINE-OF-CODE**}
```"""
|
|
|
|
# Tool-aware method listing. Filled with str.format using the fields
# {tool_class_name}, {app_name} and {class_content}.
func_def_tool_template = """You will be provided access to the following methods to interact with the UI:
1. class Agent, a grounding agent which provides basic action space to interact with desktop.
2. class {tool_class_name}, which provides tools to interact with the current application {app_name}.

Here are the defination of the classes:
```python
{class_content}
```"""
|
|
|
|
# Method listing used when no application-specific tools are attached.
# Filled with str.format using the single field {class_content}.
func_def_template = """You will be provided access to the following methods to interact with the UI:

```python
{class_content}
```"""
|
|
|
|
# Closing rules appended to the prompt. Filled with str.format using the
# single field {client_password}.
note_prompt = """* Note:
- Your code should be wrapped in ```python```, and your plan and thinking should be wrapped in <think></think>.
- Only **ONE-LINE-OF-CODE** at a time.
- Each code block is context independent, and variables from the previous round cannot be used in the next round.
- Do not put anything other than python code in ```python```.
- You **can only use the above methods to interact with the UI**, do not invent new methods.
- Return with `Agent.exit(success=True)` immediately after the task is completed.
- If you think cannot complete the task, **DO NOT keep repeating actions, just return with `Agent.exit(success=False)`.**
- The computer's environment is Linux, e.g., Desktop path is '/home/user/Desktop'
- My computer's password is '{client_password}', feel free to use it when you need sudo rights"""
|
|
|
|
|
|
class Prompt:
    """Builds the prompt sections handed to the agent."""

    @staticmethod
    def construct_procedural_memory(agent_class, app_name=None, client_password="password"):
        """Assemble the setup, method-listing and note sections of the prompt.

        Args:
            agent_class: Class to introspect. Every callable attribute that
                carries an ``is_agent_action`` attribute is listed with its
                signature and raw ``__doc__``.
            app_name: Optional application name. When given, the tool
                description file ``tools/apis/<app_name.lower()>.json`` under
                ``current_dir`` is loaded and its stubs are appended to the
                listing.
            client_password: Value substituted into the note section.

        Returns:
            A ``(setup_prompt, func_def_prompt, note_prompt_formatted)`` tuple
            of strings.

        Raises:
            OSError: If the tool description file cannot be opened.
            json.JSONDecodeError: If that file is not valid JSON.
        """
        agent_class_content = "Class Agent:"
        for attr_name in dir(agent_class):
            attr = getattr(agent_class, attr_name)
            if callable(attr) and hasattr(attr, "is_agent_action"):
                # Use inspect to get the full function signature.
                signature = inspect.signature(attr)
                # NOTE(review): the whitespace layout of this snippet is a
                # reconstruction (def nested one level under the class header,
                # docstring one level deeper); confirm it matches the intended
                # prompt rendering.
                agent_class_content += (
                    f"\n    def {attr_name}{signature}:\n"
                    f"        '''{attr.__doc__}'''\n"
                )

        if app_name is not None:
            tool_path = os.path.join(current_dir, "tools", "apis", f"{app_name.lower()}.json")
            # Read as UTF-8 explicitly so parsing does not depend on the
            # platform's default locale encoding.
            with open(tool_path, "r", encoding="utf-8") as f:
                json_data = json.load(f)

            tool_class_content, tool_class_name = generate_func(json_data)

            agent_class_content += "\n\n{}".format(tool_class_content)
            func_def_prompt = func_def_tool_template.format(
                class_content=agent_class_content.strip(),
                tool_class_name=tool_class_name,
                app_name=app_name,
            )
        else:
            func_def_prompt = func_def_template.format(class_content=agent_class_content.strip())

        note_prompt_formatted = note_prompt.format(client_password=client_password)

        return setup_prompt, func_def_prompt, note_prompt_formatted
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: build the prompt sections for the "vlc" tool set and
    # dump them to stdout.
    from grounding_agent import GroundingAgent

    prompt_sections = Prompt.construct_procedural_memory(GroundingAgent, "vlc")
    print(prompt_sections)
|