# Prompt templates, action-space listings and failure-indicator phrases for a GUI agent.
# Prompt template for a GUI agent that must answer with a `Thought:` line
# followed by an `Action:` line.
#
# Placeholders:
#   {language}    - language the agent should use in the `Thought` part.
#   {instruction} - the user's task, inserted under "## User Instruction".
# Presumably filled with str.format(); confirm at the call site.
#
# Backslashes are doubled on purpose: the rendered prompt shows the literal
# two-character sequences \n, \' and \" rather than real control characters.
#
# NOTE(review): the template embeds a sudo password in plain text. That is only
# reasonable for a disposable sandbox machine - confirm before reusing elsewhere.
COMPUTER_USE_PROMPT = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.

## Output Format
```
Thought: ...
Action: ...
```

## Action Space

click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format.

## Note
- Use {language} in `Thought` part.
- Write a small plan and finally summarize your next action (with its target element) in one sentence in `Thought` part.
- My computer's password is 'password', feel free to use it when you need sudo rights.

## User Instruction
{instruction}
"""
# Prompt template for a GUI agent that must answer with a `Thought:` line
# followed by an `Action:` line, and that may also hand control back to the
# user: the action space lists `call_user()` after `finished(...)`.
#
# Placeholders:
#   {language}    - language the agent should use in the `Thought` part.
#   {instruction} - the user's task, inserted under "## User Instruction".
# Presumably filled with str.format(); confirm at the call site.
#
# Backslashes are doubled on purpose: the rendered prompt shows the literal
# two-character sequences \n, \' and \" rather than real control characters.
#
# NOTE(review): the template embeds a sudo password in plain text. That is only
# reasonable for a disposable sandbox machine - confirm before reusing elsewhere.
COMPUTER_USE_PROMPT_WITH_CALL_USER = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.

## Output Format
```
Thought: ...
Action: ...
```

## Action Space

click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format.
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.

## Note
- Use {language} in `Thought` part.
- Write a small plan and finally summarize your next action (with its target element) in one sentence in `Thought` part.
- My computer's password is 'password', feel free to use it when you need sudo rights.

## User Instruction
{instruction}
"""
# Action-space listing, one action signature per line, ending with a bare
# `finished()` (no `content` argument) and without `call_user()`.
# The string starts and ends with a newline. It contains no format
# placeholders; it looks intended for the `{action_space}` slot of a prompt
# template - confirm against the caller.
# The doubled backslash renders as the literal two-character sequence \n.
UITARS_ACTION_SPACE = """
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
"""
# Action-space listing, one action signature per line, ending with a bare
# `finished()` followed by `call_user()` for handing the task back to the user.
# The string starts and ends with a newline. It contains no format
# placeholders; it looks intended for the `{action_space}` slot of a prompt
# template - confirm against the caller.
# The doubled backslash renders as the literal two-character sequence \n.
UITARS_CALL_USR_ACTION_SPACE = """
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
"""
# Action-space listing, one action signature per line, whose last entry is
# `finished(content='xxx')` with its escaping hint; there is no `call_user()`.
# The string starts and ends with a newline. It contains no format
# placeholders; it looks intended for the `{action_space}` slot of a prompt
# template - confirm against the caller.
# Doubled backslashes render as the literal sequences \n, \' and \".
UITARS_NORMAL_ACTION_SPACE = """
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format.
"""
# Prompt template for a GUI agent that answers with an `Action:` line only.
# The output-format section has no `Thought:` line and there is no "## Note"
# section. The action space is written inline and includes both `finished()`
# and `call_user()`. Sections follow each other without blank lines.
#
# Placeholder:
#   {instruction} - the user's task, inserted under "## User Instruction".
# Presumably filled with str.format(); confirm at the call site.
# The doubled backslash renders as the literal two-character sequence \n.
UITARS_USR_PROMPT_NOTHOUGHT = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```
Action: ...
```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## User Instruction
{instruction}
"""
# Prompt template for a GUI agent that must answer with a `Thought:` line
# followed by an `Action:` line. Unlike a template with a hard-coded action
# list, the action space is supplied by the caller.
#
# Placeholders:
#   {action_space} - text listing the available actions.
#   {language}     - language the agent should use in the `Thought` part.
#   {instruction}  - the user's task, inserted under "## User Instruction".
# Presumably filled with str.format(); confirm at the call site.
UITARS_USR_PROMPT_THOUGHT = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.

## Output Format
```
Thought: ...
Action: ...
```

## Action Space
{action_space}

## Note
- Use {language} in `Thought` part.
- Write a small plan and finally summarize your next action (with its target element) in one sentence in `Thought` part.

## User Instruction
{instruction}
"""


# Chinese phrases treated as failure indicators, grouped by the kind of
# statement they express. How they are matched is not visible here -
# presumably by substring search over the agent's text; confirm against the
# consumer.
#
# NOTE(review): "实现不了" is listed twice (first and fifth group). The
# duplicate is kept so that the list's length and contents stay unchanged.
FAILURE_INDICATORS = [
    # Direct inability expressions
    "无法", "不能", "不可以", "做不到", "实现不了", "完成不了", "没法",

    # Regret/apology expressions
    "遗憾", "抱歉", "很抱歉", "非常抱歉", "对不起",

    # Not supported/available
    # (disabled entry: "不存在")
    "不直接支持", "不支持", "不提供", "不具备", "没有权限", "权限不足", "不在这里面", "不符合",

    # Cannot access/handle
    "无权访问", "访问不了", "处理不了", "操作不了", "执行不了", "没找到", "空空如也",

    # Not possible/feasible
    "不可能", "无法实现", "实现不了", "办不到", "做不了", "找不到", "存在技术限制", "没有找到", "没有内置",

    # System limitations
    "超出范围", "不在我的能力范围", "能力有限", "功能限制", "没有成功", "没成功", "硬件的问题",

    # Refusal indicators
    "拒绝", "不允许", "禁止", "不合适", "不恰当",

    # Trying to restart
    # (disabled entries: "重新", "重启")
    "从头开始", "藏在", "浪费时间", "一个更合理的思路", "正确的方向", "没有意义",
]