Dev/uitars 15 (#181)
* debug uitars1.0, add uitars1.5
* update pyautogui parser
* modify function name
* update parser
* update prompt
This commit is contained in:
@@ -1195,43 +1195,55 @@ AGUVIS_GROUNDING_APPEND_PROMPT = """<|recipient|>os
|
|||||||
pyautogui.{function_name}"""
|
pyautogui.{function_name}"""
|
||||||
|
|
||||||
UITARS_ACTION_SPACE = """
|
UITARS_ACTION_SPACE = """
|
||||||
click(start_box='[x1, y1, x2, y2]')
|
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
|
||||||
left_double(start_box='[x1, y1, x2, y2]')
|
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
|
||||||
right_single(start_box='[x1, y1, x2, y2]')
|
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
|
||||||
drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')
|
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
|
||||||
hotkey(key='')
|
hotkey(key='')
|
||||||
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
|
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
|
||||||
scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')
|
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
|
||||||
wait() #Sleep for 5s and take a screenshot to check for any changes.
|
wait() #Sleep for 5s and take a screenshot to check for any changes.
|
||||||
finished()
|
finished()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
UITARS_CALL_USR_ACTION_SPACE = """
|
UITARS_CALL_USR_ACTION_SPACE = """
|
||||||
click(start_box='[x1, y1, x2, y2]')
|
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
|
||||||
left_double(start_box='[x1, y1, x2, y2]')
|
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
|
||||||
right_single(start_box='[x1, y1, x2, y2]')
|
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
|
||||||
drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')
|
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
|
||||||
hotkey(key='')
|
hotkey(key='')
|
||||||
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
|
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
|
||||||
scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')
|
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
|
||||||
wait() #Sleep for 5s and take a screenshot to check for any changes.
|
wait() #Sleep for 5s and take a screenshot to check for any changes.
|
||||||
finished()
|
finished()
|
||||||
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
|
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
UITARS_NORMAL_ACTION_SPACE = """
|
||||||
|
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
|
||||||
|
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
|
||||||
|
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
|
||||||
|
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
|
||||||
|
hotkey(key='')
|
||||||
|
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
|
||||||
|
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
|
||||||
|
wait() #Sleep for 5s and take a screenshot to check for any changes.
|
||||||
|
finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format.
|
||||||
|
"""
|
||||||
|
|
||||||
UITARS_USR_PROMPT_NOTHOUGHT = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
|
UITARS_USR_PROMPT_NOTHOUGHT = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
|
||||||
## Output Format
|
## Output Format
|
||||||
```
|
```
|
||||||
Action: ...
|
Action: ...
|
||||||
```
|
```
|
||||||
## Action Space
|
## Action Space
|
||||||
click(start_box='[x1, y1, x2, y2]')
|
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
|
||||||
left_double(start_box='[x1, y1, x2, y2]')
|
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
|
||||||
right_single(start_box='[x1, y1, x2, y2]')
|
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
|
||||||
drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')
|
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
|
||||||
hotkey(key='')
|
hotkey(key='')
|
||||||
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
|
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
|
||||||
scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')
|
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
|
||||||
wait() #Sleep for 5s and take a screenshot to check for any changes.
|
wait() #Sleep for 5s and take a screenshot to check for any changes.
|
||||||
finished()
|
finished()
|
||||||
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
|
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
|
||||||
|
|||||||
Reference in New Issue
Block a user