COMPUTER_USE_PROMPT = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. ## Output Format ``` Thought: ... Action: ... ``` ## Action Space click(start_box='<|box_start|>(x1,y1)<|box_end|>') left_double(start_box='<|box_start|>(x1,y1)<|box_end|>') right_single(start_box='<|box_start|>(x1,y1)<|box_end|>') drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>') hotkey(key='') type(content='') #If you want to submit your input, use "\\n" at the end of `content`. scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left') wait() #Sleep for 5s and take a screenshot to check for any changes. finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format. ## Note - Use {language} in `Thought` part. - Write a small plan and finally summarize your next action (with its target element) in one sentence in `Thought` part. - My computer's password is 'password', feel free to use it when you need sudo rights. ## User Instruction {instruction} """ COMPUTER_USE_PROMPT_WITH_CALL_USER = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. ## Output Format ``` Thought: ... Action: ... ``` ## Action Space click(start_box='<|box_start|>(x1,y1)<|box_end|>') left_double(start_box='<|box_start|>(x1,y1)<|box_end|>') right_single(start_box='<|box_start|>(x1,y1)<|box_end|>') drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>') hotkey(key='') type(content='') #If you want to submit your input, use "\\n" at the end of `content`. scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left') wait() #Sleep for 5s and take a screenshot to check for any changes. finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format. call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help. ## Note - Use {language} in `Thought` part. - Write a small plan and finally summarize your next action (with its target element) in one sentence in `Thought` part. - My computer's password is 'password', feel free to use it when you need sudo rights. ## User Instruction {instruction} """ UITARS_ACTION_SPACE = """ click(start_box='<|box_start|>(x1,y1)<|box_end|>') left_double(start_box='<|box_start|>(x1,y1)<|box_end|>') right_single(start_box='<|box_start|>(x1,y1)<|box_end|>') drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>') hotkey(key='') type(content='') #If you want to submit your input, use "\\n" at the end of `content`. scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left') wait() #Sleep for 5s and take a screenshot to check for any changes. finished() """ UITARS_CALL_USR_ACTION_SPACE = """ click(start_box='<|box_start|>(x1,y1)<|box_end|>') left_double(start_box='<|box_start|>(x1,y1)<|box_end|>') right_single(start_box='<|box_start|>(x1,y1)<|box_end|>') drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>') hotkey(key='') type(content='') #If you want to submit your input, use "\\n" at the end of `content`. scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left') wait() #Sleep for 5s and take a screenshot to check for any changes. finished() call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help. """ UITARS_NORMAL_ACTION_SPACE = """ click(start_box='<|box_start|>(x1,y1)<|box_end|>') left_double(start_box='<|box_start|>(x1,y1)<|box_end|>') right_single(start_box='<|box_start|>(x1,y1)<|box_end|>') drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>') hotkey(key='') type(content='') #If you want to submit your input, use "\\n" at the end of `content`. scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left') wait() #Sleep for 5s and take a screenshot to check for any changes. finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format. """ UITARS_USR_PROMPT_NOTHOUGHT = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. ## Output Format ``` Action: ... ``` ## Action Space click(start_box='<|box_start|>(x1,y1)<|box_end|>') left_double(start_box='<|box_start|>(x1,y1)<|box_end|>') right_single(start_box='<|box_start|>(x1,y1)<|box_end|>') drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>') hotkey(key='') type(content='') #If you want to submit your input, use "\\n" at the end of `content`. scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left') wait() #Sleep for 5s and take a screenshot to check for any changes. finished() call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help. ## User Instruction {instruction} """ UITARS_USR_PROMPT_THOUGHT = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. ## Output Format ``` Thought: ... Action: ... ``` ## Action Space {action_space} ## Note - Use {language} in `Thought` part. - Write a small plan and finally summarize your next action (with its target element) in one sentence in `Thought` part. ## User Instruction {instruction} """ FAILURE_INDICATORS = [ # Direct inability expressions "无法", "不能", "不可以", "做不到", "实现不了", "完成不了","没法", # Regret/apology expressions "遗憾", "抱歉", "很抱歉", "非常抱歉", "对不起", # Not supported/available "不直接支持", "不支持", "不提供", "不具备", "没有权限", "权限不足", "不在这里面","不符合",#"不存在", # Cannot access/handle "无权访问", "访问不了", "处理不了", "操作不了", "执行不了", "没找到", "空空如也", # Not possible/feasible "不可能", "无法实现", "实现不了", "办不到", "做不了","找不到","存在技术限制","没有找到","没有内置", # System limitations "超出范围", "不在我的能力范围", "能力有限", "功能限制","没有成功","没成功","硬件的问题", # Refusal indicators "拒绝", "不允许", "禁止", "不合适", "不恰当", # Trying Restart "从头开始", "藏在", "浪费时间","一个更合理的思路","正确的方向","没有意义",#, "重新","重启", ]