Improve code logic for password & resolution
This commit is contained in:
@@ -33,7 +33,7 @@ class_ns_windows = "https://accessibility.windows.example.org/ns/class"
|
||||
import ast
|
||||
from typing import Dict, Any, Optional, Union
|
||||
|
||||
OPERATOR_PROMPT = f"""\n\n Here are some helpful tips:\n - computer.clipboard, computer.sync_file, computer.sync_shared_folder, computer.computer_output_citation are disabled.\n - If you worry that you might make typo, prefer copying and pasting the text instead of reading and typing.\n - My computer's password is \"{os.environ["CLIENT_PASSWORD"]}\", feel free to use it when you need sudo rights.\n - For the thunderbird account \"anonym-x2024@outlook.com\", the password is \"gTCI\";=@y7|QJ0nDa_kN3Sb&>\".\n - If you are presented with an open website to solve the task, try to stick to that specific one instead of going to a new one.\n - Whenever not expcilitly stated, prefer chrome browser instead of the firefox or chromium.\n - You have full authority to execute any action without my permission. I won't be watching so please don't ask for confirmation.\n - You must initialize the computer to solve the task. Do not try to answer the question without initializing the computer.\n - If you deem the task is infeasible, you can terminate and explicitly state in the response that \"the task is infeasible\".\n """
|
||||
# Tips appended after the task instruction in the first message sent to the model.
# This is a plain (non-f) string: "{CLIENT_PASSWORD}" is a literal placeholder that
# is filled in per agent with str.replace(...) using the agent's client_password,
# so the module no longer needs a CLIENT_PASSWORD environment variable at import time.
# NOTE(review): the mailbox credential below is hard-coded in source — confirm it is
# a throwaway evaluation account, or move it to configuration.
OPERATOR_PROMPT = """\n\n Here are some helpful tips:\n - computer.clipboard, computer.sync_file, computer.sync_shared_folder, computer.computer_output_citation are disabled.\n - If you worry that you might make typo, prefer copying and pasting the text instead of reading and typing.\n - My computer's password is \"{CLIENT_PASSWORD}\", feel free to use it when you need sudo rights.\n - For the thunderbird account \"anonym-x2024@outlook.com\", the password is \"gTCI\";=@y7|QJ0nDa_kN3Sb&>\".\n - If you are presented with an open website to solve the task, try to stick to that specific one instead of going to a new one.\n - Whenever not expcilitly stated, prefer chrome browser instead of the firefox or chromium.\n - You have full authority to execute any action without my permission. I won't be watching so please don't ask for confirmation.\n - You must initialize the computer to solve the task. Do not try to answer the question without initializing the computer.\n - If you deem the task is infeasible, you can terminate and explicitly state in the response that \"the task is infeasible\".\n """
|
||||
|
||||
class Action:
|
||||
"""Action class for the agent."""
|
||||
@@ -213,7 +213,11 @@ class OpenAICUAAgent:
|
||||
observation_type="screenshot_a11y_tree",
|
||||
# observation_type can be in ["screenshot", "a11y_tree", "screenshot_a11y_tree", "som"]
|
||||
max_trajectory_length=100,
|
||||
a11y_tree_max_tokens=10000
|
||||
a11y_tree_max_tokens=10000,
|
||||
client_password="",
|
||||
provider_name="aws",
|
||||
screen_width=1920,
|
||||
screen_height=1080
|
||||
):
|
||||
self.env = env
|
||||
self.platform = platform
|
||||
@@ -231,12 +235,22 @@ class OpenAICUAAgent:
|
||||
self.actions = []
|
||||
self.observations = []
|
||||
|
||||
self.screen_width = screen_width
|
||||
self.screen_height = screen_height
|
||||
|
||||
self.tools = [{
|
||||
"type": "computer_use_preview",
|
||||
"display_width": int(os.environ["SCREEN_WIDTH"]),
|
||||
"display_height": int(os.environ["SCREEN_HEIGHT"]),
|
||||
"display_width": self.screen_width,
|
||||
"display_height": self.screen_height,
|
||||
"environment": "linux" if platform == "ubuntu" else "windows"
|
||||
}]
|
||||
if client_password == "":
|
||||
if provider_name == "aws":
|
||||
self.client_password = "osworld-public-evaluation"
|
||||
else:
|
||||
self.client_password = "password"
|
||||
else:
|
||||
self.client_password = client_password
|
||||
|
||||
if observation_type == "screenshot":
|
||||
if action_space == "computer_13":
|
||||
@@ -630,7 +644,8 @@ class OpenAICUAAgent:
|
||||
"""
|
||||
Predict the next action(s) based on the current observation.
|
||||
"""
|
||||
|
||||
prompt = OPERATOR_PROMPT.replace("{CLIENT_PASSWORD}", self.client_password)
|
||||
|
||||
base64_image = encode_image(obs["screenshot"])
|
||||
if self.cua_messages == []:
|
||||
self.cua_messages.append({
|
||||
@@ -642,7 +657,7 @@ class OpenAICUAAgent:
|
||||
},
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "\n " + instruction + OPERATOR_PROMPT,
|
||||
"text": "\n " + instruction + prompt,
|
||||
}
|
||||
]
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user