Fix password and screen resolution configuration

This commit is contained in:
yuanmengqi
2025-07-11 12:15:03 +00:00
parent 349c31fa55
commit fe3bb2fd92
5 changed files with 28 additions and 17 deletions

View File

@@ -27,7 +27,13 @@ import dotenv
# Load environment variables from .env file
dotenv.load_dotenv()
CLIENT_PASSWORD = os.getenv("CLIENT_PASSWORD", "osworld-public-evaluation") # Default password for sudo operations
if os.environ.get("PROVIDER_NAME") == "aws":
os.environ["CLIENT_PASSWORD"] = os.environ.get("CLIENT_PASSWORD_AWS", "osworld-public-evaluation")
else:
os.environ["CLIENT_PASSWORD"] = os.environ.get("CLIENT_PASSWORD", "password")
CLIENT_PASSWORD = os.environ["CLIENT_PASSWORD"]
PROXY_CONFIG_FILE = os.getenv("PROXY_CONFIG_FILE", "evaluation_examples/settings/proxy/dataimpulse.json") # Default proxy config file
logger = logging.getLogger("desktopenv.setup")

View File

@@ -32,7 +32,7 @@ class DesktopEnv(gym.Env):
snapshot_name: str = "init_state",
action_space: str = "computer_13",
cache_dir: str = "cache",
screen_size: Tuple[int] = (1920, 1080),
screen_size: Tuple[int] = (int(os.environ["SCREEN_WIDTH"]), int(os.environ["SCREEN_HEIGHT"])),
headless: bool = False,
require_a11y_tree: bool = True,
require_terminal: bool = False,

View File

@@ -36,15 +36,22 @@ DEFAULT_REGION = "us-east-1"
# todo: Add doc for the configuration of image, security group and network interface
# todo: publish the AMI images
IMAGE_ID_MAP = {
"us-east-1": "ami-09138bff939f82bd8",
"ap-east-1": "ami-0c092a5b8be4116f5",
"us-east-1": {
(1920, 1080): "ami-09138bff939f82bd8"
},
"ap-east-1": {
(1920, 1080): "ami-0c092a5b8be4116f5"
}
}
def _allocate_vm(region=DEFAULT_REGION):
def _allocate_vm(region=DEFAULT_REGION, screen_size=(1920, 1080)):
if region not in IMAGE_ID_MAP:
raise ValueError(f"Region {region} is not supported. Supported regions are: {list(IMAGE_ID_MAP.keys())}")
if screen_size not in IMAGE_ID_MAP[region]:
raise ValueError(f"Screen size {screen_size} not supported for region {region}. Supported: {list(IMAGE_ID_MAP[region].keys())}")
ami_id = IMAGE_ID_MAP[region][screen_size]
ec2_client = boto3.client('ec2', region_name=region)
instance_id = None
@@ -86,7 +93,7 @@ def _allocate_vm(region=DEFAULT_REGION):
run_instances_params = {
"MaxCount": 1,
"MinCount": 1,
"ImageId": IMAGE_ID_MAP[region],
"ImageId": ami_id,
"InstanceType": INSTANCE_TYPE,
"EbsOptimized": True,
"NetworkInterfaces": [

View File

@@ -33,7 +33,7 @@ class_ns_windows = "https://accessibility.windows.example.org/ns/class"
import ast
from typing import Dict, Any, Optional, Union
OPERATOR_PROMPT = """\n\n Here are some helpful tips:\n - computer.clipboard, computer.sync_file, computer.sync_shared_folder, computer.computer_output_citation are disabled.\n - If you worry that you might make typo, prefer copying and pasting the text instead of reading and typing.\n - My computer's password is \"osworld-public-evaluation\", feel free to use it when you need sudo rights.\n - For the thunderbird account \"anonym-x2024@outlook.com\", the password is \"gTCI\";=@y7|QJ0nDa_kN3Sb&>\".\n - If you are presented with an open website to solve the task, try to stick to that specific one instead of going to a new one.\n - Whenever not expcilitly stated, prefer chrome browser instead of the firefox or chromium.\n - You have full authority to execute any action without my permission. I won't be watching so please don't ask for confirmation.\n - You must initialize the computer to solve the task. Do not try to answer the question without initializing the computer.\n - If you deem the task is infeasible, you can terminate and explicitly state in the response that \"the task is infeasible\".\n """
OPERATOR_PROMPT = f"""\n\n Here are some helpful tips:\n - computer.clipboard, computer.sync_file, computer.sync_shared_folder, computer.computer_output_citation are disabled.\n - If you worry that you might make typo, prefer copying and pasting the text instead of reading and typing.\n - My computer's password is \"{os.environ["CLIENT_PASSWORD"]}\", feel free to use it when you need sudo rights.\n - For the thunderbird account \"anonym-x2024@outlook.com\", the password is \"gTCI\";=@y7|QJ0nDa_kN3Sb&>\".\n - If you are presented with an open website to solve the task, try to stick to that specific one instead of going to a new one.\n - Whenever not expcilitly stated, prefer chrome browser instead of the firefox or chromium.\n - You have full authority to execute any action without my permission. I won't be watching so please don't ask for confirmation.\n - You must initialize the computer to solve the task. Do not try to answer the question without initializing the computer.\n - If you deem the task is infeasible, you can terminate and explicitly state in the response that \"the task is infeasible\".\n """
class Action:
"""Action class for the agent."""
@@ -233,8 +233,8 @@ class OpenAICUAAgent:
self.tools = [{
"type": "computer_use_preview",
"display_width": 1920,
"display_height": 1080,
"display_width": int(os.environ["SCREEN_WIDTH"]),
"display_height": int(os.environ["SCREEN_HEIGHT"]),
"environment": "linux" if platform == "ubuntu" else "windows"
}]

View File

@@ -47,8 +47,6 @@ def config() -> argparse.Namespace:
default="screenshot",
help="Observation type",
)
parser.add_argument("--screen_width", type=int, default=1920)
parser.add_argument("--screen_height", type=int, default=1080)
parser.add_argument("--sleep_after_execution", type=float, default=0.0)
parser.add_argument("--max_steps", type=int, default=15)
@@ -181,16 +179,16 @@ def run_env_tasks(env_idx: int, env_tasks: dict, args: argparse.Namespace, share
signal.signal(signal.SIGTERM, lambda signum, frame: process_signal_handler(signum, frame, env_idx))
from desktop_env.providers.aws.manager import IMAGE_ID_MAP
REGION = "us-east-1"
REGION = args.region
screen_size = (int(os.environ["SCREEN_WIDTH"]), int(os.environ["SCREEN_HEIGHT"]))
ami_id = IMAGE_ID_MAP[REGION].get(screen_size, IMAGE_ID_MAP[REGION][(1920, 1080)])
env = DesktopEnv(
path_to_vm=args.path_to_vm,
action_space=args.action_space,
provider_name="aws",
provider_name=os.environ["PROVIDER_NAME"],
region=REGION,
snapshot_name=IMAGE_ID_MAP[REGION],
screen_size=(args.screen_width, args.screen_height),
snapshot_name=ami_id,
screen_size=screen_size,
headless=args.headless,
os_type="Ubuntu",
require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"],