From fe3bb2fd92c0588043314b85b3617119c8eeebb4 Mon Sep 17 00:00:00 2001 From: yuanmengqi Date: Fri, 11 Jul 2025 12:15:03 +0000 Subject: [PATCH] fix password&resolution --- desktop_env/controllers/setup.py | 8 +++++++- desktop_env/desktop_env.py | 2 +- desktop_env/providers/aws/manager.py | 15 +++++++++++---- mm_agents/openai_cua_agent.py | 6 +++--- run_multienv_openaicua.py | 14 ++++++-------- 5 files changed, 28 insertions(+), 17 deletions(-) diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index 140a0a0..ad2be9a 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -27,7 +27,13 @@ import dotenv # Load environment variables from .env file dotenv.load_dotenv() -CLIENT_PASSWORD = os.getenv("CLIENT_PASSWORD", "osworld-public-evaluation") # Default password for sudo operations +if os.environ.get("PROVIDER_NAME") == "aws": + os.environ["CLIENT_PASSWORD"] = os.environ.get("CLIENT_PASSWORD_AWS", "osworld-public-evaluation") +else: + os.environ["CLIENT_PASSWORD"] = os.environ.get("CLIENT_PASSWORD", "password") + +CLIENT_PASSWORD = os.environ["CLIENT_PASSWORD"] + PROXY_CONFIG_FILE = os.getenv("PROXY_CONFIG_FILE", "evaluation_examples/settings/proxy/dataimpulse.json") # Default proxy config file logger = logging.getLogger("desktopenv.setup") diff --git a/desktop_env/desktop_env.py b/desktop_env/desktop_env.py index be817c9..37e8872 100644 --- a/desktop_env/desktop_env.py +++ b/desktop_env/desktop_env.py @@ -32,7 +32,7 @@ class DesktopEnv(gym.Env): snapshot_name: str = "init_state", action_space: str = "computer_13", cache_dir: str = "cache", - screen_size: Tuple[int] = (1920, 1080), + screen_size: Tuple[int] = (int(os.environ["SCREEN_WIDTH"]), int(os.environ["SCREEN_HEIGHT"])), headless: bool = False, require_a11y_tree: bool = True, require_terminal: bool = False, diff --git a/desktop_env/providers/aws/manager.py b/desktop_env/providers/aws/manager.py index 287327d..4b53e1f 100644 --- a/desktop_env/providers/aws/manager.py +++ b/desktop_env/providers/aws/manager.py @@ -36,15 +36,22 @@ DEFAULT_REGION = "us-east-1" # todo: Add doc for the configuration of image, security group and network interface # todo: public the AMI images IMAGE_ID_MAP = { - "us-east-1": "ami-09138bff939f82bd8", - "ap-east-1": "ami-0c092a5b8be4116f5", + "us-east-1": { + (1920, 1080): "ami-09138bff939f82bd8" + }, + "ap-east-1": { + (1920, 1080): "ami-0c092a5b8be4116f5" + } } -def _allocate_vm(region=DEFAULT_REGION): +def _allocate_vm(region=DEFAULT_REGION, screen_size=(1920, 1080)): if region not in IMAGE_ID_MAP: raise ValueError(f"Region {region} is not supported. Supported regions are: {list(IMAGE_ID_MAP.keys())}") + if screen_size not in IMAGE_ID_MAP[region]: + raise ValueError(f"Screen size {screen_size} not supported for region {region}. Supported: {list(IMAGE_ID_MAP[region].keys())}") + ami_id = IMAGE_ID_MAP[region][screen_size] ec2_client = boto3.client('ec2', region_name=region) instance_id = None @@ -86,7 +93,7 @@ def _allocate_vm(region=DEFAULT_REGION): run_instances_params = { "MaxCount": 1, "MinCount": 1, - "ImageId": IMAGE_ID_MAP[region], + "ImageId": ami_id, "InstanceType": INSTANCE_TYPE, "EbsOptimized": True, "NetworkInterfaces": [ diff --git a/mm_agents/openai_cua_agent.py b/mm_agents/openai_cua_agent.py index 15db312..064a2d8 100644 --- a/mm_agents/openai_cua_agent.py +++ b/mm_agents/openai_cua_agent.py @@ -33,7 +33,7 @@ class_ns_windows = "https://accessibility.windows.example.org/ns/class" import ast from typing import Dict, Any, Optional, Union -OPERATOR_PROMPT = """\n\n Here are some helpful tips:\n - computer.clipboard, computer.sync_file, computer.sync_shared_folder, computer.computer_output_citation are disabled.\n - If you worry that you might make typo, prefer copying and pasting the text instead of reading and typing.\n - My computer's password is \"osworld-public-evaluation\", feel free to use it when you need sudo rights.\n - For the thunderbird account \"anonym-x2024@outlook.com\", the password is \"gTCI\";=@y7|QJ0nDa_kN3Sb&>\".\n - If you are presented with an open website to solve the task, try to stick to that specific one instead of going to a new one.\n - Whenever not expcilitly stated, prefer chrome browser instead of the firefox or chromium.\n - You have full authority to execute any action without my permission. I won't be watching so please don't ask for confirmation.\n - You must initialize the computer to solve the task. Do not try to answer the question without initializing the computer.\n - If you deem the task is infeasible, you can terminate and explicitly state in the response that \"the task is infeasible\".\n """ +OPERATOR_PROMPT = f"""\n\n Here are some helpful tips:\n - computer.clipboard, computer.sync_file, computer.sync_shared_folder, computer.computer_output_citation are disabled.\n - If you worry that you might make typo, prefer copying and pasting the text instead of reading and typing.\n - My computer's password is \"{os.environ["CLIENT_PASSWORD"]}\", feel free to use it when you need sudo rights.\n - For the thunderbird account \"anonym-x2024@outlook.com\", the password is \"gTCI\";=@y7|QJ0nDa_kN3Sb&>\".\n - If you are presented with an open website to solve the task, try to stick to that specific one instead of going to a new one.\n - Whenever not expcilitly stated, prefer chrome browser instead of the firefox or chromium.\n - You have full authority to execute any action without my permission. I won't be watching so please don't ask for confirmation.\n - You must initialize the computer to solve the task. Do not try to answer the question without initializing the computer.\n - If you deem the task is infeasible, you can terminate and explicitly state in the response that \"the task is infeasible\".\n """ class Action: """Action class for the agent.""" @@ -233,8 +233,8 @@ class OpenAICUAAgent: self.tools = [{ "type": "computer_use_preview", - "display_width": 1920, - "display_height": 1080, + "display_width": int(os.environ["SCREEN_WIDTH"]), + "display_height": int(os.environ["SCREEN_HEIGHT"]), "environment": "linux" if platform == "ubuntu" else "windows" }] diff --git a/run_multienv_openaicua.py b/run_multienv_openaicua.py index be562b6..1dd8cb1 100644 --- a/run_multienv_openaicua.py +++ b/run_multienv_openaicua.py @@ -47,8 +47,6 @@ def config() -> argparse.Namespace: default="screenshot", help="Observation type", ) - parser.add_argument("--screen_width", type=int, default=1920) - parser.add_argument("--screen_height", type=int, default=1080) parser.add_argument("--sleep_after_execution", type=float, default=0.0) parser.add_argument("--max_steps", type=int, default=15) @@ -181,16 +179,16 @@ def run_env_tasks(env_idx: int, env_tasks: dict, args: argparse.Namespace, share signal.signal(signal.SIGTERM, lambda signum, frame: process_signal_handler(signum, frame, env_idx)) from desktop_env.providers.aws.manager import IMAGE_ID_MAP - REGION = "us-east-1" + REGION = args.region + screen_size = (int(os.environ["SCREEN_WIDTH"]), int(os.environ["SCREEN_HEIGHT"])) + ami_id = IMAGE_ID_MAP[REGION].get(screen_size, IMAGE_ID_MAP[REGION][(1920, 1080)]) env = DesktopEnv( path_to_vm=args.path_to_vm, action_space=args.action_space, - - provider_name="aws", + provider_name=os.environ["PROVIDER_NAME"], region=REGION, - snapshot_name=IMAGE_ID_MAP[REGION], - - screen_size=(args.screen_width, args.screen_height), + snapshot_name=ami_id, + screen_size=screen_size, headless=args.headless, os_type="Ubuntu", require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"],