@@ -25,8 +25,9 @@ REGISTRY_PATH = '.aws_vms'
|
|||||||
DEFAULT_REGION = "us-east-1"
|
DEFAULT_REGION = "us-east-1"
|
||||||
# todo: document how to configure the image, security group, and network interface
|
# todo: document how to configure the image, security group, and network interface
|
||||||
# todo: make the AMI images public
|
# todo: make the AMI images public
|
||||||
|
# previous us-east-1 image id, kept for reference: ami-05e7d7bd279ea4f14
|
||||||
IMAGE_ID_MAP = {
|
IMAGE_ID_MAP = {
|
||||||
"us-east-1": "ami-05e7d7bd279ea4f14",
|
"us-east-1": "ami-02fea2e5b77c79c17",
|
||||||
"ap-east-1": "ami-0c092a5b8be4116f5",
|
"ap-east-1": "ami-0c092a5b8be4116f5",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
27
evaluation_examples/test_small_debug copy.json
Normal file
27
evaluation_examples/test_small_debug copy.json
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
{
|
||||||
|
"multi_apps": [
|
||||||
|
"74d5859f-ed66-4d3e-aa0e-93d7a592ce41",
|
||||||
|
"b5062e3e-641c-4e3a-907b-ac864d2e7652",
|
||||||
|
"48d05431-6cd5-4e76-82eb-12b60d823f7d",
|
||||||
|
"eb303e01-261e-4972-8c07-c9b4e7a4922a",
|
||||||
|
"d1acdb87-bb67-4f30-84aa-990e56a09c92",
|
||||||
|
"deec51c9-3b1e-4b9e-993c-4776f20e8bb2",
|
||||||
|
"8e116af7-7db7-4e35-a68b-b0939c066c78",
|
||||||
|
"2373b66a-092d-44cb-bfd7-82e86e7a3b4d"
|
||||||
|
],
|
||||||
|
"os": [
|
||||||
|
"5812b315-e7bd-4265-b51f-863c02174c28"
|
||||||
|
],
|
||||||
|
"thunderbird": [
|
||||||
|
"dfac9ee8-9bc4-4cdc-b465-4a4bfcd2f397",
|
||||||
|
"15c3b339-88f7-4a86-ab16-e71c58dcb01e"
|
||||||
|
],
|
||||||
|
"vlc": [
|
||||||
|
"59f21cfb-0120-4326-b255-a5b827b38967",
|
||||||
|
"8f080098-ddb1-424c-b438-4e96e5e4786e"
|
||||||
|
],
|
||||||
|
"vs_code": [
|
||||||
|
"53ad5833-3455-407b-bbc6-45b4c79ab8fb",
|
||||||
|
"276cc624-87ea-4f08-ab93-f770e3790175"
|
||||||
|
]
|
||||||
|
}
|
||||||
27
evaluation_examples/test_small_debug.json
Normal file
27
evaluation_examples/test_small_debug.json
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
{
|
||||||
|
"multi_apps": [
|
||||||
|
"74d5859f-ed66-4d3e-aa0e-93d7a592ce41",
|
||||||
|
"b5062e3e-641c-4e3a-907b-ac864d2e7652",
|
||||||
|
"48d05431-6cd5-4e76-82eb-12b60d823f7d",
|
||||||
|
"eb303e01-261e-4972-8c07-c9b4e7a4922a",
|
||||||
|
"d1acdb87-bb67-4f30-84aa-990e56a09c92",
|
||||||
|
"deec51c9-3b1e-4b9e-993c-4776f20e8bb2",
|
||||||
|
"8e116af7-7db7-4e35-a68b-b0939c066c78",
|
||||||
|
"2373b66a-092d-44cb-bfd7-82e86e7a3b4d"
|
||||||
|
],
|
||||||
|
"os": [
|
||||||
|
"5812b315-e7bd-4265-b51f-863c02174c28"
|
||||||
|
],
|
||||||
|
"thunderbird": [
|
||||||
|
"dfac9ee8-9bc4-4cdc-b465-4a4bfcd2f397",
|
||||||
|
"15c3b339-88f7-4a86-ab16-e71c58dcb01e"
|
||||||
|
],
|
||||||
|
"vlc": [
|
||||||
|
"59f21cfb-0120-4326-b255-a5b827b38967",
|
||||||
|
"8f080098-ddb1-424c-b438-4e96e5e4786e"
|
||||||
|
],
|
||||||
|
"vs_code": [
|
||||||
|
"53ad5833-3455-407b-bbc6-45b4c79ab8fb",
|
||||||
|
"276cc624-87ea-4f08-ab93-f770e3790175"
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -45,6 +45,16 @@ class_ns_windows = "https://accessibility.windows.example.org/ns/class"
|
|||||||
import ast
|
import ast
|
||||||
from typing import Dict, Any, Optional, Union
|
from typing import Dict, Any, Optional, Union
|
||||||
|
|
||||||
|
OPERATOR_PROMPT = """Here are some helpful tips:
|
||||||
|
(1) computer.clipboard, computer.sync_file, computer.sync.shared_folder, computer.computer_output_citation are disabled.
|
||||||
|
(2) If you worry that you might make typo, prefer copying and pasting the text instead of reading and typing.
|
||||||
|
(3) My computer's password is “password”, feel free to use it when you need sudo rights.
|
||||||
|
(4) For the thunderbird account “anonym-x2024@outlook.com”, the password is “gTCI”;=@y7—QJ0nDa_kN3Sb¿”.
|
||||||
|
(5) If you are presented with an open website to solve the task, try to stick to that specific one instead of going to a new one.
|
||||||
|
(6) You have full authority to execute any action without my permission. I won't be watching so please don't ask for confirmation.
|
||||||
|
(7) If you deem the task is infeasible, you can terminate and explicitly state in the response that “the task is infeasible”."""
|
||||||
|
|
||||||
|
|
||||||
class Action:
|
class Action:
|
||||||
"""Action class for the agent."""
|
"""Action class for the agent."""
|
||||||
def __init__(self, raw_action: Union[Dict, str], action_space: str):
|
def __init__(self, raw_action: Union[Dict, str], action_space: str):
|
||||||
@@ -639,7 +649,7 @@ class OpenAICUAAgent:
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "input_text",
|
"type": "input_text",
|
||||||
"text": instruction
|
"text": instruction + "\n" + OPERATOR_PROMPT,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -149,14 +149,14 @@ def distribute_tasks(test_all_meta: dict, num_envs: int) -> List[Dict]:
|
|||||||
|
|
||||||
def run_env_tasks(env_idx: int, env_tasks: dict, args: argparse.Namespace, shared_scores: list):
|
def run_env_tasks(env_idx: int, env_tasks: dict, args: argparse.Namespace, shared_scores: list):
|
||||||
"""Run tasks for a single environment."""
|
"""Run tasks for a single environment."""
|
||||||
|
# previous snapshot_name (AMI id), kept for reference: ami-05e7d7bd279ea4f14
|
||||||
env = DesktopEnv(
|
env = DesktopEnv(
|
||||||
path_to_vm=args.path_to_vm,
|
path_to_vm=args.path_to_vm,
|
||||||
action_space=args.action_space,
|
action_space=args.action_space,
|
||||||
|
|
||||||
provider_name="aws",
|
provider_name="aws",
|
||||||
region="us-east-1",
|
region="us-east-1",
|
||||||
snapshot_name="ami-05e7d7bd279ea4f14",
|
snapshot_name="ami-02fea2e5b77c79c17",
|
||||||
|
|
||||||
screen_size=(args.screen_width, args.screen_height),
|
screen_size=(args.screen_width, args.screen_height),
|
||||||
headless=args.headless,
|
headless=args.headless,
|
||||||
@@ -330,7 +330,7 @@ def get_result(action_space, use_model, observation_type, result_dir, total_file
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
####### The complete version of the list of examples #######
|
####### The complete version of the list of examples #######
|
||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||||
|
|
||||||
args = config()
|
args = config()
|
||||||
|
|
||||||
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
||||||
|
|||||||
9
run_operator.sh
Normal file
9
run_operator.sh
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
python run_multienv_openaicua.py \
|
||||||
|
--headless \
|
||||||
|
--observation_type screenshot \
|
||||||
|
--model computer-use-preview \
|
||||||
|
--result_dir ./results_operator_aws_new \
|
||||||
|
--test_all_meta_path evaluation_examples/test_small_debug.json \
|
||||||
|
--region us-east-1 \
|
||||||
|
--max_steps 150 \
|
||||||
|
--num_envs 5
|
||||||
Reference in New Issue
Block a user