edit operator
This commit is contained in:
@@ -25,8 +25,9 @@ REGISTRY_PATH = '.aws_vms'
|
||||
DEFAULT_REGION = "us-east-1"
|
||||
# todo: Add docs for the configuration of the image, security group, and network interface
|
||||
# todo: make the AMI images public
|
||||
# ami-05e7d7bd279ea4f14
|
||||
IMAGE_ID_MAP = {
|
||||
"us-east-1": "ami-05e7d7bd279ea4f14",
|
||||
"us-east-1": "ami-02fea2e5b77c79c17",
|
||||
"ap-east-1": "ami-0c092a5b8be4116f5",
|
||||
}
|
||||
|
||||
|
||||
27
evaluation_examples/test_small_debug copy.json
Normal file
27
evaluation_examples/test_small_debug copy.json
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"multi_apps": [
|
||||
"74d5859f-ed66-4d3e-aa0e-93d7a592ce41",
|
||||
"b5062e3e-641c-4e3a-907b-ac864d2e7652",
|
||||
"48d05431-6cd5-4e76-82eb-12b60d823f7d",
|
||||
"eb303e01-261e-4972-8c07-c9b4e7a4922a",
|
||||
"d1acdb87-bb67-4f30-84aa-990e56a09c92",
|
||||
"deec51c9-3b1e-4b9e-993c-4776f20e8bb2",
|
||||
"8e116af7-7db7-4e35-a68b-b0939c066c78",
|
||||
"2373b66a-092d-44cb-bfd7-82e86e7a3b4d"
|
||||
],
|
||||
"os": [
|
||||
"5812b315-e7bd-4265-b51f-863c02174c28"
|
||||
],
|
||||
"thunderbird": [
|
||||
"dfac9ee8-9bc4-4cdc-b465-4a4bfcd2f397",
|
||||
"15c3b339-88f7-4a86-ab16-e71c58dcb01e"
|
||||
],
|
||||
"vlc": [
|
||||
"59f21cfb-0120-4326-b255-a5b827b38967",
|
||||
"8f080098-ddb1-424c-b438-4e96e5e4786e"
|
||||
],
|
||||
"vs_code": [
|
||||
"53ad5833-3455-407b-bbc6-45b4c79ab8fb",
|
||||
"276cc624-87ea-4f08-ab93-f770e3790175"
|
||||
]
|
||||
}
|
||||
27
evaluation_examples/test_small_debug.json
Normal file
27
evaluation_examples/test_small_debug.json
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"multi_apps": [
|
||||
"74d5859f-ed66-4d3e-aa0e-93d7a592ce41",
|
||||
"b5062e3e-641c-4e3a-907b-ac864d2e7652",
|
||||
"48d05431-6cd5-4e76-82eb-12b60d823f7d",
|
||||
"eb303e01-261e-4972-8c07-c9b4e7a4922a",
|
||||
"d1acdb87-bb67-4f30-84aa-990e56a09c92",
|
||||
"deec51c9-3b1e-4b9e-993c-4776f20e8bb2",
|
||||
"8e116af7-7db7-4e35-a68b-b0939c066c78",
|
||||
"2373b66a-092d-44cb-bfd7-82e86e7a3b4d"
|
||||
],
|
||||
"os": [
|
||||
"5812b315-e7bd-4265-b51f-863c02174c28"
|
||||
],
|
||||
"thunderbird": [
|
||||
"dfac9ee8-9bc4-4cdc-b465-4a4bfcd2f397",
|
||||
"15c3b339-88f7-4a86-ab16-e71c58dcb01e"
|
||||
],
|
||||
"vlc": [
|
||||
"59f21cfb-0120-4326-b255-a5b827b38967",
|
||||
"8f080098-ddb1-424c-b438-4e96e5e4786e"
|
||||
],
|
||||
"vs_code": [
|
||||
"53ad5833-3455-407b-bbc6-45b4c79ab8fb",
|
||||
"276cc624-87ea-4f08-ab93-f770e3790175"
|
||||
]
|
||||
}
|
||||
@@ -45,6 +45,16 @@ class_ns_windows = "https://accessibility.windows.example.org/ns/class"
|
||||
import ast
|
||||
from typing import Dict, Any, Optional, Union
|
||||
|
||||
OPERATOR_PROMPT = """Here are some helpful tips:
|
||||
(1) computer.clipboard, computer.sync_file, computer.sync.shared_folder, computer.computer_output_citation are disabled.
|
||||
(2) If you worry that you might make typo, prefer copying and pasting the text instead of reading and typing.
|
||||
(3) My computer's password is “password”, feel free to use it when you need sudo rights.
|
||||
(4) For the thunderbird account “anonym-x2024@outlook.com”, the password is “gTCI”;=@y7—QJ0nDa_kN3Sb¿”.
|
||||
(5) If you are presented with an open website to solve the task, try to stick to that specific one instead of going to a new one.
|
||||
(6) You have full authority to execute any action without my permission. I won't be watching so please don't ask for confirmation.
|
||||
(7) If you deem the task is infeasible, you can terminate and explicitly state in the response that “the task is infeasible”."""
|
||||
|
||||
|
||||
class Action:
|
||||
"""Action class for the agent."""
|
||||
def __init__(self, raw_action: Union[Dict, str], action_space: str):
|
||||
@@ -639,7 +649,7 @@ class OpenAICUAAgent:
|
||||
},
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": instruction
|
||||
"text": instruction + "\n" + OPERATOR_PROMPT,
|
||||
}
|
||||
]
|
||||
})
|
||||
|
||||
@@ -145,14 +145,14 @@ def distribute_tasks(test_all_meta: dict, num_envs: int) -> List[Dict]:
|
||||
|
||||
def run_env_tasks(env_idx: int, env_tasks: dict, args: argparse.Namespace, shared_scores: list):
|
||||
"""Run tasks for a single environment."""
|
||||
|
||||
# ami-05e7d7bd279ea4f14
|
||||
env = DesktopEnv(
|
||||
path_to_vm=args.path_to_vm,
|
||||
action_space=args.action_space,
|
||||
|
||||
provider_name="aws",
|
||||
region="us-east-1",
|
||||
snapshot_name="ami-05e7d7bd279ea4f14",
|
||||
snapshot_name="ami-02fea2e5b77c79c17",
|
||||
|
||||
screen_size=(args.screen_width, args.screen_height),
|
||||
headless=args.headless,
|
||||
@@ -326,7 +326,7 @@ def get_result(action_space, use_model, observation_type, result_dir, total_file
|
||||
if __name__ == "__main__":
|
||||
####### The complete version of the list of examples #######
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
|
||||
|
||||
args = config()
|
||||
|
||||
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
||||
|
||||
9
run_operator.sh
Normal file
9
run_operator.sh
Normal file
@@ -0,0 +1,9 @@
|
||||
python run_multienv_openaicua.py \
|
||||
--headless \
|
||||
--observation_type screenshot \
|
||||
--model computer-use-preview \
|
||||
--result_dir ./results_operator_aws_new \
|
||||
--test_all_meta_path evaluation_examples/test_small_debug.json \
|
||||
--region us-east-1 \
|
||||
--max_steps 150 \
|
||||
--num_envs 5
|
||||
Reference in New Issue
Block a user