Wxy/opencua (#290)
* OpenCUA Agent code base * update url * debug, modify url input * debug opencua * show result * debug agent history overlap * modify opencua agent; add comment lines * update parallel; clean code; use sleep 3s * ui-tars-0717 * update detail * add system password to system prompt * add running command
This commit is contained in:
@@ -1,3 +1,35 @@
|
||||
"""
|
||||
This script runs OpenCUA agents on OSWorld tasks using the AWS provider.
|
||||
|
||||
First, host the OpenCUA model on your local machine or on a server.
|
||||
|
||||
Command for OpenCUA-7B and OpenCUA-32B:
|
||||
```
|
||||
python run_multienv_opencua.py \
|
||||
--headless \
|
||||
--observation_type screenshot \
|
||||
--model OpenCUA-32B \
|
||||
--result_dir ./results --test_all_meta_path evaluation_examples/test_all_no_gdrive.json \
|
||||
--max_steps 100 \
|
||||
--num_envs 30 \
|
||||
--coordinate_type qwen25
|
||||
```
|
||||
|
||||
Command for OpenCUA-Qwen2-7B and OpenCUA-A3B:
|
||||
```
|
||||
python run_multienv_opencua.py \
|
||||
--headless \
|
||||
--observation_type screenshot \
|
||||
--model OpenCUA-A3B \
|
||||
--result_dir ./results \
|
||||
--test_all_meta_path evaluation_examples/test_nogdrive.json \
|
||||
--max_steps 100 \
|
||||
--num_envs 10 \
|
||||
--coordinate_type relative
|
||||
```
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import argparse
|
||||
import datetime
|
||||
@@ -7,9 +39,7 @@ import os
|
||||
import sys
|
||||
import signal
|
||||
import time
|
||||
from typing import List, Dict
|
||||
import math
|
||||
from tqdm import tqdm
|
||||
from typing import List
|
||||
from multiprocessing import Process, Manager
|
||||
from multiprocessing import current_process
|
||||
import lib_run_single
|
||||
@@ -26,7 +56,7 @@ if os.path.exists(".env"):
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
# Logger Configs {{{ #
|
||||
# Logger Configs
|
||||
def config() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Run end-to-end evaluation on the benchmark"
|
||||
@@ -58,7 +88,7 @@ def config() -> argparse.Namespace:
|
||||
parser.add_argument("--model", type=str, default="opencua")
|
||||
parser.add_argument("--temperature", type=float, default=0)
|
||||
parser.add_argument("--top_p", type=float, default=0.9)
|
||||
parser.add_argument("--max_tokens", type=int, default=8196)
|
||||
parser.add_argument("--max_tokens", type=int, default=2048)
|
||||
parser.add_argument("--stop_token", type=str, default=None)
|
||||
|
||||
# OpenCUA agent config
|
||||
@@ -129,7 +159,6 @@ stdout_handler.addFilter(logging.Filter("desktopenv"))
|
||||
logger.addHandler(file_handler)
|
||||
logger.addHandler(debug_handler)
|
||||
logger.addHandler(stdout_handler)
|
||||
# }}} Logger Configs #
|
||||
|
||||
logger = logging.getLogger("desktopenv.experiment")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user