Wxy/opencua (#290)

* OpenCUA Agent code base

* update url

* debug, modify url input

* debug opencua

* show result

* debug agent history overlap

* modify opencua agent; add comment lines

* update parallel; clean code; use sleep 3s

* ui-tars-0717

* update detail

* add system password to system prompt

* add running command
This commit is contained in:
Xinyuan Wang
2025-07-31 08:53:49 +08:00
committed by GitHub
parent 3d32556085
commit 862d704b8c
2 changed files with 55 additions and 14 deletions

View File

@@ -1,3 +1,35 @@
"""
This script runs OpenCUA agents on OSWorld tasks using the AWS provider.
Before running it, host the OpenCUA model on your local machine or on a server.
Command for OpenCUA-7B and OpenCUA-32B:
```
python run_multienv_opencua.py \
--headless \
--observation_type screenshot \
--model OpenCUA-32B \
--result_dir ./results \
--test_all_meta_path evaluation_examples/test_all_no_gdrive.json \
--max_steps 100 \
--num_envs 30 \
--coordinate_type qwen25
```
Command for OpenCUA-Qwen2-7B and OpenCUA-A3B:
```
python run_multienv_opencua.py \
--headless \
--observation_type screenshot \
--model OpenCUA-A3B \
--result_dir ./results \
--test_all_meta_path evaluation_examples/test_nogdrive.json \
--max_steps 100 \
--num_envs 10 \
--coordinate_type relative
```
"""
from __future__ import annotations
import argparse
import datetime
@@ -7,9 +39,7 @@ import os
import sys
import signal
import time
from typing import List, Dict
import math
from tqdm import tqdm
from typing import List
from multiprocessing import Process, Manager
from multiprocessing import current_process
import lib_run_single
@@ -26,7 +56,7 @@ if os.path.exists(".env"):
from dotenv import load_dotenv
load_dotenv()
# Logger Configs {{{ #
# Logger Configs
def config() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Run end-to-end evaluation on the benchmark"
@@ -58,7 +88,7 @@ def config() -> argparse.Namespace:
parser.add_argument("--model", type=str, default="opencua")
parser.add_argument("--temperature", type=float, default=0)
parser.add_argument("--top_p", type=float, default=0.9)
parser.add_argument("--max_tokens", type=int, default=8196)
parser.add_argument("--max_tokens", type=int, default=2048)
parser.add_argument("--stop_token", type=str, default=None)
# OpenCUA agent config
@@ -129,7 +159,6 @@ stdout_handler.addFilter(logging.Filter("desktopenv"))
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
# }}} Logger Configs #
logger = logging.getLogger("desktopenv.experiment")