Dev/uitars 15 (#178)

* Debug UI-TARS 1.0; add UI-TARS 1.5 support

* update pyautogui parser

* modify function name

* update parser
This commit is contained in:
Shihao Liang
2025-04-17 18:49:21 +08:00
committed by GitHub
parent 1d10514125
commit bd2e980666
2 changed files with 273 additions and 65 deletions

View File

@@ -91,10 +91,20 @@ def config() -> argparse.Namespace:
)
# lm config
parser.add_argument("--model", type=str, default="gpt-4o")
parser.add_argument("--model", type=str, default="uitars")
parser.add_argument("--model_type", type=str, default="qwen25vl")
parser.add_argument("--infer_mode", type=str, default="qwen25vl_normal")
parser.add_argument("--prompt_style", type=str, default="qwen25vl_normal")
parser.add_argument("--input_swap", action="store_true", help="Use copy and paste to type content")
parser.add_argument("--language", type=str, default="Chinese")
parser.add_argument("--max_pixels", type=float, default=16384*28*28)
parser.add_argument("--min_pixels", type=float, default=100*28*28)
parser.add_argument("--temperature", type=float, default=1.0)
parser.add_argument("--top_p", type=float, default=0.9)
parser.add_argument("--max_tokens", type=int, default=1500)
parser.add_argument("--top_k", type=int, default=-1)
parser.add_argument("--history_n", type=int, default=5)
parser.add_argument("--callusr_tolerance", type=int, default=3)
parser.add_argument("--max_tokens", type=int, default=500)
parser.add_argument("--stop_token", type=str, default=None)
# example config
@@ -128,8 +138,18 @@ def test(args: argparse.Namespace, test_all_meta: dict) -> None:
"max_steps": args.max_steps,
"max_trajectory_length": args.max_trajectory_length,
"model": args.model,
"model_type": args.model_type,
"infer_mode": args.infer_mode,
"prompt_style": args.prompt_style,
"input_swap": args.input_swap,
"language": args.language,
"history_n": args.history_n,
"max_pixels": args.max_pixels,
"min_pixels": args.min_pixels,
"callusr_tolerance": args.callusr_tolerance,
"temperature": args.temperature,
"top_p": args.top_p,
"top_k": args.top_k,
"max_tokens": args.max_tokens,
"stop_token": args.stop_token,
"result_dir": args.result_dir,
@@ -137,12 +157,24 @@ def test(args: argparse.Namespace, test_all_meta: dict) -> None:
agent = UITARSAgent(
model=args.model,
max_tokens=args.max_tokens,
top_p=args.top_p,
temperature=args.temperature,
action_space=args.action_space,
observation_type=args.observation_type,
max_trajectory_length=args.max_trajectory_length,
model_type=args.model_type,
runtime_conf = {
"infer_mode": args.infer_mode,
"prompt_style": args.prompt_style,
"input_swap": args.input_swap,
"language": args.language,
"history_n": args.history_n,
"max_pixels": args.max_pixels,
"min_pixels": args.min_pixels,
"callusr_tolerance": args.callusr_tolerance,
"temperature": args.temperature,
"top_p": args.top_p,
"top_k": args.top_k,
"max_tokens": args.max_tokens
}
)
env = DesktopEnv(