From 3fd9fa94e63d1077e6a1a027757bd492c7e7cc98 Mon Sep 17 00:00:00 2001 From: yuanmengqi Date: Wed, 2 Jul 2025 11:24:45 +0000 Subject: [PATCH] clean chrome_fix code --- .../settings/proxy/dataimpulse.json | 4 +- evaluation_examples/test_fix_chrome.json | 50 --- manual_examine.py | 309 ------------------ monitor/.env | 4 +- run_human_examine.sh | 9 - run_multienv_openaicua.py | 3 +- run_operator.sh | 9 - 7 files changed, 5 insertions(+), 383 deletions(-) delete mode 100644 evaluation_examples/test_fix_chrome.json delete mode 100644 manual_examine.py delete mode 100644 run_human_examine.sh delete mode 100644 run_operator.sh diff --git a/evaluation_examples/settings/proxy/dataimpulse.json b/evaluation_examples/settings/proxy/dataimpulse.json index 5f7c0a4..2e7e65a 100644 --- a/evaluation_examples/settings/proxy/dataimpulse.json +++ b/evaluation_examples/settings/proxy/dataimpulse.json @@ -2,8 +2,8 @@ { "host": "gw.dataimpulse.com", "port": 823, - "username": "e750e5abb74376d28361", - "password": "e5ec245537e1e76a", + "username": "your_username", + "password": "your_password", "protocol": "http", "provider": "dataimpulse", "type": "residential", diff --git a/evaluation_examples/test_fix_chrome.json b/evaluation_examples/test_fix_chrome.json deleted file mode 100644 index 7f9ed93..0000000 --- a/evaluation_examples/test_fix_chrome.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "chrome": [ - "bb5e4c0d-f964-439c-97b6-bdb9747de3f4", - "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3", - "06fe7178-4491-4589-810f-2e2bc9502122", - "e1e75309-3ddb-4d09-92ec-de869c928143", - "35253b65-1c19-4304-8aa4-6884b8218fc0", - "2ad9387a-65d8-4e33-ad5b-7580065a27ca", - "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263", - "44ee5668-ecd5-4366-a6ce-c1c9b8d4e938", - "2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3", - "480bcfea-d68f-4aaa-a0a9-2589ef319381", - "af630914-714e-4a24-a7bb-f9af687d3b91", - "3720f614-37fd-4d04-8a6b-76f54f8c222d", - "99146c54-4f37-4ab8-9327-5f3291665e1e", - "12086550-11c0-466b-b367-1d9e75b3910e", - "6766f2b8-8a72-417f-a9e5-56fcaa735837", - "93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9", - "ae78f875-5b98-4907-bbb5-9c737fc68c03", - "3299584d-8f11-4457-bf4c-ce98f7600250", - "030eeff7-b492-4218-b312-701ec99ee0cc", - "9656a811-9b5b-4ddf-99c7-5117bcef0626", - "fc6d8143-9452-4171-9459-7f515143419a", - "a96b564e-dbe9-42c3-9ccf-b4498073438a", - "1704f00f-79e6-43a7-961b-cedd3724d5fd", - "f3b19d1e-2d48-44e9-b4e1-defcae1a0197", - "82bc8d6a-36eb-4d2d-8801-ef714fb1e55a", - "47543840-672a-467d-80df-8f7c3b9788c9", - "c1fa57f3-c3db-4596-8f09-020701085416", - "da46d875-6b82-4681-9284-653b0c7ae241", - "6c4c23a1-42a4-43cc-9db1-2f86ff3738cc", - "f79439ad-3ee8-4f99-a518-0eb60e5652b0", - "b7895e80-f4d1-4648-bee0-4eb45a6f1fa8", - "9f3f70fc-5afc-4958-a7b7-3bb4fcb01805", - "7f52cab9-535c-4835-ac8c-391ee64dc930", - "82279c77-8fc6-46f6-9622-3ba96f61b477", - "2888b4e6-5b47-4b57-8bf5-c73827890774", - "b4f95342-463e-4179-8c3f-193cd7241fb2", - "f5d96daf-83a8-4c86-9686-bada31fc66ab", - "121ba48f-9e17-48ce-9bc6-a4fb17a7ebba", - "368d9ba4-203c-40c1-9fa3-da2f1430ce63", - "59155008-fe71-45ec-8a8f-dc35497b6aa8", - "a728a36e-8bf1-4bb6-9a03-ef039a5233f0", - "b070486d-e161-459b-aa2b-ef442d973b92", - "0d8b7de3-e8de-4d86-b9fd-dd2dce58a217", - "9f935cce-0a9f-435f-8007-817732bfc0a5", - "f0b971a1-6831-4b9b-a50e-22a6e47f45ba", - "cabb3bae-cccb-41bd-9f5d-0f3a9fecd825" - ] -} \ No newline at end of file diff --git a/manual_examine.py b/manual_examine.py deleted file mode 100644 index ddd69b7..0000000 --- a/manual_examine.py +++ /dev/null @@ -1,309 +0,0 @@ -from __future__ import annotations -import argparse -import datetime -import json -import logging -import os -import sys -import signal -import time -from typing import List, Dict -from tqdm import tqdm -from desktop_env.desktop_env import DesktopEnv - -# Global variables for signal handling -active_environment = None -is_terminating = False - -# load the environment variables from .env file -if os.path.exists(".env"): - from dotenv import load_dotenv - load_dotenv() - -# Logger Configs {{{ # -def config() -> argparse.Namespace: - parser = argparse.ArgumentParser( - description="Manual examination of benchmark tasks" - ) - - # environment config - parser.add_argument("--path_to_vm", type=str, default=None) - parser.add_argument( - "--headless", action="store_true", help="Run in headless machine" - ) - parser.add_argument( - "--action_space", type=str, default="pyautogui", help="Action type" - ) - parser.add_argument( - "--observation_type", - choices=["screenshot", "a11y_tree", "screenshot_a11y_tree", "som"], - default="screenshot", - help="Observation type", - ) - parser.add_argument("--screen_width", type=int, default=1920) - parser.add_argument("--screen_height", type=int, default=1080) - parser.add_argument("--sleep_after_execution", type=float, default=0.0) - parser.add_argument("--max_steps", type=int, default=15) - - # agent config - parser.add_argument("--max_trajectory_length", type=int, default=3) - parser.add_argument( - "--test_config_base_dir", type=str, default="evaluation_examples" - ) - - # example config - parser.add_argument("--domain", type=str, required=True, help="Specific domain to examine") - parser.add_argument("--example_id", type=str, required=True, help="Specific example ID to examine") - parser.add_argument( - "--test_all_meta_path", type=str, default="evaluation_examples/test_all.json" - ) - - # logging related - parser.add_argument("--result_dir", type=str, default="./results_manual") - parser.add_argument("--log_level", type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], - default='INFO', help="Set the logging level") - - # aws config - parser.add_argument( - "--region", type=str, default="us-east-1", help="AWS region for the VM" - ) - args = parser.parse_args() - return args - -args = config() # Get command line arguments first - -logger = logging.getLogger() -log_level = getattr(logging, args.log_level.upper()) -logger.setLevel(log_level) - -datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") - -file_handler = logging.FileHandler( - os.path.join("logs", "manual-{:}.log".format(datetime_str)), encoding="utf-8" -) -debug_handler = logging.FileHandler( - os.path.join("logs", "manual-debug-{:}.log".format(datetime_str)), encoding="utf-8" -) -stdout_handler = logging.StreamHandler(sys.stdout) - -file_handler.setLevel(logging.INFO) -debug_handler.setLevel(logging.DEBUG) -stdout_handler.setLevel(log_level) - -formatter = logging.Formatter( - fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s" -) -file_handler.setFormatter(formatter) -debug_handler.setFormatter(formatter) -stdout_handler.setFormatter(formatter) - -stdout_handler.addFilter(logging.Filter("desktopenv")) - -logger.addHandler(file_handler) -logger.addHandler(debug_handler) -logger.addHandler(stdout_handler) -# }}} Logger Configs # - -logger = logging.getLogger("desktopenv.experiment") - - -def setup_example_logger(example, example_result_dir): - """设置特定样例的日志记录器""" - runtime_logger = logging.getLogger(f"desktopenv.example.{example['id']}") - runtime_logger.setLevel(logging.DEBUG) - runtime_logger.addHandler(logging.FileHandler(os.path.join(example_result_dir, "runtime.log"))) - return runtime_logger - - -def run_manual_examination(env, example, instruction, args, example_result_dir): - """手动检查单个样例的函数""" - runtime_logger = setup_example_logger(example, example_result_dir) - - # 重置环境并加载任务配置 - env.reset(task_config=example) - logger.info("环境正在初始化,请等待60秒...") - time.sleep(60) # Wait for the environment to be ready - - # 获取初始观察 - obs = env._get_obs() - - # 保存初始状态截图 - initial_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") - with open(os.path.join(example_result_dir, f"initial_state_{initial_timestamp}.png"), "wb") as f: - f.write(obs['screenshot']) - - # 记录任务信息 - with open(os.path.join(example_result_dir, "task_info.json"), "w", encoding="utf-8") as f: - json.dump({ - "domain": args.domain, - "example_id": args.example_id, - "instruction": instruction, - "initial_timestamp": initial_timestamp, - "example_config": example - }, f, indent=2, ensure_ascii=False) - - # 开始录制 - env.controller.start_recording() - - logger.info("="*80) - logger.info(f"任务域: {args.domain}") - logger.info(f"样例ID: {args.example_id}") - logger.info(f"任务指令: {instruction}") - logger.info("="*80) - logger.info("环境已准备就绪!") - logger.info("请在虚拟机中手动执行任务...") - logger.info("完成后请按回车键继续进行评估...") - logger.info("="*80) - - # 阻塞等待用户手动操作 - try: - input("按回车键开始评估...") - except KeyboardInterrupt: - logger.info("用户中断操作") - return None - - logger.info("开始评估...") - - # 获取最终状态截图 - final_obs = env._get_obs() - final_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") - with open(os.path.join(example_result_dir, f"final_state_{final_timestamp}.png"), "wb") as f: - f.write(final_obs['screenshot']) - - # 评估结果 - result = env.evaluate() - logger.info(f"评估结果: {result:.2f}") - - # 保存结果 - with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f: - f.write(f"{result}\n") - - # 保存执行记录 - with open(os.path.join(example_result_dir, "execution_log.jsonl"), "w", encoding="utf-8") as f: - f.write(json.dumps({ - "type": "manual_execution", - "initial_timestamp": initial_timestamp, - "final_timestamp": final_timestamp, - "result": result, - "initial_screenshot": f"initial_state_{initial_timestamp}.png", - "final_screenshot": f"final_state_{final_timestamp}.png" - }, ensure_ascii=False)) - f.write("\n") - - # 结束录制 - env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4")) - - return result - - -def signal_handler(signum, frame): - """处理终止信号以优雅关闭环境""" - global is_terminating, active_environment - - # 避免重复处理 - if is_terminating: - return - - is_terminating = True - logger.info(f"接收到信号 {signum}。正在优雅关闭...") - - # 关闭环境 - if active_environment: - try: - logger.info("正在关闭环境...") - active_environment.close() - logger.info("环境已成功关闭") - except Exception as e: - logger.error(f"关闭环境时出错: {e}") - - logger.info("关闭完成。退出程序。") - sys.exit(0) - - -def main(): - global active_environment - - # 注册信号处理器以优雅终止 - signal.signal(signal.SIGINT, signal_handler) # Handle Ctrl+C - signal.signal(signal.SIGTERM, signal_handler) # Handle termination signal - - try: - args = config() - logger.info("参数: %s", args) - - # 加载指定的任务 - config_file = os.path.join( - args.test_config_base_dir, f"examples/{args.domain}/{args.example_id}.json" - ) - - if not os.path.exists(config_file): - logger.error(f"配置文件不存在: {config_file}") - return - - with open(config_file, "r", encoding="utf-8") as f: - example = json.load(f) - - # 创建结果目录 - example_result_dir = os.path.join( - args.result_dir, - args.action_space, - args.observation_type, - "manual_examination", - args.domain, - args.example_id, - ) - os.makedirs(example_result_dir, exist_ok=True) - - # 设置环境 - from desktop_env.providers.aws.manager import IMAGE_ID_MAP - REGION = "us-east-1" - active_environment = DesktopEnv( - path_to_vm=args.path_to_vm, - action_space=args.action_space, - provider_name="aws", - region=REGION, - snapshot_name=IMAGE_ID_MAP[REGION], - screen_size=(args.screen_width, args.screen_height), - headless=args.headless, - os_type="Ubuntu", - require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"], - enable_proxy=True - ) - - # 执行手动检查 - result = run_manual_examination( - active_environment, - example, - example["instruction"], - args, - example_result_dir - ) - - if result is not None: - logger.info(f"手动检查完成。最终结果: {result:.2f}") - else: - logger.info("手动检查被中断") - - except KeyboardInterrupt: - logger.info("主进程接收到KeyboardInterrupt") - # 信号处理器会处理清理工作 - except Exception as e: - logger.error(f"主进程中的意外错误: {e}", exc_info=True) - # 也触发清理 - signal_handler(signal.SIGTERM, None) - finally: - # 最终清理以防任何环境或进程仍然存在 - logger.info("主进程最终清理...") - if active_environment is not None: - try: - logger.info("在最终清理中关闭环境...") - active_environment.close() - logger.info("在最终清理中环境已成功关闭") - except Exception as e: - logger.error(f"最终环境清理期间出错: {e}") - - -if __name__ == "__main__": - # 禁用tokenizers并行处理避免警告 - os.environ["TOKENIZERS_PARALLELISM"] = "false" - main() \ No newline at end of file diff --git a/monitor/.env b/monitor/.env index 62ba076..05618af 100644 --- a/monitor/.env +++ b/monitor/.env @@ -2,9 +2,9 @@ # Do not write any secret keys or sensitive information here. # Monitor configuration -TASK_CONFIG_PATH=../evaluation_examples/test_fix_chrome.json +TASK_CONFIG_PATH=../evaluation_examples/test_all.json EXAMPLES_BASE_PATH=../evaluation_examples/examples -RESULTS_BASE_PATH=../results_chrome_operator +RESULTS_BASE_PATH=../results_all ACTION_SPACE=pyautogui OBSERVATION_TYPE=screenshot MODEL_NAME=computer-use-preview diff --git a/run_human_examine.sh b/run_human_examine.sh deleted file mode 100644 index c8e8447..0000000 --- a/run_human_examine.sh +++ /dev/null @@ -1,9 +0,0 @@ -python manual_examine.py \ - --headless \ - --observation_type screenshot \ - --result_dir ./results_human_examine_chrome_fix_1 \ - --test_all_meta_path evaluation_examples/test_fix_chrome.json \ - --region us-east-1 \ - --domain chrome \ - --example_id 030eeff7-b492-4218-b312-701ec99ee0cc \ - --max_steps 3 \ No newline at end of file diff --git a/run_multienv_openaicua.py b/run_multienv_openaicua.py index be562b6..241f73e 100644 --- a/run_multienv_openaicua.py +++ b/run_multienv_openaicua.py @@ -193,8 +193,7 @@ def run_env_tasks(env_idx: int, env_tasks: dict, args: argparse.Namespace, share screen_size=(args.screen_width, args.screen_height), headless=args.headless, os_type="Ubuntu", - require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"], - enable_proxy=True + require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"] ) active_environments.append(env) agent = OpenAICUAAgent( diff --git a/run_operator.sh b/run_operator.sh deleted file mode 100644 index da52d1a..0000000 --- a/run_operator.sh +++ /dev/null @@ -1,9 +0,0 @@ -python run_multienv_openaicua.py \ ---headless \ ---observation_type screenshot \ ---model computer-use-preview \ ---result_dir ./results_chrome_operator \ ---test_all_meta_path evaluation_examples/test_fix_chrome.json \ ---region us-east-1 \ ---max_steps 150 \ ---num_envs 10 \ No newline at end of file