From 04089fa21832c1c6b3e2bc90e8e72c326d96210a Mon Sep 17 00:00:00 2001 From: kingyang0 <445823862@qq.com> Date: Thu, 26 Mar 2026 10:50:07 +0800 Subject: [PATCH] Save local changes before pulling --- evaluation_examples/test_final.json | 23 +++++++++++++++++++++++ run_proxmox.sh | 12 ++++++------ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/evaluation_examples/test_final.json b/evaluation_examples/test_final.json index 7a1aa67..a46ac11 100644 --- a/evaluation_examples/test_final.json +++ b/evaluation_examples/test_final.json @@ -23,5 +23,28 @@ "VESTA_Manual_task9", "VESTA_Manual_task10", "VESTA_Manual_task11" + ], + "origin": [ + "Origin_User_Guide_2025b_E_task2", + "Origin_User_Guide_2025b_E_task3", + "Origin_User_Guide_2025b_E_task4", + "Origin_User_Guide_2025b_E_task5", + "Origin_User_Guide_2025b_E_task8", + "Origin_User_Guide_2025b_E_task9", + "Origin_User_Guide_2025b_E_task11", + "Origin_User_Guide_2025b_E_task12" + ], + "avogadro": [ + "building-metal-complexes_task1", + "building-metal-complexes_task3", + "building-metal-complexes_task7", + "building-organic-molecules_task1", + "building-organic-molecules_task3", + "building-organic-molecules_task4", + "building-organic-molecules_task5", + "building-organic-molecules_task9", + "naming-a-molecule_task1", + "using-qtaim-and-wfn_task2", + "viewing-electrostatic-potential_task1" ] } diff --git a/run_proxmox.sh b/run_proxmox.sh index 2d9bc74..53fd558 100755 --- a/run_proxmox.sh +++ b/run_proxmox.sh @@ -12,7 +12,7 @@ export PROXMOX_VM_IP="10.10.17.10" # ---------- LLM API 配置 ---------- # OpenAI 兼容代理(同时用于 Agent 模型和 Eval 模型) -export OPENAI_API_KEY="sk-5zk3CL73E2DsNyMn5a6dA357B6214eEd9240A674Ec0555Be" +export OPENAI_API_KEY="sk-EQGuvk0rS7EG4Cu22cF6D5Cc3a324c88B2E2D432Bc59Bb17" # ⚠️ 请替换为你的实际 API Key export OPENAI_BASE_URL="https://vip.apiyi.com/v1" # ---------- 评测参数(按需修改) ---------- @@ -26,13 +26,13 @@ TEMPERATURE=0.5 # 生成温度(越低越稳定 TOP_P=0.9 # nucleus sampling MAX_TOKENS=16384 # 模型最大输出 token 数 MAX_TRAJECTORY_LENGTH=3 # 历史轨迹保留长度 -OBSERVATION_TYPE="screenshot_a11y_tree" # 观测类型 +OBSERVATION_TYPE="screenshot" # 观测类型 ACTION_SPACE="pyautogui" # 动作空间 SCREEN_WIDTH=1920 # 屏幕宽度 SCREEN_HEIGHT=1080 # 屏幕高度 -RESULT_DIR="/Volumes/Castor/课题/results" # 结果输出目录 -TEST_META="evaluation_examples/test_final.json" # 评测任务列表 -DOMAIN="vesta" # 评测领域 +RESULT_DIR="/mnt/d/work/result" # 结果输出目录 +TEST_META="/mnt/d/work/sci-gui-agent-benchmark/evaluation_examples/test_final.json" # 评测任务列表 +DOMAIN="origin" # 评测领域 SNAPSHOT_NAME="snapshot" # 快照名称(需提前创建) INJECT_STEPS=false # 是否注入教程步骤到 Agent prompt(baseline 不注入) @@ -101,7 +101,7 @@ else INJECT_STEPS_FLAG="--no_inject_steps" fi -python run.py \ +python3 run.py \ --provider_name "${PROVIDER}" \ --path_to_vm "${VM_ID}" \ --observation_type "${OBSERVATION_TYPE}" \