config: 将评测域改为all以支持全软件批量评测
This commit is contained in:
@@ -12,11 +12,7 @@ export PROXMOX_VM_IP="10.10.17.10"
|
||||
|
||||
# ---------- LLM API configuration ----------
# OpenAI-compatible proxy (used for both the Agent model and the Eval model).
#
# The API key is deliberately NOT stored in this file: it is taken from the
# caller's environment so the secret never lands in version control.
# Export OPENAI_API_KEY in your shell (or an untracked local file) beforehand.
# NOTE(review): a key was previously committed here; treat it as compromised
# and rotate it with the provider.
export OPENAI_API_KEY="${OPENAI_API_KEY:-}"
if [ -z "$OPENAI_API_KEY" ]; then
  # Warn instead of aborting so that sourcing this file never kills the
  # caller's shell; requests to the API will fail until the key is provided.
  printf '%s\n' 'warning: OPENAI_API_KEY is not set; export it before running the evaluation' >&2
fi
export OPENAI_BASE_URL="https://vip.apiyi.com/v1"
|
||||
|
||||
# ---------- 评测参数(按需修改) ----------
|
||||
@@ -34,15 +30,9 @@ OBSERVATION_TYPE="screenshot" # 观测类型
|
||||
ACTION_SPACE="pyautogui"       # action space
SCREEN_WIDTH=1920              # screen width
SCREEN_HEIGHT=1080             # screen height
# The merge conflict that used to sit here is resolved to the values that were
# effective under last-assignment-wins (the incoming side of the conflict).
# NOTE(review): RESULT_DIR is a machine-specific absolute path; confirm it
# exists on the host that runs the evaluation, or adjust it.
RESULT_DIR="/Volumes/Castor/课题/results_baseline_50steps"  # result output directory
TEST_META="evaluation_examples/test_final.json"             # evaluation task list
# "all" selects every domain so the whole software set is evaluated in one batch.
DOMAIN="all"                   # evaluation domain
SNAPSHOT_NAME="snapshot"       # snapshot name (must be created beforehand)
INJECT_STEPS=false             # whether to inject tutorial steps into the Agent prompt (baseline: no)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user