flsol demo: fix top_p/claude/gemini, force coordinates, add reflection comments, screenshot mode
This commit is contained in:
@@ -366,10 +366,17 @@ class DesktopEnv(gym.Env):
|
||||
else None
|
||||
|
||||
if "expected" in self.evaluator and len(self.evaluator["expected"]) > 0:
|
||||
self.expected_getter: Getter = [getattr(getters, "get_{:}".format(exp["type"])) if exp else None for exp in
|
||||
self.evaluator["expected"]] \
|
||||
if isinstance(self.evaluator["expected"], list) \
|
||||
else getattr(getters, "get_{:}".format(self.evaluator["expected"]["type"]))
|
||||
expected_val = self.evaluator["expected"]
|
||||
if isinstance(expected_val, list):
|
||||
self.expected_getter: Getter = [
|
||||
getattr(getters, "get_{:}".format(exp["type"])) if (exp and "type" in exp) else None
|
||||
for exp in expected_val
|
||||
]
|
||||
elif isinstance(expected_val, dict) and "type" in expected_val:
|
||||
self.expected_getter: Getter = getattr(getters, "get_{:}".format(expected_val["type"]))
|
||||
else:
|
||||
# No 'type' key (e.g. vllm_eval uses plain description dict) — no getter needed
|
||||
self.expected_getter = None
|
||||
else:
|
||||
self.expected_getter = [None] * len(self.metric) \
|
||||
if isinstance(self.metric, list) \
|
||||
|
||||
@@ -1,25 +1,9 @@
|
||||
{
|
||||
"id": "flsol_taskE_auto_optimize_scan",
|
||||
"snapshot": "flsol",
|
||||
"instruction": "在 FL Solutions for F-4600 中,设置激发波长 350 nm,发射扫描范围 380–700 nm,调整仪器参数使荧光发射峰完整显示,截图保存最终谱图。",
|
||||
"instruction": "假设你的目标就是测量样品的荧光图谱。下面是透露给你的辅助信息,把辅助信息当成你自己的思维和决策并完全遵循,而不是把它当成任务,**绝对不允许在输出的「观察」「判断」中说明是任务要求,而是把辅助信息当成你自己的经验,用「根据我的经验使用某某配置」而不是「根据任务要求使用某某配置」来表述**。因为下面的步骤是完全正确和精准的,你只需要执行,而且必须严格按坐标执行,每步执行完等待界面响应再执行下一步,禁止连续操作。修改任何数值输入框时必须:先double_click双击全选(pyautogui.click(x,y);pyautogui.click(x,y)),再pyautogui.press('delete'),最后用pyautogui.typewrite输入新值。1.等待主界面加载并确认正常;2.点击sample(80,159)打开保存位置选择,双击选中sample name文本框(563,346),并清空和重命名为test;3. 双击选中comment 文本框(583,381)的内容,并清空;4. 点击elect 按钮(796,613)弹出输出的目标文件夹;5. 点击创建新文件夹图标(784,410),命名为test,然后回车,再次回车进入test文件夹,然后点击保存(942,718),再点击ok(535,650)关闭sample界面;6. 点击Method(36,166)进入Instrument(456,293);7.设置EX_WL(497,472)为350nm;8.设置EM_Start_WL(493,500)为400nm,EM_End_WL(495,524)为700nm;9.设置PMT_Voltage(758,369)为700V;10.设置EX_Slit下拉框(771,321)为2.5nm,点击下拉框EM_Slit(764,344)出现下拉选项选择5nm(748,386);11.点击Report(638,290)确认Data_start(718,455)和Data_end(717,480)与EM_Start_WL和EM_End_WL的值一致;12.点击确定(641,717);13.等待parameters更新完成并ready后,才可点击Measure(182,162)测量并观察谱图;14.点击底部左下角ex3的输出结果最大化按钮(108,948)",
|
||||
"source": "custom",
|
||||
"config": [
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"C:\\Program Files\\FL Solutions\\flsol.exe"
|
||||
],
|
||||
"shell": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "sleep",
|
||||
"parameters": {
|
||||
"seconds": 12
|
||||
}
|
||||
}
|
||||
],
|
||||
"config": [],
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"flsol"
|
||||
@@ -43,9 +27,391 @@
|
||||
"possibility_of_env_change": "medium",
|
||||
"metadata": {
|
||||
"input_files": [],
|
||||
"steps": "1. 等待 FL Solutions 主界面完全加载,确认仪器状态正常(无报错弹窗)。\n2. 点击菜单栏 'Method' → 'New',选择 'Wavelength Scan'(波长扫描)方法类型,新建一个扫描方法。\n3. 在方法参数区域,将 'Excitation Wavelength'(激发波长)设为 '350' nm。\n4. 将发射扫描起始波长(Start WL / Em Start)设为 '380' nm,结束波长(End WL / Em End)设为 '700' nm。\n5. 将 'PMT Voltage' 设为 '700' V。\n6. 将 'EX Slit'(激发狭缝)和 'EM Slit'(发射狭缝)均设为 '2.5' nm。\n7. 点击 'Measure'(或按 F4 快捷键)执行第一次测量,等待扫描完成,观察图表区域中出现的谱图曲线。\n8. 【判断1:信号过强/截断】若谱图曲线的峰顶部出现水平平坦段(说明信号超量程被截断),则:\n a. 返回方法参数区域,将 PMT Voltage 降低 50 V(如从 700V 降至 650V);\n b. 若降压后仍截断,可同时将狭缝宽度缩小一档(如从 2.5 nm 改为 1.0 nm);\n c. 重新点击 'Measure' 执行测量,再次观察谱图。\n9. 【判断2:信号过弱】若谱图曲线几乎是一条接近零的直线(信号太弱),则:\n a. 返回方法参数区域,将 PMT Voltage 升高 50 V(如从 700V 升至 750V);\n b. 若仍过弱,可同时将狭缝宽度增大一档(如从 2.5 nm 改为 5.0 nm);\n c. 重新点击 'Measure' 执行测量,再次观察谱图。\n10. 重复步骤 8-9,每次调整后重新测量,直到谱图满足以下条件:峰形完整(顶部无截断平台)、峰值强度在纵轴量程的 30%–90% 范围内、基线平稳、峰形平滑。\n11. 满足条件后,截图保存当前谱图界面,记录最终参数(PMT Voltage、EX Slit、EM Slit 数值)。",
|
||||
"steps": "",
|
||||
"steps_original": "1. 打开 FL Solutions,新建波长扫描方法。\n2. 设置激发波长 350 nm,发射范围 380-700 nm,初始 PMT 700V,狭缝 2.5 nm。\n3. 执行测量,观察谱图。\n4. 若峰截断则降低 PMT 电压和/或缩小狭缝;若信号过弱则升高 PMT 电压和/或增大狭缝。\n5. 反复迭代测量直到峰形完整显示,截图记录最终结果。",
|
||||
"difficulty": "hard",
|
||||
"highlight": "AI 能够读取谱图质量并进行闭环迭代调参,体现真正的仪器操控智能,而非机械执行固定步骤。"
|
||||
"highlight": "AI 能够读取谱图质量并进行闭环迭代调参,体现真正的仪器操控智能,而非机械执行固定步骤。",
|
||||
"ui_coordinates": {
|
||||
"application": "FL Solutions - F-4600",
|
||||
"main_window": {
|
||||
"name": "F-4600 FL Spectrophotometer on USB",
|
||||
"toolbar": [
|
||||
{
|
||||
"name": "Method",
|
||||
"type": "Button_Icon",
|
||||
"center": [
|
||||
36,
|
||||
166
|
||||
],
|
||||
"description": "打开分析方法设置窗口"
|
||||
},
|
||||
{
|
||||
"name": "Measure",
|
||||
"type": "Button_Icon",
|
||||
"center": [
|
||||
182,
|
||||
162
|
||||
],
|
||||
"description": "执行测量"
|
||||
}
|
||||
],
|
||||
"monitor_panel": [
|
||||
{
|
||||
"name": "Fluorescence_Value",
|
||||
"type": "Display",
|
||||
"center": [
|
||||
959,
|
||||
240
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "EX_WL_Display",
|
||||
"type": "Text",
|
||||
"center": [
|
||||
912,
|
||||
262
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "EM_WL_Display",
|
||||
"type": "Text",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Status_Ready_Label",
|
||||
"type": "Status_Indicator",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"color_hint": "Green"
|
||||
}
|
||||
]
|
||||
},
|
||||
"sub_windows": {
|
||||
"Analysis_Method": {
|
||||
"common_controls": {
|
||||
"tabs": [
|
||||
{
|
||||
"name": "General",
|
||||
"center": [
|
||||
398,
|
||||
291
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Instrument",
|
||||
"center": [
|
||||
456,
|
||||
293
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Monitor",
|
||||
"center": [
|
||||
524,
|
||||
295
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Processing",
|
||||
"center": [
|
||||
574,
|
||||
293
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Report",
|
||||
"center": [
|
||||
638,
|
||||
290
|
||||
]
|
||||
}
|
||||
],
|
||||
"footer_buttons": [
|
||||
{
|
||||
"name": "确定",
|
||||
"center": [
|
||||
641,
|
||||
717
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "取消",
|
||||
"center": [
|
||||
719,
|
||||
717
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "应用",
|
||||
"center": [
|
||||
788,
|
||||
717
|
||||
],
|
||||
"status": "disabled"
|
||||
},
|
||||
{
|
||||
"name": "帮助",
|
||||
"center": [
|
||||
882,
|
||||
717
|
||||
]
|
||||
}
|
||||
],
|
||||
"close_button": {
|
||||
"name": "Close_Window",
|
||||
"center": [
|
||||
905,
|
||||
263
|
||||
]
|
||||
}
|
||||
},
|
||||
"tabs_content": {
|
||||
"General": [
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "Measurement",
|
||||
"center": [
|
||||
512,
|
||||
321
|
||||
],
|
||||
"value": "Wavelength scan"
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Operator",
|
||||
"center": [
|
||||
533,
|
||||
349
|
||||
],
|
||||
"value": "Administrator"
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Instrument_Model",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "Sampling",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"status": "disabled"
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Comments",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "CheckBox",
|
||||
"name": "Use_sample_table",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "Button",
|
||||
"name": "Load",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "Button",
|
||||
"name": "Save",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "Button",
|
||||
"name": "Save_As",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
}
|
||||
],
|
||||
"Instrument": [
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "Scan_mode",
|
||||
"center": [
|
||||
550,
|
||||
322
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "Data_mode",
|
||||
"center": [
|
||||
526,
|
||||
347
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "EX_WL",
|
||||
"center": [
|
||||
497,
|
||||
472
|
||||
],
|
||||
"unit": "nm"
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "EM_Start_WL",
|
||||
"center": [
|
||||
493,
|
||||
500
|
||||
],
|
||||
"unit": "nm"
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "EM_End_WL",
|
||||
"center": [
|
||||
495,
|
||||
524
|
||||
],
|
||||
"unit": "nm"
|
||||
},
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "Scan_speed",
|
||||
"center": [
|
||||
497,
|
||||
549
|
||||
],
|
||||
"unit": "nm/min"
|
||||
},
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "EX_Slit",
|
||||
"center": [
|
||||
771,
|
||||
321
|
||||
],
|
||||
"unit": "nm"
|
||||
},
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "EM_Slit",
|
||||
"center": [
|
||||
764,
|
||||
344
|
||||
],
|
||||
"unit": "nm"
|
||||
},
|
||||
{
|
||||
"type": "Edit_Spin",
|
||||
"name": "PMT_Voltage",
|
||||
"center": [
|
||||
758,
|
||||
369
|
||||
],
|
||||
"unit": "V"
|
||||
},
|
||||
{
|
||||
"type": "CheckBox",
|
||||
"name": "PMT_Voltage_Limit",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"label": "PMT Voltage 0-1000V"
|
||||
},
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "Response",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"unit": "s"
|
||||
},
|
||||
{
|
||||
"type": "Edit_Spin",
|
||||
"name": "Replicates",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
}
|
||||
],
|
||||
"Monitor": [
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Y_Axis_Max",
|
||||
"center": [
|
||||
455,
|
||||
343
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Y_Axis_Min",
|
||||
"center": [
|
||||
455,
|
||||
375
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "CheckBox",
|
||||
"name": "Open_processing_after_acquisition",
|
||||
"center": [
|
||||
396,
|
||||
437
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "CheckBox",
|
||||
"name": "Overlay",
|
||||
"center": [
|
||||
395,
|
||||
503
|
||||
]
|
||||
}
|
||||
],
|
||||
"Report": [
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Data_start_Value",
|
||||
"center": [
|
||||
718,
|
||||
455
|
||||
],
|
||||
"unit": "nm"
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Data_end_Value",
|
||||
"center": [
|
||||
717,
|
||||
480
|
||||
],
|
||||
"unit": "nm"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,417 @@
|
||||
{
|
||||
"id": "flsol_taskE_auto_optimize_scan",
|
||||
"snapshot": "flsol",
|
||||
"instruction": "请一步一步地执行,不要跳步,完全按照给定的指令执行:5.点击'PMT_Voltage'(758,369)设置为700V;6.设置'EX_Slit(下拉框)(771,321)'为2.5nm,'点击下拉框EM_Slit(764,344)'出现下拉选项,选择5nm(748,386);7.点击'Report(638,290)'确认'Data_start(718,455)'和'Data_end(717,480)与instrument的EM_Start_WL和EM_End_WL'的值一致;8.点击'确定(641,717)';9.点击'Measure(182,162)'测量并观察谱图;10.若信号截断则返回'Method(36,166)'降低'PMT_Voltage(774,369)'或缩小狭缝(771,321)后重测;11.若信号过弱则返回'Method(36,166)'升高'PMT_Voltage(774,369)'或增大狭缝(764,344)后重测;12.重复调优至峰值在30%-90%且峰形完整;13.点击'输出结果(108,948)'并最大化。",
|
||||
"source": "custom",
|
||||
"config": [],
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"flsol"
|
||||
],
|
||||
"evaluator": {
|
||||
"postconfig": [
|
||||
{
|
||||
"type": "sleep",
|
||||
"parameters": {
|
||||
"seconds": 5
|
||||
}
|
||||
}
|
||||
],
|
||||
"func": "vllm_eval",
|
||||
"expected": {
|
||||
"description": "FL Solutions 主界面中图表区域应显示一条完整的荧光发射光谱曲线:峰形平滑、顶部无截断(曲线最高点不贴近纵轴上限)、基线平稳、信噪比良好。界面中的仪器参数区域应可见激发波长 350 nm、发射扫描范围 380-700 nm,以及经过迭代调整后的最终 PMT 电压和狭缝宽度参数。"
|
||||
}
|
||||
},
|
||||
"proxy": false,
|
||||
"fixed_ip": true,
|
||||
"possibility_of_env_change": "medium",
|
||||
"metadata": {
|
||||
"input_files": [],
|
||||
"steps": "{\"application\": \"FL Solutions - F-4600\", \"main_window\": {\"toolbar\": [{\"name\": \"Method\", \"center\": [36, 166]}, {\"name\": \"Measure\", \"center\": [182, 162]}]}, \"sub_windows\": {\"Analysis_Method\": {\"tabs\": [{\"name\": \"Instrument\", \"center\": [456, 293]}], \"footer_buttons\": [{\"name\": \"确定\", \"center\": [641, 717]}, {\"name\": \"取消\", \"center\": [719, 717]}], \"Instrument\": [{\"name\": \"EX_WL\", \"center\": [497, 472]}, {\"name\": \"EM_Start_WL\", \"center\": [493, 500]}, {\"name\": \"EM_End_WL\", \"center\": [495, 524]}, {\"name\": \"EX_Slit\", \"center\": [771, 321]}, {\"name\": \"EM_Slit\", \"center\": [764, 344]}, {\"name\": \"PMT_Voltage\", \"center\": [774, 369]}]}}}",
|
||||
"steps_original": "1. 打开 FL Solutions,新建波长扫描方法。\n2. 设置激发波长 350 nm,发射范围 380-700 nm,初始 PMT 700V,狭缝 2.5 nm。\n3. 执行测量,观察谱图。\n4. 若峰截断则降低 PMT 电压和/或缩小狭缝;若信号过弱则升高 PMT 电压和/或增大狭缝。\n5. 反复迭代测量直到峰形完整显示,截图记录最终结果。",
|
||||
"difficulty": "hard",
|
||||
"highlight": "AI 能够读取谱图质量并进行闭环迭代调参,体现真正的仪器操控智能,而非机械执行固定步骤。",
|
||||
"ui_coordinates": {
|
||||
"application": "FL Solutions - F-4600",
|
||||
"main_window": {
|
||||
"name": "F-4600 FL Spectrophotometer on USB",
|
||||
"toolbar": [
|
||||
{
|
||||
"name": "Method",
|
||||
"type": "Button_Icon",
|
||||
"center": [
|
||||
36,
|
||||
166
|
||||
],
|
||||
"description": "打开分析方法设置窗口"
|
||||
},
|
||||
{
|
||||
"name": "Measure",
|
||||
"type": "Button_Icon",
|
||||
"center": [
|
||||
182,
|
||||
162
|
||||
],
|
||||
"description": "执行测量"
|
||||
}
|
||||
],
|
||||
"monitor_panel": [
|
||||
{
|
||||
"name": "Fluorescence_Value",
|
||||
"type": "Display",
|
||||
"center": [
|
||||
959,
|
||||
240
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "EX_WL_Display",
|
||||
"type": "Text",
|
||||
"center": [
|
||||
912,
|
||||
262
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "EM_WL_Display",
|
||||
"type": "Text",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Status_Ready_Label",
|
||||
"type": "Status_Indicator",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"color_hint": "Green"
|
||||
}
|
||||
]
|
||||
},
|
||||
"sub_windows": {
|
||||
"Analysis_Method": {
|
||||
"common_controls": {
|
||||
"tabs": [
|
||||
{
|
||||
"name": "General",
|
||||
"center": [
|
||||
398,
|
||||
291
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Instrument",
|
||||
"center": [
|
||||
456,
|
||||
293
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Monitor",
|
||||
"center": [
|
||||
524,
|
||||
295
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Processing",
|
||||
"center": [
|
||||
574,
|
||||
293
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Report",
|
||||
"center": [
|
||||
638,
|
||||
290
|
||||
]
|
||||
}
|
||||
],
|
||||
"footer_buttons": [
|
||||
{
|
||||
"name": "确定",
|
||||
"center": [
|
||||
641,
|
||||
717
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "取消",
|
||||
"center": [
|
||||
719,
|
||||
717
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "应用",
|
||||
"center": [
|
||||
788,
|
||||
717
|
||||
],
|
||||
"status": "disabled"
|
||||
},
|
||||
{
|
||||
"name": "帮助",
|
||||
"center": [
|
||||
882,
|
||||
717
|
||||
]
|
||||
}
|
||||
],
|
||||
"close_button": {
|
||||
"name": "Close_Window",
|
||||
"center": [
|
||||
905,
|
||||
263
|
||||
]
|
||||
}
|
||||
},
|
||||
"tabs_content": {
|
||||
"General": [
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "Measurement",
|
||||
"center": [
|
||||
512,
|
||||
321
|
||||
],
|
||||
"value": "Wavelength scan"
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Operator",
|
||||
"center": [
|
||||
533,
|
||||
349
|
||||
],
|
||||
"value": "Administrator"
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Instrument_Model",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "Sampling",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"status": "disabled"
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Comments",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "CheckBox",
|
||||
"name": "Use_sample_table",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "Button",
|
||||
"name": "Load",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "Button",
|
||||
"name": "Save",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "Button",
|
||||
"name": "Save_As",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
}
|
||||
],
|
||||
"Instrument": [
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "Scan_mode",
|
||||
"center": [
|
||||
550,
|
||||
322
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "Data_mode",
|
||||
"center": [
|
||||
526,
|
||||
347
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "EX_WL",
|
||||
"center": [
|
||||
497,
|
||||
472
|
||||
],
|
||||
"unit": "nm"
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "EM_Start_WL",
|
||||
"center": [
|
||||
493,
|
||||
500
|
||||
],
|
||||
"unit": "nm"
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "EM_End_WL",
|
||||
"center": [
|
||||
495,
|
||||
524
|
||||
],
|
||||
"unit": "nm"
|
||||
},
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "Scan_speed",
|
||||
"center": [
|
||||
497,
|
||||
549
|
||||
],
|
||||
"unit": "nm/min"
|
||||
},
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "EX_Slit",
|
||||
"center": [
|
||||
771,
|
||||
321
|
||||
],
|
||||
"unit": "nm"
|
||||
},
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "EM_Slit",
|
||||
"center": [
|
||||
764,
|
||||
344
|
||||
],
|
||||
"unit": "nm"
|
||||
},
|
||||
{
|
||||
"type": "Edit_Spin",
|
||||
"name": "PMT_Voltage",
|
||||
"center": [
|
||||
758,
|
||||
379
|
||||
],
|
||||
"unit": "V"
|
||||
},
|
||||
{
|
||||
"type": "CheckBox",
|
||||
"name": "PMT_Voltage_Limit",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"label": "PMT Voltage 0-1000V"
|
||||
},
|
||||
{
|
||||
"type": "ComboBox",
|
||||
"name": "Response",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"unit": "s"
|
||||
},
|
||||
{
|
||||
"type": "Edit_Spin",
|
||||
"name": "Replicates",
|
||||
"center": [
|
||||
0,
|
||||
0
|
||||
]
|
||||
}
|
||||
],
|
||||
"Monitor": [
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Y_Axis_Max",
|
||||
"center": [
|
||||
455,
|
||||
343
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Y_Axis_Min",
|
||||
"center": [
|
||||
455,
|
||||
375
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "CheckBox",
|
||||
"name": "Open_processing_after_acquisition",
|
||||
"center": [
|
||||
396,
|
||||
437
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "CheckBox",
|
||||
"name": "Overlay",
|
||||
"center": [
|
||||
395,
|
||||
503
|
||||
]
|
||||
}
|
||||
],
|
||||
"Report": [
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Data_start_Value",
|
||||
"center": [
|
||||
718,
|
||||
455
|
||||
],
|
||||
"unit": "nm"
|
||||
},
|
||||
{
|
||||
"type": "Edit",
|
||||
"name": "Data_end_Value",
|
||||
"center": [
|
||||
717,
|
||||
480
|
||||
],
|
||||
"unit": "nm"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"flsol": [
|
||||
"flsol_task4_measure"
|
||||
"flsol_taskE_auto_optimize_scan"
|
||||
]
|
||||
}
|
||||
|
||||
5
evaluation_examples/test_flsol1.json
Normal file
5
evaluation_examples/test_flsol1.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"flsol": [
|
||||
"flsol_taskE_auto_optimize_scan1"
|
||||
]
|
||||
}
|
||||
@@ -752,7 +752,6 @@ class PromptAgent:
|
||||
elif self.model.startswith("claude"):
|
||||
messages = payload["messages"]
|
||||
max_tokens = payload["max_tokens"]
|
||||
top_p = payload["top_p"]
|
||||
temperature = payload["temperature"]
|
||||
|
||||
claude_messages = []
|
||||
@@ -796,11 +795,10 @@ class PromptAgent:
|
||||
"max_tokens": max_tokens,
|
||||
"messages": claude_messages,
|
||||
"temperature": temperature,
|
||||
"top_p": top_p
|
||||
}
|
||||
|
||||
response = requests.post(
|
||||
"https://api.apiyi.com/v1/messages",
|
||||
os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1").rstrip("/") + "/messages",
|
||||
headers=headers,
|
||||
json=payload
|
||||
)
|
||||
@@ -816,7 +814,7 @@ class PromptAgent:
|
||||
elif self.model.startswith("mistral"):
|
||||
messages = payload["messages"]
|
||||
max_tokens = payload["max_tokens"]
|
||||
top_p = payload["top_p"]
|
||||
top_p = payload.get("top_p", 0.9)
|
||||
temperature = payload["temperature"]
|
||||
|
||||
assert self.observation_type in pure_text_settings, f"The model {self.model} can only support text-based input, please consider change based model or settings"
|
||||
@@ -871,7 +869,7 @@ class PromptAgent:
|
||||
# THUDM/cogagent-chat-hf
|
||||
messages = payload["messages"]
|
||||
max_tokens = payload["max_tokens"]
|
||||
top_p = payload["top_p"]
|
||||
top_p = payload.get("top_p", 0.9)
|
||||
temperature = payload["temperature"]
|
||||
|
||||
cog_messages = []
|
||||
@@ -920,7 +918,7 @@ class PromptAgent:
|
||||
elif self.model in ["gemini-pro", "gemini-pro-vision"]:
|
||||
messages = payload["messages"]
|
||||
max_tokens = payload["max_tokens"]
|
||||
top_p = payload["top_p"]
|
||||
top_p = payload.get("top_p", 0.9)
|
||||
temperature = payload["temperature"]
|
||||
|
||||
if self.model == "gemini-pro":
|
||||
@@ -989,10 +987,10 @@ class PromptAgent:
|
||||
)
|
||||
return response.text
|
||||
|
||||
elif self.model.startswith("gemini"):
|
||||
elif self.model in ["gemini-pro", "gemini-pro-vision", "gemini-1.5-pro", "gemini-1.5-flash"]:
|
||||
messages = payload["messages"]
|
||||
max_tokens = payload["max_tokens"]
|
||||
top_p = payload["top_p"]
|
||||
top_p = payload.get("top_p", 0.9)
|
||||
temperature = payload["temperature"]
|
||||
|
||||
gemini_messages = []
|
||||
@@ -1068,7 +1066,7 @@ class PromptAgent:
|
||||
elif self.model == "llama3-70b":
|
||||
messages = payload["messages"]
|
||||
max_tokens = payload["max_tokens"]
|
||||
top_p = payload["top_p"]
|
||||
top_p = payload.get("top_p", 0.9)
|
||||
temperature = payload["temperature"]
|
||||
|
||||
assert self.observation_type in pure_text_settings, f"The model {self.model} can only support text-based input, please consider change based model or settings"
|
||||
@@ -1121,7 +1119,7 @@ class PromptAgent:
|
||||
elif self.model.startswith("qwen"):
|
||||
messages = payload["messages"]
|
||||
max_tokens = payload["max_tokens"]
|
||||
top_p = payload["top_p"]
|
||||
top_p = payload.get("top_p", 0.9)
|
||||
temperature = payload["temperature"]
|
||||
|
||||
qwen_messages = []
|
||||
@@ -1200,7 +1198,21 @@ class PromptAgent:
|
||||
return ""
|
||||
|
||||
else:
|
||||
raise ValueError("Invalid model: " + self.model)
|
||||
# Fallback: openai-compatible for any unrecognized model (e.g. gemini-3.1 via apiyi)
|
||||
base_url = os.environ.get('OPENAI_BASE_URL', os.environ.get('OPENAI_API_BASE', 'https://api.openai.com'))
|
||||
api_url = f"{base_url}/chat/completions" if base_url.endswith('/v1') else f"{base_url}/v1/chat/completions"
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"
|
||||
}
|
||||
logger.info("Generating content with openai-compatible model: %s", self.model)
|
||||
response = requests.post(api_url, headers=headers, json=payload)
|
||||
if response.status_code != 200:
|
||||
logger.error("Failed to call LLM: " + response.text)
|
||||
time.sleep(5)
|
||||
return ""
|
||||
else:
|
||||
return response.json()['choices'][0]['message']['content']
|
||||
|
||||
def parse_actions(self, response: str, masks=None):
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ For each step, you will get an observation of an image, which is the screenshot
|
||||
|
||||
You are required to use `pyautogui` to perform the action grounded to the observation, but DONOT use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with. DONOT USE `pyautogui.screenshot()` to make screenshot.
|
||||
Return one line or multiple lines of python code to perform the action each time, be time efficient. When predicting multiple lines of code, make some small sleep like `time.sleep(0.5);` interval so that the machine could take; Each time you need to predict a complete code, no variables or function can be shared from history
|
||||
You need to to specify the coordinates of by yourself based on your observation of current observation, but you should be careful to ensure that the coordinates are correct.
|
||||
If the task instruction contains explicit coordinates like (x,y), use those exact coordinates directly without re-estimating from the screenshot. Otherwise, specify coordinates based on your observation.
|
||||
You ONLY need to return the code inside a code block, like this:
|
||||
```python
|
||||
# your code here
|
||||
@@ -18,7 +18,11 @@ When you think the task is done, return ```DONE```.
|
||||
IMPORTANT: You must return EITHER a python code block OR a special code (WAIT/DONE/FAIL) in each response, NEVER BOTH. Do NOT include ```DONE``` in the same response as a ```python``` code block. After executing an action, wait for the next observation to verify the result before returning ```DONE```.
|
||||
|
||||
My computer's password is '{{CLIENT_PASSWORD}}', feel free to use it when you need sudo rights.
|
||||
First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
|
||||
Before writing any code, you MUST add Chinese comments in this exact format:
|
||||
# 【观察】Describe what you currently see on the screen in 1-2 sentences
|
||||
# 【判断】Explain which step you are on and what needs to be done next
|
||||
# 【动作】Describe the specific action you are about to take
|
||||
Then provide the pyautogui code. This reflection format is mandatory for every response and makes the demo easy to follow.
|
||||
""".strip()
|
||||
|
||||
SYS_PROMPT_IN_SCREENSHOT_OUT_CODE_FEW_SHOT = """
|
||||
@@ -557,7 +561,11 @@ When you think the task is done, return ```DONE```.
|
||||
IMPORTANT: You must return EITHER a python code block OR a special code (WAIT/DONE/FAIL) in each response, NEVER BOTH. Do NOT include ```DONE``` in the same response as a ```python``` code block. After executing an action, wait for the next observation to verify the result before returning ```DONE```.
|
||||
|
||||
My computer's password is '{{CLIENT_PASSWORD}}', feel free to use it when you need sudo rights.
|
||||
First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
|
||||
Before writing any code, you MUST add Chinese comments in this exact format:
|
||||
# 【观察】Describe what you currently see on the screen in 1-2 sentences
|
||||
# 【判断】Explain which step you are on and what needs to be done next
|
||||
# 【动作】Describe the specific action you are about to take
|
||||
Then provide the pyautogui code. This reflection format is mandatory for every response and makes the demo easy to follow.
|
||||
""".strip()
|
||||
|
||||
SYS_PROMPT_IN_A11Y_OUT_ACTION = """
|
||||
@@ -826,7 +834,11 @@ When you think the task is done, return ```DONE```.
|
||||
IMPORTANT: You must return EITHER a python code block OR a special code (WAIT/DONE/FAIL) in each response, NEVER BOTH. Do NOT include ```DONE``` in the same response as a ```python``` code block. After executing an action, wait for the next observation to verify the result before returning ```DONE```.
|
||||
|
||||
My computer's password is '{{CLIENT_PASSWORD}}', feel free to use it when you need sudo rights.
|
||||
First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
|
||||
Before writing any code, you MUST add Chinese comments in this exact format:
|
||||
# 【观察】Describe what you currently see on the screen in 1-2 sentences
|
||||
# 【判断】Explain which step you are on and what needs to be done next
|
||||
# 【动作】Describe the specific action you are about to take
|
||||
Then provide the pyautogui code. This reflection format is mandatory for every response and makes the demo easy to follow.
|
||||
""".strip()
|
||||
|
||||
SYS_PROMPT_IN_BOTH_OUT_ACTION = """
|
||||
@@ -1101,7 +1113,11 @@ When you think the task can not be done, return ```FAIL```, don't easily say ```
|
||||
When you think the task is done, return ```DONE```.
|
||||
|
||||
My computer's password is '{{CLIENT_PASSWORD}}', feel free to use it when you need sudo rights.
|
||||
First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
|
||||
Before writing any code, you MUST add Chinese comments in this exact format:
|
||||
# 【观察】Describe what you currently see on the screen in 1-2 sentences
|
||||
# 【判断】Explain which step you are on and what needs to be done next
|
||||
# 【动作】Describe the specific action you are about to take
|
||||
Then provide the pyautogui code. This reflection format is mandatory for every response and makes the demo easy to follow.
|
||||
""".strip()
|
||||
|
||||
SYS_PROMPT_SEEACT = """
|
||||
@@ -1151,7 +1167,11 @@ When you think the task can not be done, return ```FAIL```, don't easily say ```
|
||||
When you think the task is done, return ```DONE```.
|
||||
|
||||
My computer's password is '{{CLIENT_PASSWORD}}', feel free to use it when you need sudo rights.
|
||||
First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
|
||||
Before writing any code, you MUST add Chinese comments in this exact format:
|
||||
# 【观察】Describe what you currently see on the screen in 1-2 sentences
|
||||
# 【判断】Explain which step you are on and what needs to be done next
|
||||
# 【动作】Describe the specific action you are about to take
|
||||
Then provide the pyautogui code. This reflection format is mandatory for every response and makes the demo easy to follow.
|
||||
"""
|
||||
|
||||
AGUVIS_PLANNER_SYS_PROMPT = """
|
||||
@@ -1177,7 +1197,11 @@ Here are some guidelines for you:
|
||||
3. Return ```Done``` when you think the task is done. Return ```Fail``` when you think the task can not be done.
|
||||
|
||||
My computer's password is '{{CLIENT_PASSWORD}}', feel free to use it when you need sudo rights.
|
||||
First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
|
||||
Before writing any code, you MUST add Chinese comments in this exact format:
|
||||
# 【观察】Describe what you currently see on the screen in 1-2 sentences
|
||||
# 【判断】Explain which step you are on and what needs to be done next
|
||||
# 【动作】Describe the specific action you are about to take
|
||||
Then provide the pyautogui code. This reflection format is mandatory for every response and makes the demo easy to follow.
|
||||
""".strip()
|
||||
|
||||
AGUVIS_SYS_PROMPT = """You are a GUI agent. You are given a task and a screenshot of the screen. You need to perform a series of pyautogui actions to complete the task.
|
||||
|
||||
@@ -12,22 +12,22 @@ export OPENAI_API_KEY="<REDACTED>"
|
||||
export OPENAI_BASE_URL="https://vip.apiyi.com/v1"
|
||||
|
||||
# ---------- 评测参数(对齐 run_proxmox.sh)----------
|
||||
MODEL="gpt-5.4"
|
||||
MODEL="gpt-5.4" # claude-sonnet-4-6
|
||||
EVAL_MODEL="gemini-3.1-pro-preview"
|
||||
MAX_STEPS=50
|
||||
SLEEP_AFTER_EXEC=3
|
||||
TEMPERATURE=0
|
||||
TOP_P=0.9
|
||||
MAX_TOKENS=16384
|
||||
MAX_TRAJECTORY_LENGTH=3
|
||||
OBSERVATION_TYPE="screenshot_a11y_tree"
|
||||
MAX_TOKENS=36748
|
||||
MAX_TRAJECTORY_LENGTH=5
|
||||
OBSERVATION_TYPE="screenshot"
|
||||
ACTION_SPACE="pyautogui"
|
||||
SCREEN_WIDTH=1280
|
||||
SCREEN_HEIGHT=1024
|
||||
RESULT_DIR="/Users/lizhanyuan/Downloads/results2/flsol"
|
||||
RESULT_DIR="/Users/lizhanyuan/Downloads/results7/flsol"
|
||||
TEST_META="evaluation_examples/test_flsol.json"
|
||||
DOMAIN="flsol"
|
||||
INJECT_STEPS=true
|
||||
INJECT_STEPS=False
|
||||
|
||||
# ---------- 预检查 ----------
|
||||
cd "$(dirname "$0")"
|
||||
|
||||
Reference in New Issue
Block a user