- 修复_load_screenshots_from_dir中截图按字符串排序导致step_9被误判为最终帧的bug,改为数字排序 - 对齐reeval.py的prompt逻辑:明确要求模型优先检查最终截图(STEP 1 EXAMINE FINAL SCREENSHOT FIRST) - 评估temperature从0.7降至0.2提升一致性 - 新增batch_reeval.py:基于test_final.json批量重评测已有轨迹 - 新增reeval.py:单任务重评测脚本(final-frame-anchored evaluation) - test_final.json新增avogadro(11题)和origin(8题)
74 lines
1.9 KiB
JSON
74 lines
1.9 KiB
JSON
{
|
|
"jade": [
|
|
"MDIJade6.5使用手册_task2",
|
|
"MDIJade6.5使用手册_task3",
|
|
"jade-guide-example_task1",
|
|
"jade-guide-example_task4",
|
|
"jade-guide-example_task6",
|
|
"jade-guide-example_task7",
|
|
"jade-guide-example_task8",
|
|
"jade-guide-example_task9",
|
|
"jade-guide-example_task10",
|
|
"jade-guide-example_task11",
|
|
"jade-guide-example_task12"
|
|
],
|
|
"vesta": [
|
|
"VESTA_Manual_task2",
|
|
"VESTA_Manual_task3",
|
|
"VESTA_Manual_task4",
|
|
"VESTA_Manual_task5",
|
|
"VESTA_Manual_task6",
|
|
"VESTA_Manual_task7",
|
|
"VESTA_Manual_task8",
|
|
"VESTA_Manual_task9",
|
|
"VESTA_Manual_task10",
|
|
"VESTA_Manual_task11"
|
|
],
|
|
"origin": [
|
|
"Origin_User_Guide_2025b_E_task2",
|
|
"Origin_User_Guide_2025b_E_task3",
|
|
"Origin_User_Guide_2025b_E_task4",
|
|
"Origin_User_Guide_2025b_E_task5",
|
|
"Origin_User_Guide_2025b_E_task8",
|
|
"Origin_User_Guide_2025b_E_task9",
|
|
"Origin_User_Guide_2025b_E_task11",
|
|
"Origin_User_Guide_2025b_E_task12"
|
|
],
|
|
"avogadro": [
|
|
"building-metal-complexes_task1",
|
|
"building-metal-complexes_task3",
|
|
"building-metal-complexes_task7",
|
|
"building-organic-molecules_task1",
|
|
"building-organic-molecules_task3",
|
|
"building-organic-molecules_task4",
|
|
"building-organic-molecules_task5",
|
|
"building-organic-molecules_task9",
|
|
"naming-a-molecule_task1",
|
|
"using-qtaim-and-wfn_task2",
|
|
"viewing-electrostatic-potential_task1"
|
|
],
|
|
"ovito": [
|
|
"animation_task3",
|
|
"aspherical_particles_task1",
|
|
"clone_pipeline_task1",
|
|
"customize_init_state_task1",
|
|
"data_model_task1",
|
|
"export_task1",
|
|
"marker_particles_task2",
|
|
"miscellaneous_task1",
|
|
"python_extensions_task1",
|
|
"transparent_particles_task1",
|
|
"viewports_task1",
|
|
"viewports_task2",
|
|
"viewports_task3",
|
|
"viewports_task4",
|
|
"viewports_task5",
|
|
"viewports_task6",
|
|
"viewports_task7",
|
|
"viewports_task8",
|
|
"viewports_task9",
|
|
"viewports_task10",
|
|
"viewports_task11"
|
|
]
|
|
}
|