Files
sci-gui-agent-benchmark/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json
Adam Yanxiao Zhao aa05f6cc26 Add AutoGLM-OS agent (#309)
* autoglm-os initialize

* clean code

* chore: use proxy for download setup

* feat(autoglm-os): add parameter to toggle images

* fix: use temporary directory for files pulled from the vm to prevent potential collision when running multiple instances of the same task in parallel

* update

* add client_password

* update multienv

* fix

* fix prompt

* fix prompt

* fix prompt

* fix sys prompt

* feat: use proxy in file evaluator

* fix client_password

* fix note_prompt

* fix autoglm agent cmd type

* fix

* revert: fix: use temporary directory for files pulled from the vm to prevent potential collision when running multiple instances of the same task in parallel

reverts commit bab5473eea1de0e61b0e1d68b23ce324a5b0ee57

* feat(autoglm): setup tools

* fix(autoglm): remove the redundant second retrieval of the a11y tree

* add osworld server restart

* Revert "add osworld server restart"

This reverts commit 7bd9d84122e246ce2a26de0e49c25494244c2b3d.

* fix _launch_setup

* fix autoglm agent tools & xml tree

* fix desktop_env

* fix bug for tool name capitalization

* fix: always use proxy for setup download

* add fail after exceeding max turns

* fix(autoglm): avoid adding image to message when screenshot is empty

* fix maximize_window

* fix maximize_window

* fix maximize_window

* fix import browsertools module bug

* fix task proxy config bug

* restore setup

* refactor desktop env

* restore image in provider

* restore file.py

* refactor desktop_env

* quick fix

* refactor desktop_env.step

* fix our env reset

* add max turns constraint

* clean run script

* clean lib_run_single.py

---------

Co-authored-by: hanyullai <hanyullai@outlook.com>
Co-authored-by: JingBh <jingbohao@yeah.net>
2025-08-17 12:08:40 +08:00

110 lines
2.7 KiB
JSON

{
"id": "5990457f-2adb-467b-a4af-5c857c92d762",
"snapshot": "chrome",
"instruction": "Append one entry of AI researcher Yann LeCun from Google Scholar into an existing table researchers.xlsx.",
"source": "authors",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762/researchers.xlsx",
"path": "/home/user/Desktop/researchers.xlsx"
}
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"nautilus",
"/home/user/Desktop"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome",
"libreoffice_calc"
],
"evaluator": {
"func": "literal_match",
"result": {
"type": "content_from_vm_file",
"path": "/home/user/Desktop/researchers.xlsx",
"file_type": "xlsx",
"file_content": "last_row"
},
"expected": {
"type": "info_from_website",
"url": "https://scholar.google.com/citations?user=WLN3QrAAAAAJ&hl=en",
"infos": [
{
"action": "inner_text",
"selector": "#gsc_prf_in"
},
{
"action": "inner_text",
"selector": "#gsc_rsb_st > tbody > tr:nth-child(1) > td:nth-child(2)"
},
{
"action": "inner_text",
"selector": "#gsc_rsb_st > tbody > tr:nth-child(2) > td:nth-child(2)"
},
{
"action": "inner_text",
"selector": "#gsc_rsb_st > tbody > tr:nth-child(3) > td:nth-child(2)"
},
{
"action": "inner_text",
"selector": "#gsc_a_b > tr:nth-child(1) > td.gsc_a_t > a"
},
{
"action": "click_and_attribute",
"selector": [
"#gsc_a_b > tr:nth-child(1) > td.gsc_a_t > a",
"#gsc_oci_title_gg > div:nth-child(1) > a"
],
"attribute": "href"
}
],
"backups": [
"Yann LeCun",
"345074",
"147",
"372",
"Deep learning",
"https://hal.science/hal-04206682/document"
]
},
"options": {
"type": "list",
"ignore_case": true
}
},
"proxy": true,
"fixed_ip": false,
"possibility_of_env_change": "low"
}