Files
sci-gui-agent-benchmark/evaluation_examples/examples/multi_apps/da922383-bfa4-4cd3-bbad-6bebab3d7742.json
st2rb8g 61f265a082 fix some multi_apps tasks (#245)
* fix chrome

* fix some multi_apps tasks.

* fix some multiapps tasks

* fix some multiapps tasks

---------

Co-authored-by: yuanmengqi <yuanmengqi@mail.ustc.edu.cn>
2025-07-11 06:32:13 +08:00

76 lines
1.9 KiB
JSON

{
"id": "da922383-bfa4-4cd3-bbad-6bebab3d7742",
"snapshot": "multiapps",
"instruction": "I browsed a lot of interesting blog articles today. I want to store these articles in a designated local folder, just like Zotero stores papers. Please download the blog posts that are currently open as PDF files and save them, named by their titles, to /home/user/Documents/Blog.",
"source": "authors",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://lilianweng.github.io/posts/2023-06-23-agent/",
"https://lilianweng.github.io/posts/2024-02-05-human-data-quality/"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome",
"os"
],
"evaluator": {
"postconfig": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/da922383-bfa4-4cd3-bbad-6bebab3d7742/script.py",
"path": "/home/user/Desktop/script.py"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": "pip install PyMuPDF",
"shell": true
}
}
],
"func": "exact_match",
"result": {
"type": "vm_command_line",
"command": "python /home/user/Desktop/script.py",
"shell": true
},
"expected": {
"type": "rule",
"rules": {
"expected": "[1, 1]\n"
}
}
},
"proxy": true
}