From bdaf37e0e58abdddec31792a8e126956dc80d1b8 Mon Sep 17 00:00:00 2001 From: ChenYXxxx <164883942+ChenYXxxx@users.noreply.github.com> Date: Thu, 3 Jul 2025 16:59:05 +0800 Subject: [PATCH] fix_os&gimp (#220) * Update ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json * Update c288e301-e626-4b98-a1ab-159dcb162af5.json * Update 3ce045a0-877b-42aa-8d2c-b4a863336ab8.json * Update b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json * Update 2e6f678f-472d-4c55-99cc-8e7c5c402a71.json Please batch process all images on the desktop by increasing their brightness to 50, instead of adjusting them individually. * Update 5ca86c6f-f317-49d8-b6a7-b527541caae8.json * Update a746add2-cab0-4740-ac36-c3769d9bfb46.json * Update a746add2-cab0-4740-ac36-c3769d9bfb46.json * Update 62f7fd55-0687-4a43-b6e1-3eda16fc6252.json * Update d52d6308-ec58-42b7-a2c9-de80e4837b2b.json * Update d16c99dc-2a1e-46f2-b350-d97c86c85c15.json * Update d16c99dc-2a1e-46f2-b350-d97c86c85c15.json * Update 58d3eeeb-e9d0-499f-962e-fd0db2a744d8.json --- .../2e6f678f-472d-4c55-99cc-8e7c5c402a71.json | 4 +- .../58d3eeeb-e9d0-499f-962e-fd0db2a744d8.json | 4 +- .../5ca86c6f-f317-49d8-b6a7-b527541caae8.json | 4 +- .../62f7fd55-0687-4a43-b6e1-3eda16fc6252.json | 4 +- .../a746add2-cab0-4740-ac36-c3769d9bfb46.json | 140 ++++++++++-------- .../d16c99dc-2a1e-46f2-b350-d97c86c85c15.json | 8 +- .../d52d6308-ec58-42b7-a2c9-de80e4837b2b.json | 4 +- .../3ce045a0-877b-42aa-8d2c-b4a863336ab8.json | 21 ++- .../b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json | 4 +- .../c288e301-e626-4b98-a1ab-159dcb162af5.json | 4 +- .../ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json | 4 +- 11 files changed, 113 insertions(+), 88 deletions(-) diff --git a/evaluation_examples/examples/gimp/2e6f678f-472d-4c55-99cc-8e7c5c402a71.json b/evaluation_examples/examples/gimp/2e6f678f-472d-4c55-99cc-8e7c5c402a71.json index 6b28e57..8201459 100644 --- a/evaluation_examples/examples/gimp/2e6f678f-472d-4c55-99cc-8e7c5c402a71.json +++ b/evaluation_examples/examples/gimp/2e6f678f-472d-4c55-99cc-8e7c5c402a71.json @@ -1,7 +1,7 @@ { "id": "2e6f678f-472d-4c55-99cc-8e7c5c402a71", "snapshot": "gimp", - "instruction": "Please batch process the images on the desktop by lifting the brightness to 50.", + "instruction": "Please batch process all images on the desktop by increasing their brightness to 50, instead of adjusting them individually.", "source": "", "config": [ { @@ -40,4 +40,4 @@ "func": "infeasible" }, "proxy": false -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/gimp/58d3eeeb-e9d0-499f-962e-fd0db2a744d8.json b/evaluation_examples/examples/gimp/58d3eeeb-e9d0-499f-962e-fd0db2a744d8.json index 142be41..e58e13a 100644 --- a/evaluation_examples/examples/gimp/58d3eeeb-e9d0-499f-962e-fd0db2a744d8.json +++ b/evaluation_examples/examples/gimp/58d3eeeb-e9d0-499f-962e-fd0db2a744d8.json @@ -1,7 +1,7 @@ { "id": "58d3eeeb-e9d0-499f-962e-fd0db2a744d8", "snapshot": "gimp", - "instruction": "Please describe what is happening in the image.", + "instruction": "Based on the image above, translate the hidden audio conversation into French.", "source": "", "config": [ { @@ -33,4 +33,4 @@ "func": "infeasible" }, "proxy": false -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json b/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json index 23a3a1e..e6a4e3a 100644 --- a/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json +++ b/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json @@ -1,7 +1,7 @@ { "id": "5ca86c6f-f317-49d8-b6a7-b527541caae8", "snapshot": "gimp", - "instruction": "Could you help me download the logo of the University of Hong Kong in \".png\" format within GIMP?", + "instruction": "Could you help me download the logo of the University of Hong Kong in '.png' format using only GIMP's built-in features, without launching a separate web browser?", "source": "", "config": [], "trajectory": "trajectories/", @@ -12,4 +12,4 @@ "func": "infeasible" }, "proxy": false -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json b/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json index 4480ce4..ae6c23a 100644 --- a/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json +++ b/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json @@ -1,7 +1,7 @@ { "id": "62f7fd55-0687-4a43-b6e1-3eda16fc6252", "snapshot": "gimp", - "instruction": "Could you help me convert the image located at \"/home/user/logo.png\" to \".svg\" format?", + "instruction": "Could you help me convert the image located at \"/home/user/logo.png\" to \".svg\" format by GIMP?", "source": "", "config": [ { @@ -24,4 +24,4 @@ "func": "infeasible" }, "proxy": false -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/gimp/a746add2-cab0-4740-ac36-c3769d9bfb46.json b/evaluation_examples/examples/gimp/a746add2-cab0-4740-ac36-c3769d9bfb46.json index cd6139b..557e13c 100644 --- a/evaluation_examples/examples/gimp/a746add2-cab0-4740-ac36-c3769d9bfb46.json +++ b/evaluation_examples/examples/gimp/a746add2-cab0-4740-ac36-c3769d9bfb46.json @@ -1,70 +1,82 @@ { - "id": "a746add2-cab0-4740-ac36-c3769d9bfb46", - "snapshot": "gimp", - "instruction": "Help me open up the Vignette filter window.", - "source": "https://www.youtube.com/watch?v=_L_MMU22bAw", - "config": [ - { - "type": "download", - "parameters": { - "files": [ - { - "url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/gimp/a746add2-cab0-4740-ac36-c3769d9bfb46/dog_with_background.png", - "path": "/home/user/Desktop/dog_with_background.png" - } - ] - } - }, - { - "type": "launch", - "parameters": { - "command": [ - "gimp", - "/home/user/Desktop/dog_with_background.png" - ] - } - } - ], - "trajectory": "trajectories/", - "related_apps": [ - "gimp" - ], - "evaluator": { - "postconfig": [ - { - "type": "execute", - "parameters": { - "command": [ - "python3", - "-c", - "import pyautogui; pyautogui.press([\"esc\"]);pyautogui.hotkey([\"ctrl\", \"q\"]);" - ] + "id": "a746add2-cab0-4740-ac36-c3769d9bfb46", + "snapshot": "gimp", + "instruction": "Help me open up the Vignette filter window.", + "source": "https://www.youtube.com/watch?v=_L_MMU22bAw", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://agent-files.deva.msh.team/osworld/benchmark_files/gimp/a746add2-cab0-4740-ac36-c3769d9bfb46_dog_with_background.png", + "path": "/home/user/Desktop/dog_with_background.png" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "gimp", + "/home/user/Desktop/dog_with_background.png" + ] + } } - }, - { - "type": "sleep", - "parameters": { - "seconds": 0.5 - } - } ], - "func": "check_config_status", - "expected": { - "type": "rule", - "rules": { - "type:": "key-value", - "key": [ - "history-item", - "\"filters-vignette\"" + "trajectory": "trajectories/", + "related_apps": [ + "gimp" + ], + "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": [ + "python3", + "-c", + "import pyautogui; pyautogui.press([\"esc\"]);pyautogui.hotkey([\"ctrl\", \"q\"]);" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python3", + "-c", + "import pyautogui; pyautogui.press([\"right\"]);pyautogui.press([\"enter\"]);" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } ], - "value": "1" - } + "func": "check_include_exclude", + "result": { + "type": "vm_command_line", + "command": "cat /home/user/.config/GIMP/2.10/action-history", + "shell": true + }, + "expected": { + "type": "rule", + "rules": { + "include": [ + "filters-vignette" + ], + "exclude": [ + "error", + "failed", + "not found" + ] + } + } }, - "result": { - "type": "gimp_config_file", - "file_name": "action-history", - "dest": "action-history" - } - }, "proxy": false -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json b/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json index 38b617c..e41d581 100644 --- a/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json +++ b/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json @@ -1,7 +1,7 @@ { "id": "d16c99dc-2a1e-46f2-b350-d97c86c85c15", "snapshot": "gimp", - "instruction": "Could you assist me with resizing just one layer of an image? I need to adjust the height to 512 pixels while maintaining the original aspect ratio?", + "instruction": "Could you assist me with resizing the dog layer of an image? I need to adjust the height to 512 pixels while maintaining the original aspect ratio?", "source": "https://stackoverflow.com/questions/75185543/use-gimp-to-resize-image-in-one-layer-only", "config": [ { @@ -104,8 +104,8 @@ "result": [ { "type": "vm_file", - "path": "/home/user/Desktop/dog_with_background.png", - "dest": "dog_with_background.png" + "path": "/home/user/Desktop/resized.png", + "dest": "resized.png" }, { "type": "vm_file", @@ -115,4 +115,4 @@ ] }, "proxy": false -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/gimp/d52d6308-ec58-42b7-a2c9-de80e4837b2b.json b/evaluation_examples/examples/gimp/d52d6308-ec58-42b7-a2c9-de80e4837b2b.json index db25026..b0c5b8b 100644 --- a/evaluation_examples/examples/gimp/d52d6308-ec58-42b7-a2c9-de80e4837b2b.json +++ b/evaluation_examples/examples/gimp/d52d6308-ec58-42b7-a2c9-de80e4837b2b.json @@ -1,7 +1,7 @@ { "id": "d52d6308-ec58-42b7-a2c9-de80e4837b2b", "snapshot": "gimp", - "instruction": "Could you help me remove the dock on the left side of the screen?", + "instruction": "Could you help me remove the dock on the left side of the screen in the GIMP?", "source": "https://superuser.com/questions/1447106/how-to-get-rid-of-the-gimp-tool-options-box", "config": [ { @@ -52,4 +52,4 @@ } }, "proxy": false -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/os/3ce045a0-877b-42aa-8d2c-b4a863336ab8.json b/evaluation_examples/examples/os/3ce045a0-877b-42aa-8d2c-b4a863336ab8.json index 3f8e972..a2fb256 100644 --- a/evaluation_examples/examples/os/3ce045a0-877b-42aa-8d2c-b4a863336ab8.json +++ b/evaluation_examples/examples/os/3ce045a0-877b-42aa-8d2c-b4a863336ab8.json @@ -19,13 +19,26 @@ "related_apps": [ "os" ], - "evaluator": { - "func": "check_text_enlarged", + "evaluator": { + "func": "check_include_exclude", "result": { "type": "vm_command_line", - "command": "gsettings get org.gnome.desktop.interface text-scaling-factor", + "command": "LARGE_TEXT=$(gsettings get org.gnome.desktop.interface text-scaling-factor 2>/dev/null | grep -E '1\\.[2-9]|[2-9]\\.' | wc -l) && ZOOM_ENABLED=$(gsettings get org.gnome.desktop.a11y.applications screen-magnifier-enabled 2>/dev/null | grep -c true) && ZOOM_FACTOR=$(gsettings get org.gnome.desktop.a11y.magnifier mag-factor 2>/dev/null | grep -E '[2-9]\\.|1\\.[1-9]' | wc -l) && if [ \"$LARGE_TEXT\" -gt 0 ]; then echo \"Large text enabled\"; elif [ \"$ZOOM_ENABLED\" -gt 0 ] && [ \"$ZOOM_FACTOR\" -gt 0 ]; then echo \"Zoom enabled with magnification\"; else echo \"No enlargement enabled\"; fi", "shell": true + }, + "expected": { + "type": "rule", + "rules": { + "include": [ + "Large text enabled", + "Zoom enabled with magnification" + ], + "exclude": [ + "No enlargement enabled", + "error" + ] + } } }, "proxy": false -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json b/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json index 1c46f92..6f14b4c 100644 --- a/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json +++ b/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json @@ -1,7 +1,7 @@ { "id": "b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa", "snapshot": "os", - "instruction": "I want to switch off the Bluetooth. Can you help me?", + "instruction": "I want to switch on the Bluetooth. Can you help me?", "source": "https://help.ubuntu.com/lts/ubuntu-help/bluetooth-turn-on-off.html.en", "trajectory": "trajectories/", "config": [], @@ -12,4 +12,4 @@ "func": "infeasible" }, "proxy": false -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json b/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json index 1954036..b6d5fef 100644 --- a/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json +++ b/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json @@ -1,7 +1,7 @@ { "id": "c288e301-e626-4b98-a1ab-159dcb162af5", "snapshot": "os", - "instruction": "Please set the default Python version to Python3 on my Ubuntu system.", + "instruction": "Please set the default Python version to Python4 on my Ubuntu system.", "source": "https://stackoverflow.com/questions/41986507/unable-to-set-default-python-version-to-python3-in-ubuntu", "trajectory": "trajectories/", "related_apps": [ @@ -11,4 +11,4 @@ "func": "infeasible" }, "proxy": false -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/os/ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json b/evaluation_examples/examples/os/ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json index c00afda..71d86fc 100644 --- a/evaluation_examples/examples/os/ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json +++ b/evaluation_examples/examples/os/ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json @@ -1,7 +1,7 @@ { "id": "ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3", "snapshot": "os", - "instruction": "Can you remove the first favorite app from 'favorites'?", + "instruction": "Can you remove vim from favorite app in 'favorites'?", "source": "https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s", "config": [ { @@ -57,4 +57,4 @@ } }, "proxy": false -} \ No newline at end of file +}