From 88cc41cdf239bb6d27190b4bf5e53e38e346be06 Mon Sep 17 00:00:00 2001 From: tsuky_chen <3107760494@qq.com> Date: Mon, 4 Mar 2024 11:40:02 +0800 Subject: [PATCH 1/5] update multi apps --- .../69acbb55-d945-4927-a87b-8480e1a5bb7e.json | 50 +++++++++++ .../acb0f96b-e27c-44d8-b55f-7cb76609dfcd.json | 49 +++++++++++ .../eb303e01-261e-4972-8c07-c9b4e7a4922a.json | 82 +++++++++++++++++++ 3 files changed, 181 insertions(+) create mode 100644 evaluation_examples/examples/multi_apps/69acbb55-d945-4927-a87b-8480e1a5bb7e.json create mode 100644 evaluation_examples/examples/multi_apps/acb0f96b-e27c-44d8-b55f-7cb76609dfcd.json create mode 100644 evaluation_examples/examples/multi_apps/eb303e01-261e-4972-8c07-c9b4e7a4922a.json diff --git a/evaluation_examples/examples/multi_apps/69acbb55-d945-4927-a87b-8480e1a5bb7e.json b/evaluation_examples/examples/multi_apps/69acbb55-d945-4927-a87b-8480e1a5bb7e.json new file mode 100644 index 0000000..8e75c5f --- /dev/null +++ b/evaluation_examples/examples/multi_apps/69acbb55-d945-4927-a87b-8480e1a5bb7e.json @@ -0,0 +1,50 @@ +{ + "id": "69acbb55-d945-4927-a87b-8480e1a5bb7e", + "snapshot": "os", + "instruction": "Set up the environment for the project in /home/user according to the guidance on \"https://github.com/xlang-ai/instructor-embedding\".", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + } + ], + "trajectory": "trajectories/69acbb55-d945-4927-a87b-8480e1a5bb7e", + "related_apps": [ + "os", + "chrome", + "vscode" + ], + "evaluator": { + "func": "exact_match", + "result": { + "type": "vm_command_line", + "command": [ + "python", + "-c", + "from InstructorEmbedding import INSTRUCTOR;" + ] + }, + "expected": { + "type": "rule", + "rules": { + "expected": "" + } + } + } +} \ No newline at end of file diff
--git a/evaluation_examples/examples/multi_apps/acb0f96b-e27c-44d8-b55f-7cb76609dfcd.json b/evaluation_examples/examples/multi_apps/acb0f96b-e27c-44d8-b55f-7cb76609dfcd.json new file mode 100644 index 0000000..a816523 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/acb0f96b-e27c-44d8-b55f-7cb76609dfcd.json @@ -0,0 +1,49 @@ +{ + "id": "acb0f96b-e27c-44d8-b55f-7cb76609dfcd", + "snapshot": "os", + "instruction": "Please help me clone the repo \"https://github.com/xlang-ai/instructor-embedding\" to /home/user.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + } + ], + "trajectory": "trajectories/acb0f96b-e27c-44d8-b55f-7cb76609dfcd", + "related_apps": [ + "os", + "chrome" + ], + "evaluator": { + "func": "is_extension_installed", + "result": { + "type": "vm_command_line", + "command": [ + "ls", + "/home/user/" + ] + }, + "expected": { + "type": "rule", + "rules": { + "type": "contain", + "expected": "instructor-embedding" + } + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/eb303e01-261e-4972-8c07-c9b4e7a4922a.json b/evaluation_examples/examples/multi_apps/eb303e01-261e-4972-8c07-c9b4e7a4922a.json new file mode 100644 index 0000000..7ee7e1a --- /dev/null +++ b/evaluation_examples/examples/multi_apps/eb303e01-261e-4972-8c07-c9b4e7a4922a.json @@ -0,0 +1,82 @@ +{ + "id": "eb303e01-261e-4972-8c07-c9b4e7a4922a", + "snapshot": "libreoffice_impress", + "instruction": "I am going to give a talk tomorrow, and my PPT and speaking notes are saved on the desktop. Help me Insert what I'm going to say for each slide in \"note\" of PPT as a reminder. I have done it for some slides. 
Help me finish the job for the rest.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1MdgN8ECxzLgHgjq8eKqrNQt3MPDjnKwa&export=download&authuser=0&confirm=t&uuid=ed5c37db-c565-4ca1-bbd1-bbdba13e9306&at=APZUnTUNi8YTLlZqMZ0r--bBpBEG:1709449877819", + "path": "/home/user/Desktop/lecture1-2021-with-ink.pptx" + }, + { + "url": "https://drive.usercontent.google.com/download?id=1FkPOcsWpsjUXSUld1NblwyVzcsE19uIe&export=download&authuser=0&confirm=t&uuid=27501bc0-732b-4ff7-abf4-a52427aea264&at=APZUnTWleaafIVF2iZkiuHo0vQ66:1709449873140", + "path": "/home/user/Desktop/notes.docx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/lecture1-2021-with-ink.pptx" + } + } + ], + "trajectory": "trajectories/eb303e01-261e-4972-8c07-c9b4e7a4922a", + "related_apps": [ + "libreoffice_impress", + "libreoffice_writer" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "lecture1-2021-with-ink.pptx - LibreOffice Impress", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5);" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_pptx_files", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=18orj_0q6N4w7ijADOJeU5ZkDDw-RdFUl&export=download&authuser=0&confirm=t&uuid=c05d2bce-bccb-4504-8fe4-7c409788d727&at=APZUnTVlCicnIm0cMdJ9FrZg4MSN:1709453015475", + "dest": "lecture1-2021-with-ink_Gold.pptx" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/lecture1-2021-with-ink.pptx", + "dest": "lecture1-2021-with-ink.pptx" + }, + "options": { + "examine_shape": false, + "examine_bullets": 
false + } + } +} \ No newline at end of file From 549ff4b1dff983ea6db208279dd6a012b0a1cb2c Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Mon, 4 Mar 2024 17:38:39 +0800 Subject: [PATCH 2/5] Update examples --- .../26660ad1-6ebb-4f59-8cba-a8432dfe8d38.json | 25 ++++++++ .../36037439-2044-4b50-b9d1-875b5a332143.json | 64 +++++++++++++++++++ .../a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json | 54 ++++++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 evaluation_examples/examples/multi_apps/26660ad1-6ebb-4f59-8cba-a8432dfe8d38.json create mode 100644 evaluation_examples/examples/multi_apps/36037439-2044-4b50-b9d1-875b5a332143.json create mode 100644 evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json diff --git a/evaluation_examples/examples/multi_apps/26660ad1-6ebb-4f59-8cba-a8432dfe8d38.json b/evaluation_examples/examples/multi_apps/26660ad1-6ebb-4f59-8cba-a8432dfe8d38.json new file mode 100644 index 0000000..d09222a --- /dev/null +++ b/evaluation_examples/examples/multi_apps/26660ad1-6ebb-4f59-8cba-a8432dfe8d38.json @@ -0,0 +1,25 @@ +{ + "id": "26660ad1-6ebb-4f59-8cba-a8432dfe8d38", + "snapshot": "libreoffice_calc", + "instruction": "I want to test the quality of the network environment my laptop is currently in. 
Please measure my network situation through speedtest.net, export the measurement results, and save them to ~/Test/Speed.", + "source": "authors", + "config": [ + ], + "trajectory": "trajectories/26660ad1-6ebb-4f59-8cba-a8432dfe8d38", + "related_apps": [ + + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/examples/multi_apps/36037439-2044-4b50-b9d1-875b5a332143.json b/evaluation_examples/examples/multi_apps/36037439-2044-4b50-b9d1-875b5a332143.json new file mode 100644 index 0000000..a6174f7 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/36037439-2044-4b50-b9d1-875b5a332143.json @@ -0,0 +1,64 @@ +{ + "id": "36037439-2044-4b50-b9d1-875b5a332143", + "snapshot": "libreoffice_calc", + "instruction": "Could you please pull up the Google Scholar page of the corresponding author for me in Chrome?", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1iTjv59rn8vcvUsh2-M7T5BLyNoutCwYo&export=download&authuser=0&confirm=t&uuid=cc13ea06-8d21-4d15-adb4-8fec94304bde&at=APZUnTX9ydwrAV0UPVKzYS9-LBlH:1709520068240", + "path": "/home/user/Desktop/shi17a.pdf" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/shi17a.pdf" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('f11'); time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.scroll(-40)" + ] + } + } + ], + "trajectory": "trajectories/36037439-2044-4b50-b9d1-875b5a332143", + "related_apps": [ + "OS", + "Chrome" + ], + "evaluator": { + "func": "is_expected_url_pattern_match", + "result": { + "type": "active_url_from_accessTree", + "goto_prefix": "https://" + }, + "expected": { + "type": "rule", + 
"rules": { + "type": "url", + "url": "?hl=en&", + "expected": [ + "https://scholar.google.com/citations", + "user=qRAQ5BsAAAAJ" + ] + } + } + } +} diff --git a/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json b/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json new file mode 100644 index 0000000..63cd0d5 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json @@ -0,0 +1,54 @@ +{ + "id": "a82b78bb-7fde-4cb3-94a4-035baf10bcf0", + "snapshot": "libreoffice_calc", + "instruction": "I'm really enjoying this paper. Could you please find the personal webpages of the first author and the last three authors, and add them to a browser bookmark folder named 'Liked Authors'?", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1IlJ8kU5MlR6OqJHchsSUJzLCmcrG-8N7&export=download&authuser=0&confirm=t&uuid=d2a1810f-edea-4bfd-9d79-e668b9f11876&at=APZUnTVv_eqtC86YzkEU8_jIhC9W:1709522229162", + "path": "/home/user/Desktop/2206.08853.pdf" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/2206.08853.pdf" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('f11'); time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.scroll(-20)" + ] + } + } + ], + "trajectory": "trajectories/a82b78bb-7fde-4cb3-94a4-035baf10bcf0", + "related_apps": [ + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } +} From e42f5e2a8feefa58301d81555924240abef46528 Mon Sep 17 00:00:00 2001 From: tsuky_chen <3107760494@qq.com> Date: Mon, 4 Mar 2024 21:43:28 +0800 Subject: [PATCH 3/5] update multi apps --- 
.../48d05431-6cd5-4e76-82eb-12b60d823f7d.json | 62 ++++++++++++++++ .../81c425f5-78f3-4771-afd6-3d2973825947.json | 69 ++++++++++++++++++ .../aad10cd7-9337-4b62-b704-a857848cedf2.json | 70 +++++++++++++++++++ .../bb83cab4-e5c7-42c7-a67b-e46068032b86.json | 69 ++++++++++++++++++ 4 files changed, 270 insertions(+) create mode 100644 evaluation_examples/examples/multi_apps/48d05431-6cd5-4e76-82eb-12b60d823f7d.json create mode 100644 evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json create mode 100644 evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json create mode 100644 evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json diff --git a/evaluation_examples/examples/multi_apps/48d05431-6cd5-4e76-82eb-12b60d823f7d.json b/evaluation_examples/examples/multi_apps/48d05431-6cd5-4e76-82eb-12b60d823f7d.json new file mode 100644 index 0000000..d69e12e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/48d05431-6cd5-4e76-82eb-12b60d823f7d.json @@ -0,0 +1,62 @@ +{ + "id": "48d05431-6cd5-4e76-82eb-12b60d823f7d", + "snapshot": "os", + "instruction": "When I ran \"conda install datasets\" in terminal, I got \"conda: command not found\". 
Could you help me solve it?", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.hotkey('ctrl', 'alt', 't'); time.sleep(0.5)" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Terminal" + } + } + ], + "trajectory": "trajectories/48d05431-6cd5-4e76-82eb-12b60d823f7d", + "related_apps": [ + "os", + "chrome" + ], + "evaluator": { + "func": "is_in_list", + "result": { + "type": "vm_command_line", + "command": "conda list", + "shell": true + }, + "expected": { + "type": "rule", + "rules": { + "expected": "packages in environment at" + } + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json b/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json new file mode 100644 index 0000000..85f244e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json @@ -0,0 +1,69 @@ +{ + "id": "81c425f5-78f3-4771-afd6-3d2973825947", + "snapshot": "libreoffice_calc", + "instruction": "Could you help me transfer LibreOffice Calc data in this file to a LibreOffice Writer table? 
Save it as \"price.docx\" on the desktop.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/OSP_Envelope_Price-List_2023_5000.xlsx", + "url": "https://drive.usercontent.google.com/download?id=1tXhQ1aG0JMbp3Yz643QRCmJBCHKE_trr&export=download&authuser=0&confirm=t&uuid=b4441d6c-48ce-4d49-a8cb-9239e8e20d1a&at=APZUnTU-Ba8Vx0N8e1nuomP_-4mm:1709556978086" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/OSP_Envelope_Price-List_2023_5000.xlsx" + } + } + ], + "trajectory": "trajectories/81c425f5-78f3-4771-afd6-3d2973825947", + "related_apps": [ + "libreoffice_writer", + "libreoffice_calc", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "price.docx - LibreOffice Writer", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5);" + ] + } + } + ], + "func": "compare_docx_tables", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/price.docx", + "dest": "price.docx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1KbdlQC0qSAYewG8QnZgParnSwv3s3dub&export=download&authuser=0&confirm=t&uuid=15dcc25c-8168-425e-96e1-fd27e0d6904b&at=APZUnTVho4ZrREHf9DC4rKwdIi3R:1709557117932", + "dest": "price.docx" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json b/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json new file mode 100644 index 0000000..950703b --- /dev/null +++ b/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json @@ -0,0 +1,70 @@ +{ + "id": "aad10cd7-9337-4b62-b704-a857848cedf2", + 
"snapshot": "chrome", + "instruction": "I want to get a local file version of this blog on https://developer.apple.com/design/human-interface-guidelines/searching to make my own revision. Just keep the main content on this page, which starts from 'searching' and ends before 'resources'. Help me save this blog into \"notes.docx\" on the desktop.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + } + ], + "trajectory": "trajectories/aad10cd7-9337-4b62-b704-a857848cedf2", + "related_apps": [ + "os", + "chrome" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "notes.docx - LibreOffice Writer", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); " + ] + } + } + ], + "func": "compare_docx_files", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1r2KJv0H3foo1WlWnArxdXnaew-yixNqL&export=download&authuser=0&confirm=t&uuid=633cc27c-d38b-4c45-907d-025341b4af1c&at=APZUnTV8AW5F_aLVooprdfgt-Q-Z:1709547335200", + "dest": "notes.docx" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/notes.docx", + "dest": "notes.docx" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json b/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json new file mode 100644 index 0000000..c61858e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json @@ -0,0 +1,69 @@ +{ + "id": 
"bb83cab4-e5c7-42c7-a67b-e46068032b86", + "snapshot": "libreoffice_impress", + "instruction": "I'd like to save an Impress file as a document that I can edit in Writer. Just keep all the text in impress in \"script.docx\" on Desktop and I will do the reformatting by myself. Thanks!", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/Unlocking-the-Power-of-ChatGPT.pptx", + "url": "https://drive.usercontent.google.com/download?id=15JpEBadJeJXmkaMOMNHV5QjyVmuofQMB&export=download&authuser=0&confirm=t&uuid=308636e7-6d09-4b72-81df-cc791fc83d30&at=APZUnTV4I6P3uydAZJ-Ls1n_i8sK:1709558907727" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/Unlocking-the-Power-of-ChatGPT.pptx" + } + } + ], + "trajectory": "trajectories/bb83cab4-e5c7-42c7-a67b-e46068032b86", + "related_apps": [ + "libreoffice_writer", + "libreoffice_impress", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "script.docx - LibreOffice Writer", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5);" + ] + } + } + ], + "func": "compare_docx_tables", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/script.docx", + "dest": "script.docx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1-Sol1W8S7Ybj-3KBJJarbcYUqS5wAQ1C&export=download&authuser=0&confirm=t&uuid=d967f546-b8f7-4ac2-b8fc-b1635f1cfbc4&at=APZUnTUazmbS2X3BSXDEQtJgobgf:1709559012053", + "dest": "script.docx" + } + } +} \ No newline at end of file From 69ef653a7c35049a877370d54023f9fe356534bd Mon Sep 17 00:00:00 2001 From: tsuky_chen <3107760494@qq.com> Date: Tue, 5 Mar 2024 22:46:56 +0800 Subject: [PATCH 
4/5] update multi apps --- desktop_env/evaluators/metrics/__init__.py | 9 +- desktop_env/evaluators/metrics/chrome.py | 33 +++++++ desktop_env/evaluators/metrics/docs.py | 23 +++++ desktop_env/evaluators/metrics/vscode.py | 29 +++++++ .../20236825-b5df-46e7-89bf-62e1d640a897.json | 58 +++++++++++++ .../227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json | 69 +++++++++++++++ .../8df7e444-8e06-4f93-8a1a-c5c974269d82.json | 47 ++++++++++ .../a503b07f-9119-456b-b75d-f5146737d24f.json | 47 ++++++++++ .../b337d106-053f-4d37-8da0-7f9c4043a66b.json | 85 +++++++++++++++++++ 9 files changed, 397 insertions(+), 3 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json create mode 100644 evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json create mode 100644 evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json create mode 100644 evaluation_examples/examples/multi_apps/a503b07f-9119-456b-b75d-f5146737d24f.json create mode 100644 evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 527dbe5..c965a95 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -18,7 +18,8 @@ from .chrome import ( is_expected_search_query, is_expected_active_tab, is_expected_url_pattern_match, - is_added_to_steam_cart + is_added_to_steam_cart, + compare_pdf_images ) from .docs import ( compare_font_names, @@ -45,7 +46,8 @@ from .docs import ( is_first_line_centered, check_file_exists, check_tabstops, - compare_contains_image + compare_contains_image, + compare_docx_images ) from .general import ( check_csv, @@ -125,7 +127,8 @@ from .vscode import ( check_json_settings, check_json_keybindings, check_python_file_by_test_suite, - check_python_file_by_gold_file + check_python_file_by_gold_file, + compare_zip_files ) 
diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py index 0a76bf0..3c367b3 100644 --- a/desktop_env/evaluators/metrics/chrome.py +++ b/desktop_env/evaluators/metrics/chrome.py @@ -129,6 +129,39 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}") return score / len(pdf2_path) +import fitz +from PIL import Image +from io import BytesIO + +def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float: + def extract_images_from_pdf(pdf_path): + pdf_document = fitz.open(pdf_path) + images = [] + + for page_number in range(pdf_document.page_count): + page = pdf_document[page_number] + image_list = page.get_images(full=True) + + for img_info in image_list: + base_image = pdf_document.extract_image(img_info[0]) # img_info[0] is the image xref, which is what extract_image expects (not the loop position) + image_bytes = base_image["image"] + + images.append(BytesIO(image_bytes)) + + return images + + images1 = extract_images_from_pdf(pdf1_path) + images2 = extract_images_from_pdf(pdf2_path) + + if len(images1) != len(images2): + return 0. + + for i, (img1, img2) in enumerate(zip(images1, images2), 1): + if Image.open(img1).tobytes() != Image.open(img2).tobytes(): + return 0. + + return 1.
+ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float: """ diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index a17da39..b44df9c 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -159,6 +159,29 @@ def compare_docx_tables(docx_file1, docx_file2): return 1 +from io import BytesIO +from PIL import Image + +def compare_docx_images(docx_file1, docx_file2): + doc1 = Document(docx_file1) + doc2 = Document(docx_file2) + + def extract_images(doc): + images = [] + for rel in doc.part.rels.values(): + if "image" in rel.reltype: + img_data = rel.target_part.blob + images.append(BytesIO(img_data)) + return images + + images1 = extract_images(doc1) + images2 = extract_images(doc2) + if len(images1) != len(images2): + return 0 + for img1, img2 in zip(images1, images2): + if Image.open(img1).tobytes() != Image.open(img2).tobytes(): + return 0 + return 1 def compare_line_spacing(docx_file1, docx_file2): if not compare_docx_files(docx_file1, docx_file2): diff --git a/desktop_env/evaluators/metrics/vscode.py b/desktop_env/evaluators/metrics/vscode.py index ecf4e10..a3e6779 100644 --- a/desktop_env/evaluators/metrics/vscode.py +++ b/desktop_env/evaluators/metrics/vscode.py @@ -90,6 +90,35 @@ def compare_text_file(actual: str, expected: str, **options) -> float: return 1.0 return 0.0 +import zipfile + +def compare_zip_files(actual: str, expected: str, **options) -> float: + """ + Args: + actual (str): path to result zip file + expected (str): path to gold zip file + + Return: + float: the score + """ + if not actual: + return 0. 
+ + with zipfile.ZipFile(actual, 'r') as zip_file1, zipfile.ZipFile(expected, 'r') as zip_file2: + file_list1 = set(zip_file1.namelist()) + file_list2 = set(zip_file2.namelist()) + + if file_list1 != file_list2: + return 0.0 + + for file_name in file_list1: + content1 = zip_file1.read(file_name) + content2 = zip_file2.read(file_name) + + if content1 != content2: + return 0.0 + return 1.0 + def compare_config(actual: str, rules: Dict, **options) -> float: if not actual: diff --git a/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json b/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json new file mode 100644 index 0000000..550755b --- /dev/null +++ b/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json @@ -0,0 +1,58 @@ +{ + "id": "20236825-b5df-46e7-89bf-62e1d640a897", + "snapshot": "vscode", + "instruction": "I am coding on my algorithm practice. The doc \"Bubble_Sort_tutorial.docx\" is the document for it.
Help me finish the function 'bubbleSort' in 'bubbleSort.py' on the Desktop save the output in 'res.txt' on Desktop.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1AQtZ8Hrf7WtyUtaHMtDN-UErKRXnW64d&export=download&authuser=0&confirm=t&uuid=bf7f2b4b-ecf9-4260-b74e-db0cd40b58ca&at=APZUnTVimJzbmwJ8-4E1lq9ipJf_:1709624149129", + "path": "/home/user/Desktop/bubbleSort.zip" + }, + { + "url": "https://drive.usercontent.google.com/download?id=168ZHCnK6v5PEZ8G5M25ZUW9fICk4OlfE&export=download&authuser=0&confirm=t&uuid=3642df08-dc40-4d37-93a0-8532e3012fb0&at=APZUnTUP1OTlq0kIgqcj7YSWw6MB:1709622592489", + "path": "/home/user/Desktop/Bubble_Sort_tutorial.docx" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "/bin/bash", + "-c", + "unzip /home/user/Desktop/bubbleSort.zip -d /home/user/Desktop/ && rm -rf /home/user/Desktop/bubbleSort.zip" + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/Bubble_Sort_tutorial.docx" + } + } + ], + "trajectory": "trajectories/20236825-b5df-46e7-89bf-62e1d640a897", + "related_apps": [ + "vscode", + "libreoffice_writer", + "os" + ], + "evaluator": { + "func": "compare_text_file", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1g2Trt9oxQyW_sx8aIztFA0zNsE4yNw2x&export=download&authuser=0&confirm=t&uuid=342751c4-54f1-4760-9326-e7388845ded0&at=APZUnTV5BcbaxIZrDglWbs84Oxln:1709623697315", + "dest": "res.txt" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/res.txt", + "dest": "res.txt" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json b/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json new file mode 100644 index 0000000..f186383 --- /dev/null +++ 
b/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json @@ -0,0 +1,69 @@ +{ + "id": "227d2f97-562b-4ccb-ae47-a5ec9e142fbb", + "snapshot": "gimp", + "instruction": "I have my .xcf file saved on Desktop. Could you help me copy the image and paste it into a Libreoffice Writer file? Save it as 'image.docx' on the Desktop.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/QTdHniCqfJbBLJe3L3nijU-1200-80.xcf", + "url": "https://drive.usercontent.google.com/download?id=1BGoDOu9bYIG7Twj5dVTxRIWDUgWzzDtP&export=download&authuser=0&confirm=t&uuid=235d1bb2-37a0-4d96-82bf-a87f31d03cb4&at=APZUnTX53EdR1stASFS3OH5luAtB:1709617456061" + } + ] + } + } + ], + "trajectory": "trajectories/227d2f97-562b-4ccb-ae47-a5ec9e142fbb", + "related_apps": [ + "libreoffice_writer", + "gimp", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "image.docx - LibreOffice Writer", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5);" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_docx_images", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/image.docx", + "dest": "image.docx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=11kWQc1XFEqcIMuW0-NnZRSdv1199OmVI&export=download&authuser=0&confirm=t&uuid=694676fd-1ac9-4501-8acf-f48018494c7f&at=APZUnTV-koL51ka5dHum_HpGywv_:1709618406292", + "dest": "image.docx" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json 
b/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json new file mode 100644 index 0000000..12263d2 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json @@ -0,0 +1,47 @@ +{ + "id": "8df7e444-8e06-4f93-8a1a-c5c974269d82", + "snapshot": "libreoffice_writer", + "instruction": "In the \"reminder.docx\" on Desktop is the submission instruction of our essay work. My essay is saved as docx file in /home/user. Please help me prepare the files for submission as required. ", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1wrCJd2f0xYnrHcj6KDzCe96X9JsN3VI3&export=download&authuser=0&confirm=t&uuid=277cb94d-1981-4f4d-b1ba-bceac8146001&at=APZUnTWKU5DBnr_6-_ZlEdsvhpCz:1709633482673", + "path": "/home/user/Recruitment_and_retention_of_health_professionals_across_Europe.docx" + }, + { + "url": "https://drive.usercontent.google.com/download?id=1vzawJthEhQHcK4cUF0W9QT4zaFywO6aH&export=download&authuser=0&confirm=t&uuid=0fbb6a50-f9c1-44d2-b430-3af738d5fadc&at=APZUnTWyfv-N5f-EjnF8ob-VfCsD:1709633450986", + "path": "/home/user/Desktop/reminder.docx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/reminder.docx" + } + } + ], + "trajectory": "trajectories/8df7e444-8e06-4f93-8a1a-c5c974269d82", + "related_apps": [ + "libreoffice_writer", + "os" + ], + "evaluator": { + "func": "compare_zip_files", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1IKRu-dMFP4Aqzq5-4TOmOWVci0qvC27K&export=download&authuser=0&confirm=t&uuid=e2dabad2-5648-4bc3-a40f-f008089cd613&at=APZUnTVh5JD5nT3EvutwHIaSnJAT:1709633945616", + "dest": "Recruitment_and_retention_of_health_professionals_across_Europe.zip" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Recruitment_and_retention_of_health_professionals_across_Europe.zip", + 
"dest": "Recruitment_and_retention_of_health_professionals_across_Europe.zip" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/a503b07f-9119-456b-b75d-f5146737d24f.json b/evaluation_examples/examples/multi_apps/a503b07f-9119-456b-b75d-f5146737d24f.json new file mode 100644 index 0000000..f6f506f --- /dev/null +++ b/evaluation_examples/examples/multi_apps/a503b07f-9119-456b-b75d-f5146737d24f.json @@ -0,0 +1,47 @@ +{ + "id": "a503b07f-9119-456b-b75d-f5146737d24f", + "snapshot": "os", + "instruction": "I am preparing for my paper reading report. Could you help me download this image at https://github.com/xlang-ai/OpenAgents/blob/main/pics/openagents_overview.png and convert it to PDF format. Save it as \"openagents_overview.pdf\" on desktop.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + } + ], + "trajectory": "trajectories/a503b07f-9119-456b-b75d-f5146737d24f", + "related_apps": [ + "os", + "chrome", + "gimp", + "pdf" + ], + "evaluator": { + "func": "compare_pdfs", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1gD1odFNuLT6TP6rpAv_hot86pKcI5wY1&export=download&authuser=0&confirm=t&uuid=9d17c689-95d3-45e4-b093-0165de4045b4&at=APZUnTV9SssKQCoYyYeVsi8e9zcX:1709649491796", + "dest": "openagents_overview.pdf" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/openagents_overview.pdf", + "dest": "openagents_overview.pdf" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json b/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json new file mode 100644 index 0000000..9289558 --- /dev/null +++ 
b/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json @@ -0,0 +1,85 @@ +{ + "id": "b337d106-053f-4d37-8da0-7f9c4043a66b", + "snapshot": "os", + "instruction": "I've recently wanted to try using the Vim editor to edit code, but my Vim editor doesn't show relative line numbers by default. Please search the internet for a tutorial on how to display relative line numbers and make it the default setting for my local Vim.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.hotkey('ctrl', 'alt', 't'); time.sleep(0.5)" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Terminal" + } + } + ], + "trajectory": "trajectories/b337d106-053f-4d37-8da0-7f9c4043a66b", + "related_apps": [ + "os", + "chrome" + ], + "evaluator": { + "postconfig": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1CyhWjUS2oov4Fzc0VRwTh6LiS2Qu-T_8&export=download&authuser=0&confirm=t&uuid=9d0e2c62-895c-4bb3-a057-30cae60329ed&at=APZUnTVngSwARjYsWSmhSyHAqwID:1709647023362", + "path": "eval.sh" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": "chmod +x eval.sh", + "shell": true + } + } + ], + "func": "check_include_exclude", + "result": { + "type": "vm_command_line", + "command": "bash eval.sh", + "shell": true + }, + "expected": { + "type": "rule", + "rules": { + "include": [ + "The File Has Set Number!" 
+ ], + "exclude": [] + } + } + } +} \ No newline at end of file From f21d6851089c7251889a01ea4d460e880cc60646 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Wed, 6 Mar 2024 15:05:47 +0800 Subject: [PATCH 5/5] Update examples --- .../337d318b-aa07-4f4f-b763-89d9a2dd013f.json | 49 +++++++++ .../82e3c869-49f6-4305-a7ce-f3e64a0618e7.json | 89 +++++++++++++++ .../8e116af7-7db7-4e35-a68b-b0939c066c78.json | 54 ++++++++++ .../deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json | 101 ++++++++++++++++++ 4 files changed, 293 insertions(+) create mode 100644 evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json create mode 100644 evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json create mode 100644 evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json create mode 100644 evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json diff --git a/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json b/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json new file mode 100644 index 0000000..d2d4193 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json @@ -0,0 +1,49 @@ +{ + "id": "337d318b-aa07-4f4f-b763-89d9a2dd013f", + "snapshot": "libreoffice_calc", + "instruction": "Cross-check the invoices with the bank statements and identify any discrepancies. 
Then pull out the invoices that don't match the statements and put them in the \"problematic\" folder.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/invoice TII-20220301-90.pdf", + "url": "https://drive.google.com/uc?id=13y1Dkh9dJUaWeMSk6pg_UY-R7K4bhAJM&export=download" + }, + { + "path": "/home/user/Desktop/Invoice # GES-20220215-82.pdf", + "url": "https://drive.google.com/uc?id=1zZYJQVpbGmqT_eH0x9Z5O7WoIFrQo3sN&export=download" + }, + { + "path": "/home/user/Desktop/Invoice # 243729.pdf", + "url": "https://drive.google.com/uc?id=1a7fnWpsAdFQkFWl2BqAA_gTwlZ6nvGAx&export=download" + }, + { + "path": "/home/user/Desktop/Bank-Statement.pdf", + "url": "https://drive.google.com/uc?id=1-KS6p0aip56iPmH4okhXZhLgqVwrcjfw&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/337d318b-aa07-4f4f-b763-89d9a2dd013f", + "related_apps": [ + "os", + "pdf" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/problematic/Invoice # 243729.pdf", + "dest": "Invoice # 243729.pdf" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1a7fnWpsAdFQkFWl2BqAA_gTwlZ6nvGAx&export=download", + "dest": "Invoice # 243729 Gold.pdf" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json new file mode 100644 index 0000000..deda04e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json @@ -0,0 +1,89 @@ +{ + "id": "82e3c869-49f6-4305-a7ce-f3e64a0618e7", + "snapshot": "libreoffice_calc", + "instruction": "Please sift through the folder with all the event photos taken by our photographer. I need you to extract the photos featuring the presenters and place them in a separate folder named 'presenter'. 
Then, compress this folder into a zip file so I can easily share it with others later.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/IDS LLM seminar/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00659.jpg", + "url": "https://drive.google.com/uc?id=1NjnSab2aEtJytYajM9FqeXsLm4ItxTsJ&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00657.jpg", + "url": "https://drive.google.com/uc?id=1untYr5vS_wCVd3-5cfdUegBBILT4tHzF&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00574.jpg", + "url": "https://drive.google.com/uc?id=1V3g4z2QhIHx48LbeaNx31nMosxYPgtzY&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00554.jpg", + "url": "https://drive.google.com/uc?id=1DNz3iZmNk6-dazqXilOFkRpgUahU57zd&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00495.jpg", + "url": "https://drive.google.com/uc?id=1_crKe2tdI4Kjg0Xop8vHo7yN9KACJTJF&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00454.jpg", + "url": "https://drive.google.com/uc?id=1t9L7fVQVxjovTQufetlogulIctn7DF_L&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/82e3c869-49f6-4305-a7ce-f3e64a0618e7", + "related_apps": [ + "os", + "image" + ], + "evaluator": { + "func": "compare_image_list", + "result": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/presenter/DSC00657.jpg", + "/home/user/Desktop/presenter/DSC00574.jpg", + "/home/user/Desktop/presenter/DSC00554.jpg", + "/home/user/Desktop/presenter/DSC00495.jpg" + ], + "dest": [ + "DSC00657.jpg", + "DSC00574.jpg", + "DSC00554.jpg", + "DSC00495.jpg" + ], + "multi": "true" + }, + "expected": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1untYr5vS_wCVd3-5cfdUegBBILT4tHzF&export=download", + 
"https://drive.google.com/uc?id=1V3g4z2QhIHx48LbeaNx31nMosxYPgtzY&export=download", + "https://drive.google.com/uc?id=1DNz3iZmNk6-dazqXilOFkRpgUahU57zd&export=download", + "https://drive.google.com/uc?id=1_crKe2tdI4Kjg0Xop8vHo7yN9KACJTJF&export=download" + ], + "dest": [ + "DSC00657_gold.jpg", + "DSC00574_gold.jpg", + "DSC00554_gold.jpg", + "DSC00495_gold.jpg" + ], + "multi": "true" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json new file mode 100644 index 0000000..1214c6d --- /dev/null +++ b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json @@ -0,0 +1,54 @@ +{ + "id": "8e116af7-7db7-4e35-a68b-b0939c066c78", + "snapshot": "libreoffice_calc", + "instruction": "Please update my bookkeeping sheet with the recent transactions from the provided folder, detailing my expenses over the past few days.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/my_bookkeeping.xlsx", + "url": "https://drive.google.com/uc?id=1QOSpTZPFzFZeC0tng4Gfws544LFln836&export=download" + }, + { + "path": "/home/user/Desktop/receipt_0.jpeg", + "url": "https://drive.google.com/uc?id=1b0BRc-BzXObVCUEonJfRbDsrgxZugj3U&export=download" + }, + { + "path": "/home/user/Desktop/receipt_1.jpg", + "url": "https://drive.google.com/uc?id=1S-JBDqwEf7Z_JXDItK_F4BOHgScTjlyN&export=download" + }, + { + "path": "/home/user/Desktop/receipt_2.jpg", + "url": "https://drive.google.com/uc?id=1Ys2abZi9_0y8sxuj2vCbC0OhjC6YdrC-&export=download" + }, + { + "path": "/home/user/Desktop/receipt_3.pdf", + "url": "https://drive.google.com/uc?id=1sKvBbGDpmUkv891xTqX7w5dtEvchQahd&export=download" + }, + { + "path": "/home/user/Desktop/receipt_4.jpg", + "url": "https://drive.google.com/uc?id=1kW7xH5bc2jRaKGDKHDrgSehTrPgkyzkc&export=download" + } + ] + } + } + ], + "trajectory": 
"trajectories/8e116af7-7db7-4e35-a68b-b0939c066c78", + "related_apps": [ + "", + "" + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } +} diff --git a/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json b/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json new file mode 100644 index 0000000..0037591 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json @@ -0,0 +1,101 @@ +{ + "id": "deec51c9-3b1e-4b9e-993c-4776f20e8bb2", + "snapshot": "libreoffice_calc", + "instruction": "Find a paper list of all the new foundation language models issued on 11th Oct. 2023 via arxiv daily, and organize it into the sheet I opened.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/New Large Language Models.xlsx", + "url": "https://drive.google.com/uc?id=1NJFAUDzatd5TbBqXeCy3-ok4BWj-xayT&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/New Large Language Models.xlsx" + } + } + ], + "trajectory": "trajectories/deec51c9-3b1e-4b9e-993c-4776f20e8bb2", + "related_apps": [ + "libreoffice_calc", + "chrome", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "New Large Language Models.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/New Large Language Models.xlsx", + "dest": "New Large Language Models.xlsx" + }, + "expected": { + "type": "cloud_file", +
"path": "https://drive.google.com/uc?id=1BHOyjFo72b74YKWTqPMaoNvCzICkos-G&export=download", + "dest": "New Large Language Models Gold.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "B2:B5", + "C2:C5" + ], + "type": "exact_match" + }, + { + "range": [ + "A2:A5" + ], + "type": "fuzzy_match", + "threshold": 90, + "ignore_case": true + } + ] + } + ] + } + } +}