From f21d6851089c7251889a01ea4d460e880cc60646 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Wed, 6 Mar 2024 15:05:47 +0800 Subject: [PATCH] Update examples --- .../337d318b-aa07-4f4f-b763-89d9a2dd013f.json | 49 +++++++++ .../82e3c869-49f6-4305-a7ce-f3e64a0618e7.json | 89 +++++++++++++++ .../8e116af7-7db7-4e35-a68b-b0939c066c78.json | 54 ++++++++++ .../deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json | 101 ++++++++++++++++++ 4 files changed, 293 insertions(+) create mode 100644 evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json create mode 100644 evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json create mode 100644 evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json create mode 100644 evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json diff --git a/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json b/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json new file mode 100644 index 0000000..d2d4193 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json @@ -0,0 +1,49 @@ +{ + "id": "337d318b-aa07-4f4f-b763-89d9a2dd013f", + "snapshot": "libreoffice_calc", + "instruction": "Cross-check the invoices with the bank statements and identify any discrepancies. 
Then pull out the invoices that don't match the statements and put them in the \"problematic\" folder.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/invoice TII-20220301-90.pdf", + "url": "https://drive.google.com/uc?id=13y1Dkh9dJUaWeMSk6pg_UY-R7K4bhAJM&export=download" + }, + { + "path": "/home/user/Desktop/Invoice # GES-20220215-82.pdf", + "url": "https://drive.google.com/uc?id=1zZYJQVpbGmqT_eH0x9Z5O7WoIFrQo3sN&export=download" + }, + { + "path": "/home/user/Desktop/Invoice # 243729.pdf", + "url": "https://drive.google.com/uc?id=1a7fnWpsAdFQkFWl2BqAA_gTwlZ6nvGAx&export=download" + }, + { + "path": "/home/user/Desktop/Bank-Statement.pdf", + "url": "https://drive.google.com/uc?id=1-KS6p0aip56iPmH4okhXZhLgqVwrcjfw&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/337d318b-aa07-4f4f-b763-89d9a2dd013f", + "related_apps": [ + "os", + "pdf" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/problematic/Invoice # 243729.pdf", + "dest": "Invoice # 243729.pdf" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1a7fnWpsAdFQkFWl2BqAA_gTwlZ6nvGAx&export=download", + "dest": "Invoice # 243729 Gold.pdf" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json new file mode 100644 index 0000000..deda04e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json @@ -0,0 +1,89 @@ +{ + "id": "82e3c869-49f6-4305-a7ce-f3e64a0618e7", + "snapshot": "libreoffice_calc", + "instruction": "Please sift through the folder with all the event photos taken by our photographer. I need you to extract the photos featuring the presenters and place them in a separate folder named 'presenter'. 
Then, compress this folder into a zip file so I can easily share it with others later.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/IDS LLM seminar/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00659.jpg", + "url": "https://drive.google.com/uc?id=1NjnSab2aEtJytYajM9FqeXsLm4ItxTsJ&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00657.jpg", + "url": "https://drive.google.com/uc?id=1untYr5vS_wCVd3-5cfdUegBBILT4tHzF&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00574.jpg", + "url": "https://drive.google.com/uc?id=1V3g4z2QhIHx48LbeaNx31nMosxYPgtzY&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00554.jpg", + "url": "https://drive.google.com/uc?id=1DNz3iZmNk6-dazqXilOFkRpgUahU57zd&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00495.jpg", + "url": "https://drive.google.com/uc?id=1_crKe2tdI4Kjg0Xop8vHo7yN9KACJTJF&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00454.jpg", + "url": "https://drive.google.com/uc?id=1t9L7fVQVxjovTQufetlogulIctn7DF_L&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/82e3c869-49f6-4305-a7ce-f3e64a0618e7", + "related_apps": [ + "os", + "image" + ], + "evaluator": { + "func": "compare_image_list", + "result": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/presenter/DSC00657.jpg", + "/home/user/Desktop/presenter/DSC00574.jpg", + "/home/user/Desktop/presenter/DSC00554.jpg", + "/home/user/Desktop/presenter/DSC00495.jpg" + ], + "dest": [ + "DSC00657.jpg", + "DSC00574.jpg", + "DSC00554.jpg", + "DSC00495.jpg" + ], + "multi": "true" + }, + "expected": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1untYr5vS_wCVd3-5cfdUegBBILT4tHzF&export=download", + 
"https://drive.google.com/uc?id=1V3g4z2QhIHx48LbeaNx31nMosxYPgtzY&export=download", + "https://drive.google.com/uc?id=1DNz3iZmNk6-dazqXilOFkRpgUahU57zd&export=download", + "https://drive.google.com/uc?id=1_crKe2tdI4Kjg0Xop8vHo7yN9KACJTJF&export=download" + ], + "dest": [ + "DSC00657_gold.jpg", + "DSC00574_gold.jpg", + "DSC00554_gold.jpg", + "DSC00495_gold.jpg" + ], + "multi": "true" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json new file mode 100644 index 0000000..1214c6d --- /dev/null +++ b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json @@ -0,0 +1,54 @@ +{ + "id": "8e116af7-7db7-4e35-a68b-b0939c066c78", + "snapshot": "libreoffice_calc", + "instruction": "Please update my bookkeeping sheet with the recent transactions from the provided folder, detailing my expenses over the past few days.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/my_bookkeeping.xlsx", + "url": "https://drive.google.com/uc?id=1QOSpTZPFzFZeC0tng4Gfws544LFln836&export=download" + }, + { + "path": "/home/user/Desktop/receipt_0.jpeg", + "url": "https://drive.google.com/uc?id=1b0BRc-BzXObVCUEonJfRbDsrgxZugj3U&export=download" + }, + { + "path": "/home/user/Desktop/receipt_1.jpg", + "url": "https://drive.google.com/uc?id=1S-JBDqwEf7Z_JXDItK_F4BOHgScTjlyN&export=download" + }, + { + "path": "/home/user/Desktop/receipt_2.jpg", + "url": "https://drive.google.com/uc?id=1Ys2abZi9_0y8sxuj2vCbC0OhjC6YdrC-&export=download" + }, + { + "path": "/home/user/Desktop/receipt_3.pdf", + "url": "https://drive.google.com/uc?id=1sKvBbGDpmUkv891xTqX7w5dtEvchQahd&export=download" + }, + { + "path": "/home/user/Desktop/receipt_4.jpg", + "url": "https://drive.google.com/uc?id=1kW7xH5bc2jRaKGDKHDrgSehTrPgkyzkc&export=download" + } + ] + } + } + ], + "trajectory": 
"trajectories/8e116af7-7db7-4e35-a68b-b0939c066c78", + "related_apps": [ + "", + "" + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } +} diff --git a/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json b/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json new file mode 100644 index 0000000..0037591 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json @@ -0,0 +1,101 @@ +{ + "id": "deec51c9-3b1e-4b9e-993c-4776f20e8bb2", + "snapshot": "libreoffice_calc", + "instruction": "Find a paper list of all the new foundation language models issued on 11th Oct. 2023 via arxiv daily, and organize it into the sheet I opened.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/New Large Language Models.xlsx", + "url": "https://drive.google.com/uc?id=1NJFAUDzatd5TbBqXeCy3-ok4BWj-xayT&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/New Large Language Models.xlsx" + } + } + ], + "trajectory": "trajectories/deec51c9-3b1e-4b9e-993c-4776f20e8bb2", + "related_apps": [ + "libreoffice_calc", + "chrome", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "New Large Language Models.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/New Large Language Models.xlsx", + "dest": "New Large Language Models.xlsx" + }, + "expected": { + "type": "cloud_file", + 
"path": "https://drive.google.com/uc?id=1BHOyjFo72b74YKWTqPMaoNvCzICkos-G&export=download", + "dest": "New Large Language Models Gold.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "B2:B5", + "C2:C5" + ], + "type": "exact_match" + }, + { + "range": [ + "A2:A5" + ], + "type": "fuzzy_match", + "threshold": 90, + "ignore_case": true + } + ] + } + ] + } + } +}