From 8df2233730956a5e44c5fb696ec579111a0edbc3 Mon Sep 17 00:00:00 2001 From: rhythmcao Date: Fri, 8 Mar 2024 19:25:51 +0800 Subject: [PATCH] add multi-turn examples (in total, add 12 examples by ruisheng.cao 2024-03-08) --- .../5df7b33a-9f77-4101-823e-02f863e1c1ae.json | 88 ++++++++----------- .../aceb0368-56b8-4073-b70e-3dc9aee184e0.json | 87 ++++++++++++++++++ .../c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json | 58 ++++++++++++ 3 files changed, 180 insertions(+), 53 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/aceb0368-56b8-4073-b70e-3dc9aee184e0.json create mode 100644 evaluation_examples/examples/multi_apps/c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json diff --git a/evaluation_examples/examples/multi_apps/5df7b33a-9f77-4101-823e-02f863e1c1ae.json b/evaluation_examples/examples/multi_apps/5df7b33a-9f77-4101-823e-02f863e1c1ae.json index 64bdee1..e00b198 100644 --- a/evaluation_examples/examples/multi_apps/5df7b33a-9f77-4101-823e-02f863e1c1ae.json +++ b/evaluation_examples/examples/multi_apps/5df7b33a-9f77-4101-823e-02f863e1c1ae.json @@ -1,33 +1,17 @@ { - "id": "df67aebb-fb3a-44fd-b75b-51b6012df509", - "snapshot": "vscode", - "instruction": "I am writing my paper thesis. I have listed all referenced papers in the opened docx. But my mentor asked me to use latex instead of word writer. So could you help me export the dblp bibtex of these papers into 'references.bib'. By the way, if the paper is published, do not use the arxiv version. Separate each bibtex dict with a blank line for clarity.", + "id": "5df7b33a-9f77-4101-823e-02f863e1c1ae", + "snapshot": "libreoffice_writer", + "instruction": "I enjoy reading during my spare time, but this book is too bulky. Each time I open it, I have to find where I left off, which is a hassle. 
I'd like to divide the book into several PDFs, each containing a different chapter, and name them following the example I set with chapter one.", "source": "authors", "config": [ { - "type": "launch", + "type": "download", "parameters": { - "command": [ - "google-chrome", - "--remote-debugging-port=1337" - ] - } - }, - { - "type": "launch", - "parameters": { - "command": [ - "socat", - "tcp-listen:9222,fork", - "tcp:localhost:1337" - ] - } - }, - { - "type": "chrome_open_tabs", - "parameters": { - "urls_to_open": [ - "https://dblp.org/" + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1iaPADtkbDGbupuwNQiG-L9Topt_VJl8g&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/book.zip" + } ] } }, @@ -35,8 +19,9 @@ "type": "execute", "parameters": { "command": [ - "touch", - "/home/user/Desktop/references.bib" + "/bin/bash", + "-c", + "unzip -q /home/user/Desktop/book.zip -d /home/user/Desktop && rm /home/user/Desktop/book.zip" ] } }, @@ -44,52 +29,49 @@ "type": "launch", "parameters": { "command": [ - "code", - "/home/user/Desktop/references.bib" + "nautilus", + "/home/user/Desktop/book" ] } }, { - "type": "download", + "type": "open", "parameters": { - "files": [ - { - "url": "https://drive.usercontent.google.com/download?id=1NDYJ7XyiUVJYuMBDAYvObrSZdODrhUMi&export=download&authuser=0&confirm=t", - "path": "/home/user/Desktop/references.docx" - } - ] - } - }, - { - "type": "launch", - "parameters": { - "command": [ - "libreoffice", - "--writer", - "/home/user/Desktop/references.docx" - ] + "path": "/home/user/Desktop/book/Spectral Graph Theory.pdf" } } ], "trajectory": "trajectories/", "related_apps": [ - "chrome", + "os", "libreoffice_writer" ], "evaluator": { - "func": "compare_text_file", + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": [ + "/bin/bash", + "-c", + "cd /home/user/Desktop/book && zip -qr book.zip *.pdf" + ] + } + } + ], + "func": "compare_archive", "result": { "type": "vm_file", - 
"path": "/home/user/Desktop/references.bib", - "dest": "references.bib" + "path": "/home/user/Desktop/book/book.zip", + "dest": "book.zip" }, "expected": { "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1eeaPRSAWhzsNGFuda_u6phsdQMZHBkb2&export=download&authuser=0&confirm=t", - "dest": "gold_references.bib" + "path": "https://drive.usercontent.google.com/download?id=1oH9UAbJe4EmPxwDX4kQPVyKqGF2y2pQh&export=download&authuser=0&confirm=t", + "dest": "gold_book.zip" }, "options": { - "ignore_blanks": true + "file_type": "pdf" } } } \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/aceb0368-56b8-4073-b70e-3dc9aee184e0.json b/evaluation_examples/examples/multi_apps/aceb0368-56b8-4073-b70e-3dc9aee184e0.json new file mode 100644 index 0000000..5d3394e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/aceb0368-56b8-4073-b70e-3dc9aee184e0.json @@ -0,0 +1,87 @@ +{ + "id": "aceb0368-56b8-4073-b70e-3dc9aee184e0", + "snapshot": "libreoffice_calc", + "instruction": "I am grading students' English exam papers, but the test consists only of multiple-choice questions. It's too exhausting to check each question one by one and record the detailed scores. 
Can you help me compare the remaining students' answers with the answer key and record the detailed scoring in the opened spreadsheet?", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1CfzBZxhhV1_vUKhxsP_4M574-XFm1qIo&export=download&authuser=0&confirm=t", + "path": "/home/user/exam.zip" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "unzip", + "/home/user/exam.zip", + "-d", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--writer", + "/home/user/exam/ReferenceAnswers.docx" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--calc", + "/home/user/exam/grades.xlsx" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "nautilus", + "/home/user/exam/" + ] + + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_calc", + "os" + ], + "evaluator": { + "func": "compare_table", + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1a0gZwixcJuZAUtqGR3L5P20JmoWfQU_d&export=download&authuser=0&confirm=t", + "dest": "gold_grades.xlsx" + }, + "result": { + "type": "vm_file", + "path": "/home/user/exam/grades.xlsx", + "dest": "grades.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json b/evaluation_examples/examples/multi_apps/c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json new file mode 100644 index 0000000..dcd790e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json @@ -0,0 +1,58 @@ +{ + "id": "c2751594-0cd5-4088-be1b-b5f2f9ec97c4", + "snapshot": "thunderbird", + "instruction": "Help me export the first image from the doc file attached in 
the most recent email in the Notes folder, and set this image as the new desktop background.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1JcDjBGBP5ly90pdVLs3ySoe6qX9Ht_d6&export=download&authuser=0&confirm=t", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "--recursive-unlink", + "-xz", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "/usr/bin/thunderbird" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "thunderbird", + "os" + ], + "evaluator": { + "func": "compare_images", + "result": { + "type": "vm_wallpaper", + "dest": "background.png" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1iIdjmXD6fRSYtpqxujTGzx8agvIgXsqz&export=download&authuser=0&confirm=t", + "dest": "gold_background.png" + } + } +} \ No newline at end of file