From 15967704109cc49c87713f6081a25042ab03cf8f Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Thu, 15 Feb 2024 19:24:22 +0800 Subject: [PATCH] Add new os examples --- .../0b35aee5-6f2a-4c71-b2da-aed0105fdbde.json | 59 ++++++++++++++ .../37887e8c-da15-4192-923c-08fa390a176d.json | 75 +++++++++++++++++ .../4127319a-8b79-4410-b58a-7a151e15f3d7.json | 53 ++++++++++++ .../4783cc41-c03c-4e1b-89b4-50658f642bd5.json | 36 +++++++++ .../5c1075ca-bb34-46a3-a7a0-029bd7463e79.json | 80 +++++++++++++++++++ .../5ced85fc-fa1a-4217-95fd-0fb530545ce2.json | 57 +++++++++++++ .../a462a795-fdc7-4b23-b689-e8b6df786b78.json | 9 +-- .../c288e301-e626-4b98-a1ab-159dcb162af5.json | 2 +- .../e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15.json | 17 ---- 9 files changed, 362 insertions(+), 26 deletions(-) create mode 100644 evaluation_examples/examples/os/0b35aee5-6f2a-4c71-b2da-aed0105fdbde.json create mode 100644 evaluation_examples/examples/os/37887e8c-da15-4192-923c-08fa390a176d.json create mode 100644 evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json create mode 100644 evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json create mode 100644 evaluation_examples/examples/os/5c1075ca-bb34-46a3-a7a0-029bd7463e79.json create mode 100644 evaluation_examples/examples/os/5ced85fc-fa1a-4217-95fd-0fb530545ce2.json delete mode 100644 evaluation_examples/examples/os/e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15.json diff --git a/evaluation_examples/examples/os/0b35aee5-6f2a-4c71-b2da-aed0105fdbde.json b/evaluation_examples/examples/os/0b35aee5-6f2a-4c71-b2da-aed0105fdbde.json new file mode 100644 index 0000000..fb82d70 --- /dev/null +++ b/evaluation_examples/examples/os/0b35aee5-6f2a-4c71-b2da-aed0105fdbde.json @@ -0,0 +1,59 @@ +{ + "id": "0b35aee5-6f2a-4c71-b2da-aed0105fdbde", + "snapshot": "os", + "instruction": "Calculate the md5 sum of \"submission.cpp\" with less sensitivity to superficial changes like comments or whitespace", + "source": "NL2Bash", + "config": [ + { + "type": "execute", + "parameters": { + "command": "echo password | sudo -S apt-get install astyle cpp md5sum -y", + "shell": true + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1UiDgRdPqGGPmklZyXhwO1-UyOlhJsxt0&export=download&authuser=0&confirm=t&uuid=7795d6f5-87df-4ebb-8540-e9756362736a&at=APZUnTVk-7AnKL4ofVZGKQeHxjHY:1707987053722", + "path": "/home/user/Desktop/submission.cpp" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.click(960, 540); time.sleep(0.5);" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"alt\", \"t\"); time.sleep(0.5);" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "os" + ], + "evaluator": { + "func": "check_include_exclude", + "result": { + "type": "vm_terminal_output" + }, + "expected": { + "type": "calc_insensitive_md5" + } + } +} diff --git a/evaluation_examples/examples/os/37887e8c-da15-4192-923c-08fa390a176d.json b/evaluation_examples/examples/os/37887e8c-da15-4192-923c-08fa390a176d.json new file mode 100644 index 0000000..76e5ec6 --- /dev/null +++ b/evaluation_examples/examples/os/37887e8c-da15-4192-923c-08fa390a176d.json @@ -0,0 +1,75 @@ +{ + "id": "37887e8c-da15-4192-923c-08fa390a176d", + "snapshot": "os", + "instruction": "Compress all files in the \"/tmp/test_files\" directory tree that were last modified 30 days ago", + "source": "NL2Bash", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1auLclNUSBieSHZp2tLOUT7tA0ejzz4X6&export=download&authuser=0&confirm=t&uuid=142ef302-89af-4511-afa7-11ec7e8c3397&at=APZUnTVxFDmqv0dbWMC0VdlRziXD:1707992474338", + "path": "setup.sh" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": "chmod +x setup.sh", + "shell": true + } + }, + { + "type": "execute", + "parameters": { + "command": "./setup.sh", + "shell": true + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1DakvqJfSokEPuH8_LYfSCBbM7Fws1F0o&export=download&authuser=0&confirm=t&uuid=4950eb71-7881-4b52-a94c-a3eed9d2213f&at=APZUnTUy4o4r1ScCnTgwPwNyqhPr:1707992479376", + "path": "eval.sh" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": "chmod +x eval.sh", + "shell": true + } + } + ], + "func": "check_include_exclude", + "result": { + "type": "vm_command_line", + "command": "bash eval.sh", + "shell": true + }, + "expected": { + "type": "rule", + "rules": { + "include": [ + "Success: The task was completed correctly." + ], + "exclude": [ + ] + } + } + } +} diff --git a/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json b/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json new file mode 100644 index 0000000..c1ed512 --- /dev/null +++ b/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json @@ -0,0 +1,53 @@ +{ + "id": "4127319a-8b79-4410-b58a-7a151e15f3d7", + "snapshot": "os", + "instruction": "Use terminal command to count all the lines of all php files in current directory recursively, show the result on the terminal", + "source": "NL2Bash", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1mp3bXrhKutWzxHnQc9v2Dtt4xclLemik&export=download&authuser=0&confirm=t&uuid=351dc4ac-ecf6-448b-bb09-ebf1231bfe5a&at=APZUnTUlgWg3o8U2Ghl2xRWEXFjz:1707993730783", + "path": "setup.sh" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": "chmod +x setup.sh", + "shell": true + } + }, + { + "type": "execute", + "parameters": { + "command": "./setup.sh", + "shell": true + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "os" + ], + "evaluator": { + "func": "check_include_exclude", + "result": { + "type": "vm_terminal_output" + }, + "expected": { + "type": "rule", + "rules": { + "include": [ + "54\n" + ], + "exclude": [ + ] + } + } + } +} diff --git a/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json b/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json new file mode 100644 index 0000000..4981df0 --- /dev/null +++ b/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json @@ -0,0 +1,36 @@ +{ + "id": "4783cc41-c03c-4e1b-89b4-50658f642bd5", + "snapshot": "os", + "instruction": "Copy directory hierarchy from \"$sourceDir\" to \"$targetDir\"", + "source": "NL2Bash", + "config": [ + { + "type": "execute", + "parameters": { + "command": "sourceDir=\"/home/user/Desktop/\"\nmkdir -p \"$sourceDir\"/dir1/dir2\nmkdir -p \"$sourceDir\"/dir3/dir4", + "shell": true + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.click(960, 540); time.sleep(0.5);" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "os" + ], + "evaluator": { + "func": "infeasible", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/os/5c1075ca-bb34-46a3-a7a0-029bd7463e79.json b/evaluation_examples/examples/os/5c1075ca-bb34-46a3-a7a0-029bd7463e79.json new file mode 100644 index 0000000..7c69d7c --- /dev/null +++ b/evaluation_examples/examples/os/5c1075ca-bb34-46a3-a7a0-029bd7463e79.json @@ -0,0 +1,80 @@ +{ + "id": "5c1075ca-bb34-46a3-a7a0-029bd7463e79", + "snapshot": "os", + "instruction": "Copy all files matching \"*failed.ipynb\" in the current directory tree to \"./fails\" preserving the directory hierarchy", + "source": "NL2Bash", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1npIWJR-A78IJ2m0n6aC6Nufa8uUrcpT1&export=download&authuser=0&confirm=t&uuid=668eeed6-3e3f-4f45-997b-19bb578a3f42&at=APZUnTVL1fWbc3PKXWiBVAMPPOJY:1707985593713", + "path": "setup.sh" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": "chmod +x setup.sh", + "shell": true + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1GeXD_pWlqZ7HCco9RorjzJ_f3DeeP91V&export=download&authuser=0&confirm=t&uuid=f4054888-3228-440b-8833-55c50961ea90&at=APZUnTVCiBJw-lRosK673DlvhHyx:1707985591894", + "path": "eval.sh" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": "chmod +x eval.sh", + "shell": true + } + }, + { + "type": "execute", + "parameters": { + "command": "./setup.sh", + "shell": true + } + }, + { + "type": "execute", + "parameters": { + "command": "cd test_environment", + "shell": true + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "os" + ], + "evaluator": { + "func": "check_include_exclude", + "result": { + "type": "vm_command_line", + "command": "bash eval.sh", + "shell": true + }, + "expected": { + "type": "rule", + "rules": { + "include": [ + "Evaluation successful." + ], + "exclude": [ + ] + } + } + } +} diff --git a/evaluation_examples/examples/os/5ced85fc-fa1a-4217-95fd-0fb530545ce2.json b/evaluation_examples/examples/os/5ced85fc-fa1a-4217-95fd-0fb530545ce2.json new file mode 100644 index 0000000..48a7e72 --- /dev/null +++ b/evaluation_examples/examples/os/5ced85fc-fa1a-4217-95fd-0fb530545ce2.json @@ -0,0 +1,57 @@ +{ + "id": "5ced85fc-fa1a-4217-95fd-0fb530545ce2", + "snapshot": "os", + "instruction": "Append \"
\" to the end of each line in \"1\\n2\\n3\" and save in output.txt", + "source": "NL2Bash", + "config": [ + { + "type": "execute", + "parameters": { + "command": "echo -e \"1\\n2\\n3\" > input.txt", + "shell": true + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=174Bk_JLDwuTTgL0hslzxRG4xB76JxXIR&export=download&authuser=0&confirm=t&uuid=b086a8a1-00b9-4d73-ae05-c34a7fe9693d&at=APZUnTV-T0wfR_ovBue_HnO-tcai:1707989145225", + "path": "eval.sh" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": "chmod +x eval.sh", + "shell": true + } + } + ], + "func": "check_include_exclude", + "result": { + "type": "vm_command_line", + "command": "bash eval.sh", + "shell": true + }, + "expected": { + "type": "rule", + "rules": { + "include": [ + "Success: The task has been completed correctly." + ], + "exclude": [ + ] + } + } + } +} diff --git a/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json b/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json index f043475..19dc17f 100644 --- a/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json +++ b/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json @@ -27,17 +27,10 @@ "os" ], "evaluator": { - "func": "exact_match", + "func": "infeasible", "result": { - "type": "vm_command_line", - "command": "if [ '$(whoami)' = 'charles' ]; then echo 'Current user is charles.'; else echo 'Current user is not charles'; fi", - "shell": true }, "expected": { - "type": "rule", - "rules":{ - "expected": "Current user is charles" - } } } } diff --git a/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json b/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json index 9a3de40..85cde22 100644 --- a/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json +++ b/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json @@ -8,7 +8,7 @@ "os" ], "evaluator": { - "func": "", + "func": "infeasible", "result": { }, "expected": { diff --git a/evaluation_examples/examples/os/e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15.json b/evaluation_examples/examples/os/e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15.json deleted file mode 100644 index 70cb9e7..0000000 --- a/evaluation_examples/examples/os/e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "id": "e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15", - "snapshot": "os", - "instruction": "Can you help me add Charles to my contact?", - "source": "https://help.ubuntu.com/lts/ubuntu-help/contacts-add-remove.html.en", - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } - } - } \ No newline at end of file