sci-gui-agent-benchmark/evaluation_examples/examples/os/5c1075ca-bb34-46a3-a7a0-029bd7463e79.json

{
  "id": "5c1075ca-bb34-46a3-a7a0-029bd7463e79",
  "snapshot": "os",
  "instruction": "Copy all files matching \"*failed.ipynb\" in the current directory tree to \"./fails\" preserving the directory hierarchy",
  "source": "NL2Bash",
  "config": [
    {
      "type": "download",
      "parameters": {
        "files": [
          {
            "path": "setup.sh",
            "url": "https://drive.usercontent.google.com/download?id=1npIWJR-A78IJ2m0n6aC6Nufa8uUrcpT1&export=download&authuser=0&confirm=t&uuid=668eeed6-3e3f-4f45-997b-19bb578a3f42&at=APZUnTVL1fWbc3PKXWiBVAMPPOJY:1707985593713"
          }
        ]
      }
    },
    {
      "type": "execute",
      "parameters": { "command": "chmod +x setup.sh", "shell": true }
    },
    {
      "type": "download",
      "parameters": {
        "files": [
          {
            "path": "eval.sh",
            "url": "https://drive.usercontent.google.com/download?id=1GeXD_pWlqZ7HCco9RorjzJ_f3DeeP91V&export=download&authuser=0&confirm=t&uuid=9d69de2c-9908-4f6e-ad21-e8329440ee89&at=APZUnTU0uxOOMOi0HuEo2mRLcyjW:1708524614643"
          }
        ]
      }
    },
    {
      "type": "execute",
      "parameters": { "command": "chmod +x eval.sh", "shell": true }
    },
    {
      "type": "execute",
      "parameters": { "command": "./setup.sh", "shell": true }
    },
    {
      "type": "execute",
      "parameters": {
        "command": [
          "python",
          "-c",
          "import pyautogui; import time; pyautogui.click(960, 540); time.sleep(0.5); pyautogui.hotkey('ctrl', 'alt', 't'); time.sleep(0.5)"
        ]
      }
    },
    {
      "type": "activate_window",
      "parameters": { "window_name": "Terminal" }
    },
    {
      "type": "execute",
      "parameters": {
        "command": [
          "python",
          "-c",
          "import pyautogui; import time; time.sleep(0.5); pyautogui.write('cd test_environment'); time.sleep(0.5); pyautogui.press('enter')"
        ]
      }
    }
  ],
  "trajectory": "trajectories/",
  "related_apps": ["os"],
  "evaluator": {
    "func": "check_include_exclude",
    "result": {
      "type": "vm_command_line",
      "command": "bash eval.sh",
      "shell": true
    },
    "expected": {
      "type": "rule",
      "rules": {
        "include": ["Evaluation successful."],
        "exclude": []
      }
    }
  }
}