From 93b4ff7d957270b50f46e3fff5516715011a6710 Mon Sep 17 00:00:00 2001
From: Liu Yitao
Date: Thu, 25 Jan 2024 10:45:51 +0800
Subject: [PATCH] Update OS evals

---
 desktop_env/evaluators/metrics/general.py     |  6 ++
 desktop_env/evaluators/metrics/os.py          | 93 +++++++++++++++++++
 .../43c2d64c-bab5-4dcb-a30c-b888321c319a.json | 26 ++++++
 .../7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82.json | 33 +++++++
 .../94d95f96-9699-4208-98ba-3c3119edf9c2.json | 26 ++++++
 .../a462a795-fdc7-4b23-b689-e8b6df786b78.json | 33 +++++++
 .../ae039631-2b12-4637-84f6-c67d51511be3.json | 33 +++++++
 .../bedcedc4-4d72-425e-ad62-21960b11fe0d.json | 25 +++++
 .../ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json | 18 ++++
 .../f9be0997-4b7c-45c5-b05c-4612b44a6118.json | 18 ++++
 10 files changed, 311 insertions(+)
 create mode 100644 desktop_env/evaluators/metrics/os.py
 create mode 100644 evaluation_examples/examples/os/43c2d64c-bab5-4dcb-a30c-b888321c319a.json
 create mode 100644 evaluation_examples/examples/os/7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82.json
 create mode 100644 evaluation_examples/examples/os/94d95f96-9699-4208-98ba-3c3119edf9c2.json
 create mode 100644 evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json
 create mode 100644 evaluation_examples/examples/os/ae039631-2b12-4637-84f6-c67d51511be3.json
 create mode 100644 evaluation_examples/examples/os/bedcedc4-4d72-425e-ad62-21960b11fe0d.json
 create mode 100644 evaluation_examples/examples/os/ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json
 create mode 100644 evaluation_examples/examples/os/f9be0997-4b7c-45c5-b05c-4612b44a6118.json

diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py
index 98c9596..d7c945a 100644
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -16,6 +16,11 @@ from .utils import _match_record, _match_value_to_rule
 
 import sqlite3
 
+def check_include_exclude(result: str, rules: Dict[str, List[str]]) -> float:
+    """Return 1.0 if result contains every "include" string and no "exclude" string, else 0.0."""
+    include, exclude = rules.get("include", []), rules.get("exclude", [])
+    return float(all(r in result for r in include) and all(r not in result for r in exclude))
+
 def exact_match(result, rules) -> float:
     expect = rules["expected"]
     print(result, expect)
@@ -195,3 +200,4 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
             value = value[k]
         metric = metric and not _match_value_to_rule(value, r)
     return metric
+
diff --git a/desktop_env/evaluators/metrics/os.py b/desktop_env/evaluators/metrics/os.py
new file mode 100644
index 0000000..b97648c
--- /dev/null
+++ b/desktop_env/evaluators/metrics/os.py
@@ -0,0 +1,93 @@
+import subprocess
+import os
+
+def is_spotify_installed() -> bool:
+    """Return True if the 'which' command finds a 'spotify' executable."""
+    # 'which' exits non-zero when the executable is not on the PATH
+    try:
+        subprocess.check_output(['which', 'spotify'])
+        return True
+    except subprocess.CalledProcessError:
+        return False
+
+def is_dim_screen_inactive_off():
+    """Return 1 if the GNOME session idle-delay reads "uint32 0", else 0."""
+    result = subprocess.run(["gsettings", "get", "org.gnome.desktop.session", "idle-delay"], capture_output=True, text=True)
+    return 1 if result.stdout.strip() == "uint32 0" else 0
+
+def evaluate_create_test_directory():
+    """Return 1 if the directory '/test' exists, else 0.
+
+    The task is to create '/test', so the check passes only once the
+    directory is actually there (a plain file at that path does not count).
+    """
+    return 1 if os.path.isdir('/test') else 0
+
+# TODO: create the file /test.txt and the directory /dir1 before running this
+def is_file_in_directory():
+    """Return 1 if a regular file named 'test.txt' exists inside '/dir1', else 0."""
+    file_path = "/test.txt"
+    directory_path = "/dir1"
+    moved_path = os.path.join(directory_path, os.path.basename(file_path))
+    return 1 if os.path.isfile(moved_path) else 0
+
+# TODO: log in to the system before running this
+def is_logout_successful():
+    """Return 1 if 'whoami' exits with an error, else 0."""
+    try:
+        # check=True is required: without it run() never raises
+        # CalledProcessError and the "successful" branch is unreachable.
+        subprocess.run(["whoami"], check=True)
+        return 0  # Task not successful
+    except subprocess.CalledProcessError:
+        return 1  # Task successful
+
+def is_do_not_disturb_mode_enabled():
+    """Return 1 if GNOME notification banners are off (show-banners is false), else 0.
+
+    The setting is only read ("gsettings get"); writing it from the
+    evaluator would enable the mode itself and always report success.
+    """
+    try:
+        result = subprocess.run(
+            ["gsettings", "get", "org.gnome.desktop.notifications", "show-banners"],
+            capture_output=True, text=True, check=True)
+    except subprocess.CalledProcessError:
+        return 0  # Setting could not be read
+    return 1 if result.stdout.strip() == "false" else 0
+
+def get_default_browser(answer: str):
+    """Return True if xdg-settings reports `answer` as the default web browser."""
+    try:
+        # check=True lets a failing xdg-settings reach the except branch.
+        result = subprocess.run(["xdg-settings", "get", "default-web-browser"], capture_output=True, text=True, check=True)
+        default_browser = result.stdout.strip()
+        return default_browser == answer
+    except subprocess.CalledProcessError:
+        return 0
+
+# TODO: should confirm initial state, i.e. initial list of favorite apps & the first app
+first_app = "thunderbird.desktop" # to be changed
+def is_first_favorite_app_removed():
+    """Return 1 if `first_app` is no longer the first GNOME favorite app, else 0."""
+    try:
+        result = subprocess.run(
+            ["gsettings", "get", "org.gnome.shell", "favorite-apps"],
+            capture_output=True, text=True, check=True)
+    except subprocess.CalledProcessError:
+        return 0  # Task not successful
+
+    # gsettings prints e.g. "['a.desktop', 'b.desktop']", or "@as []" when empty.
+    output = result.stdout.strip()
+    if output.startswith("@as"):
+        output = output[len("@as"):].strip()
+
+    # Drop the brackets, then the spaces and quotes around each entry; the
+    # still-quoted text could never compare equal to first_app.
+    entries = output[1:-1].split(",")
+    favorite_apps = [e.strip().strip("'\"") for e in entries if e.strip()]
+
+    if not favorite_apps or favorite_apps[0] != first_app:
+        return 1  # First favorite app removed
+    else:
+        return 0  # First favorite app not removed
\ No newline at end of file
diff --git a/evaluation_examples/examples/os/43c2d64c-bab5-4dcb-a30c-b888321c319a.json b/evaluation_examples/examples/os/43c2d64c-bab5-4dcb-a30c-b888321c319a.json
new file mode 100644
index 0000000..99299e8
--- /dev/null
+++ b/evaluation_examples/examples/os/43c2d64c-bab5-4dcb-a30c-b888321c319a.json
@@ -0,0 +1,26 @@
+{
+  "id": "43c2d64c-bab5-4dcb-a30c-b888321c319a",
+  "snapshot": "os",
+  "instruction": "Could you please help me create a dir named 'test' in the root directory of current computer?",
+  "source": "https://ubuntu.com/tutorials/command-line-for-beginners#4-creating-folders-and-files",
+  "config": [],
+  "trajectory": "trajectories/",
+  "related_apps": [
+    "os"
+  ],
+  "evaluator": {
+    "func": "exact_match",
+    "result": {
+      "type": "vm_command_line",
+      "config": {
+        "command": "[ -d '/test' ] && echo 'Directory exists.' || echo 'Directory does not exist.'"
+      }
+    },
+    "expected": {
+      "type": "rule",
+      "rules":{
+        "expected": "Directory exists."
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/evaluation_examples/examples/os/7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82.json b/evaluation_examples/examples/os/7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82.json
new file mode 100644
index 0000000..2692d14
--- /dev/null
+++ b/evaluation_examples/examples/os/7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82.json
@@ -0,0 +1,33 @@
+{
+  "id": "7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82",
+  "snapshot": "os",
+  "instruction": "Can you move the file with the path '/test.txt' to the directory with the path '/dir1'?",
+  "source": "https://ubuntu.com/tutorials/command-line-for-beginners#5-moving-and-manipulating-files",
+  "config": [
+    {
+      "type": "execute",
+      "parameters": {
+        "command": "touch /test.txt && mkdir /dir1"
+      }
+    }
+  ],
+  "trajectory": "trajectories/",
+  "related_apps": [
+    "os"
+  ],
+  "evaluator": {
+    "func": "exact_match",
+    "result": {
+      "type": "vm_command_line",
+      "config": {
+        "command": "if [ -f '/dir1/test.txt' ]; then echo 'File exists.'; else echo 'File does not exist.'; fi"
+      }
+    },
+    "expected": {
+      "type": "rule",
+      "rules":{
+        "expected": "File exists."
+      }
+    }
+  }
+}
diff --git a/evaluation_examples/examples/os/94d95f96-9699-4208-98ba-3c3119edf9c2.json b/evaluation_examples/examples/os/94d95f96-9699-4208-98ba-3c3119edf9c2.json
new file mode 100644
index 0000000..eed2e83
--- /dev/null
+++ b/evaluation_examples/examples/os/94d95f96-9699-4208-98ba-3c3119edf9c2.json
@@ -0,0 +1,26 @@
+{
+  "id": "94d95f96-9699-4208-98ba-3c3119edf9c2",
+  "snapshot": "os",
+  "instruction": "I want to install Spotify on my current system. Could you please help me?",
+  "source": "https://help.ubuntu.com/lts/ubuntu-help/addremove-install.html.en",
+  "trajectory": "trajectories/",
+  "related_apps": [
+    "os"
+  ],
+  "evaluator": {
+    "func": "check_include_exclude",
+    "result": {
+      "type": "vm_command_line",
+      "config": {
+        "command": "which spotify || echo 'not found'"
+      }
+    },
+    "expected": {
+      "type": "rule",
+      "rules": {
+        "include": [],
+        "exclude": ["not found"]
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json b/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json
new file mode 100644
index 0000000..5831cb0
--- /dev/null
+++ b/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json
@@ -0,0 +1,33 @@
+{
+  "id": "a462a795-fdc7-4b23-b689-e8b6df786b78",
+  "snapshot": "os",
+  "instruction": "Could you please help me to switch to the user 'Charles' with password of 'Ex@mpleP@55w0rd!'?",
+  "source": "https://ubuntu.com/tutorials/command-line-for-beginners#5-moving-and-manipulating-files",
+  "config": [
+    {
+      "type": "execute",
+      "parameters": {
+        "command": "echo password | sudo -S su - charles"
+      }
+    }
+  ],
+  "trajectory": "trajectories/",
+  "related_apps": [
+    "os"
+  ],
+  "evaluator": {
+    "func": "exact_match",
+    "result": {
+      "type": "vm_command_line",
+      "config": {
+        "command": "if [ \"$(whoami)\" = 'charles' ]; then echo 'Current user is charles.'; else echo 'Current user is not charles'; fi"
+      }
+    },
+    "expected": {
+      "type": "rule",
+      "rules":{
+        "expected": "Current user is charles."
+      }
+    }
+  }
+}
diff --git a/evaluation_examples/examples/os/ae039631-2b12-4637-84f6-c67d51511be3.json b/evaluation_examples/examples/os/ae039631-2b12-4637-84f6-c67d51511be3.json
new file mode 100644
index 0000000..3336cab
--- /dev/null
+++ b/evaluation_examples/examples/os/ae039631-2b12-4637-84f6-c67d51511be3.json
@@ -0,0 +1,33 @@
+{
+  "id": "ae039631-2b12-4637-84f6-c67d51511be3",
+  "snapshot": "os",
+  "instruction": "Could you please help me check out my current default browser?",
+  "source": "https://ubuntu.com/tutorials/command-line-for-beginners#5-moving-and-manipulating-files",
+  "config": [
+    {
+      "type": "execute",
+      "parameters": {
+        "command": "xdg-settings set default-web-browser firefox.desktop"
+      }
+    }
+  ],
+  "trajectory": "trajectories/",
+  "related_apps": [
+    "os"
+  ],
+  "evaluator": {
+    "func": "exact_match",
+    "result": {
+      "type": "vm_command_line",
+      "config": {
+        "command": "xdg-settings get default-web-browser"
+      }
+    },
+    "expected": {
+      "type": "rule",
+      "rules":{
+        "expected": "firefox.desktop"
+      }
+    }
+  }
+}
diff --git a/evaluation_examples/examples/os/bedcedc4-4d72-425e-ad62-21960b11fe0d.json b/evaluation_examples/examples/os/bedcedc4-4d72-425e-ad62-21960b11fe0d.json
new file mode 100644
index 0000000..cd12726
--- /dev/null
+++ b/evaluation_examples/examples/os/bedcedc4-4d72-425e-ad62-21960b11fe0d.json
@@ -0,0 +1,25 @@
+{
+  "id": "bedcedc4-4d72-425e-ad62-21960b11fe0d",
+  "snapshot": "os",
+  "instruction": "Could you set the 'Dim screen when inactive' to on in setting?",
+  "source": "https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s",
+  "trajectory": "trajectories/",
+  "related_apps": [
+    "os"
+  ],
+  "evaluator": {
+    "func": "exact_match",
+    "result": {
+      "type": "vm_command_line",
+      "config": {
+        "command": "gsettings get org.gnome.desktop.session idle-delay"
+      }
+    },
+    "expected": {
+      "type": "rule",
+      "rules":{
+        "expected": "0"
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/evaluation_examples/examples/os/ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json b/evaluation_examples/examples/os/ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json
new file mode 100644
index 0000000..ee7eef9
--- /dev/null
+++ b/evaluation_examples/examples/os/ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json
@@ -0,0 +1,18 @@
+{
+  "id": "ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3",
+  "snapshot": "os",
+  "instruction": "Can you remove the first favorite app from 'favorites'?",
+  "source": "https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s",
+  "config": [],
+  "trajectory": "trajectories/",
+  "related_apps": [
+    "os"
+  ],
+  "evaluator": {
+    "func": "is_first_favorite_app_removed",
+    "result": {
+    },
+    "expected": {
+    }
+  }
+}
\ No newline at end of file
diff --git a/evaluation_examples/examples/os/f9be0997-4b7c-45c5-b05c-4612b44a6118.json b/evaluation_examples/examples/os/f9be0997-4b7c-45c5-b05c-4612b44a6118.json
new file mode 100644
index 0000000..ac1d85a
--- /dev/null
+++ b/evaluation_examples/examples/os/f9be0997-4b7c-45c5-b05c-4612b44a6118.json
@@ -0,0 +1,18 @@
+{
+  "id": "f9be0997-4b7c-45c5-b05c-4612b44a6118",
+  "snapshot": "os",
+  "instruction": "I am currently working on a ubuntu system but I do not want the notifications to bother me. Can you help me to switch to 'Do not disturb mode'?",
+  "source": "https://help.ubuntu.com/lts/ubuntu-help/shell-notifications.html.en",
+  "config": [],
+  "trajectory": "trajectories/",
+  "related_apps": [
+    "os"
+  ],
+  "evaluator": {
+    "func": "is_do_not_disturb_mode_enabled",
+    "result": {
+    },
+    "expected": {
+    }
+  }
+}
\ No newline at end of file