From b9ae4174b173048453d20e8da7f3a6a4116d1f02 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Thu, 25 Jan 2024 19:57:32 +0800 Subject: [PATCH] Fix OS examples annotated by Yitao --- desktop_env/controllers/setup.py | 2 +- desktop_env/envs/desktop_env.py | 2 +- desktop_env/evaluators/getters/general.py | 3 +- desktop_env/evaluators/metrics/__init__.py | 27 +++-- desktop_env/evaluators/metrics/general.py | 18 ++-- desktop_env/evaluators/metrics/os.py | 102 ++---------------- .../28cc3b7e-b194-4bc9-8353-d04c0f4d56d2.json | 38 ++++--- .../3ce045a0-877b-42aa-8d2c-b4a863336ab8.json | 31 +++--- .../43c2d64c-bab5-4dcb-a30c-b888321c319a.json | 7 +- .../4d2b519e-e872-4100-8ea3-fe71ab0f9133.json | 31 +++--- .../5812b315-e7bd-4265-b51f-863c02174c28.json | 31 +++--- .../5c433d22-ed9a-4e31-91f5-54cf3e8acd63.json | 31 +++--- .../5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57.json | 31 +++--- .../6ebbfb01-ea72-4226-a2a6-dc428e111ed2.json | 31 +++--- .../765d2b74-88a7-4d50-bf51-34e4106fd24a.json | 31 +++--- .../7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82.json | 19 ++-- .../94d95f96-9699-4208-98ba-3c3119edf9c2.json | 49 +++++---- .../a462a795-fdc7-4b23-b689-e8b6df786b78.json | 8 +- .../ae039631-2b12-4637-84f6-c67d51511be3.json | 4 +- .../b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json | 31 +++--- .../b6781586-6346-41cd-935a-a6b1487918fc.json | 31 +++--- .../bedcedc4-4d72-425e-ad62-21960b11fe0d.json | 48 +++++---- .../ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json | 69 +++++++----- .../f9be0997-4b7c-45c5-b05c-4612b44a6118.json | 10 +- 24 files changed, 327 insertions(+), 358 deletions(-) diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index e30b279..43492f4 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -257,7 +257,7 @@ class SetupController: with open(os.path.join(self.cache_dir, stderr), "w") as f: f.write(results["error"]) logger.info("Command executed successfully: %s -> %s" - , " ".join(command) + , " ".join(command) if isinstance(command, list) else command , response.text ) else: diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py index a734ecd..ef7a1ad 100644 --- a/desktop_env/envs/desktop_env.py +++ b/desktop_env/envs/desktop_env.py @@ -213,7 +213,7 @@ class DesktopEnv(gym.Env): self.cache_dir: str = os.path.join(self.cache_dir_base, self.task_id) os.makedirs(self.cache_dir, exist_ok=True) self.instruction = task_config["instruction"] - self.config = task_config["config"] + self.config = task_config["config"] if "config" in task_config else [] # evaluator dict # func -> metric function string, or list of metric function strings diff --git a/desktop_env/evaluators/getters/general.py b/desktop_env/evaluators/getters/general.py index e4e4c99..c94654e 100644 --- a/desktop_env/evaluators/getters/general.py +++ b/desktop_env/evaluators/getters/general.py @@ -9,8 +9,9 @@ def get_vm_command_line(env, config: Dict[str, str]): vm_ip = env.vm_ip port = 5000 command = config["command"] + shell = config.get("shell", False) - response = requests.post(f"http://{vm_ip}:{port}/execute", json={"command": command}) + response = requests.post(f"http://{vm_ip}:{port}/execute", json={"command": command, "shell": shell}) if response.status_code == 200: return response.json()["output"] diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index ff7b8dd..b2214fd 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -1,23 +1,28 @@ -from .chrome import is_expected_tabs, is_expected_bookmarks, compare_pdfs, is_cookie_deleted, is_shortcut_on_desktop, check_font_size, \ - check_enabled_experiments, check_history_deleted, is_expected_search_query +from .chrome import is_expected_tabs, is_expected_bookmarks, compare_pdfs, is_cookie_deleted, is_shortcut_on_desktop, \ + check_font_size, \ + check_enabled_experiments, check_history_deleted, is_expected_search_query from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers, compare_docx_lines -from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \ - compare_insert_equation, compare_highlighted_text -from .docs import is_first_line_centered, check_file_exists, compare_contains_image from .docs import evaluate_colored_words_in_tables, check_highlighted_words, evaluate_strike_through_last_paragraph, \ evaluate_conversion, evaluate_spacing, check_italic_font_size_14, evaluate_alignment, get_unique_train_ids, \ check_no_duplicates -from .general import exact_match, fuzzy_match +from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \ + compare_insert_equation, compare_highlighted_text +from .docs import is_first_line_centered, check_file_exists, compare_contains_image from .general import check_csv, check_accessibility_tree, run_sqlite3, check_json +from .general import exact_match, fuzzy_match, check_include_exclude from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions -from .slides import check_presenter_console_disable, check_image_stretch_and_center, check_slide_numbers_color, compare_pptx_files, check_strikethrough, \ - check_slide_orientation_Portrait, evaluate_presentation_fill_to_rgb_distance, check_left_panel from .libreoffice import check_libre_locale from .pdf import check_pdf_pages -#from .table import check_sheet_list, check_xlsx_freeze, check_xlsx_zoom, check_data_validations +from .slides import check_presenter_console_disable, check_image_stretch_and_center, check_slide_numbers_color, \ + compare_pptx_files, check_strikethrough, \ + check_slide_orientation_Portrait, evaluate_presentation_fill_to_rgb_distance, check_left_panel +# from .table import check_sheet_list, check_xlsx_freeze, check_xlsx_zoom, check_data_validations from .table import compare_table from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, compare_images, compare_audios, \ - compare_videos, check_qt_bgcone, check_one_instance_when_started_from_file,check_qt_minimal_view, check_qt_max_volume, \ + compare_videos, check_qt_bgcone, check_one_instance_when_started_from_file, check_qt_minimal_view, \ + check_qt_max_volume, \ check_qt_slider_colours, check_global_key_play_pause -from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed, check_json_settings, check_json_keybindings \ No newline at end of file +from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed, check_json_settings, \ + check_json_keybindings +from .os import check_gnome_favorite_apps diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index d7c945a..3f66f02 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -1,8 +1,9 @@ import csv -import json import functools +import json import operator import re +import sqlite3 from numbers import Number from typing import Callable, Any, Union from typing import Dict, List, Pattern @@ -14,13 +15,14 @@ from rapidfuzz import fuzz from .utils import _match_record, _match_value_to_rule -import sqlite3 def check_include_exclude(result: str, rules: Dict[str, List[str]]) -> float: + print(result, rules) include = rules.get("include", []) exclude = rules.get("exclude", []) return all(r in result for r in include) and all(r not in result for r in exclude) + def exact_match(result, rules) -> float: expect = rules["expected"] print(result, expect) @@ -36,6 +38,7 @@ def fuzzy_match(result, rules) -> float: return fuzz.ratio(result, expect) / 100. + def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float: """ Args: @@ -140,10 +143,10 @@ def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float: return 0. if "text" in rules: - match_func: Callable[[str], Number] = functools.partial( operator.eq if rules["exact"]\ - else (lambda a, b: fuzz.ratio(a, b)/100.) - , rules["text"] - ) + match_func: Callable[[str], Number] = functools.partial(operator.eq if rules["exact"] \ + else (lambda a, b: fuzz.ratio(a, b) / 100.) + , rules["text"] + ) match_score: Number = 0 for elm in elements: match_score = max(match_score, match_func(elm.text or None)) @@ -152,6 +155,7 @@ def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float: return float(match_score) + # def check_existence(result: str, *args) -> float: # return 1. - (result is None) @@ -160,6 +164,7 @@ def run_sqlite3(result: str, rules: Dict[str, Any]) -> float: cursor: sqlite3.Cursor = connection.execute(rules["sql"]) return float(cursor.fetchone()[0] or 0) + def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]]) -> float: """ Args: @@ -200,4 +205,3 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str value = value[k] metric = metric and not _match_value_to_rule(value, r) return metric - diff --git a/desktop_env/evaluators/metrics/os.py b/desktop_env/evaluators/metrics/os.py index 49e149b..3386466 100644 --- a/desktop_env/evaluators/metrics/os.py +++ b/desktop_env/evaluators/metrics/os.py @@ -1,58 +1,20 @@ -import os import subprocess -def is_spotify_installed() -> bool: - # Use the 'which' command to check if the 'spotify' executable exists - try: - subprocess.check_output(['which', 'spotify']) - return True - except subprocess.CalledProcessError: - return False +def check_gnome_favorite_apps(apps_str: str, rule): + # parse the string like "['thunderbird.desktop', 'vim.desktop', 'google-chrome.desktop']" + # to a list of strings + apps = eval(apps_str) + expected_apps = rule["expected"] -def is_dim_screen_inactive_off(): - # Get the current value of "Dim screen when inactive" setting - result = subprocess.run(["gsettings", "get", "org.gnome.desktop.session", "idle-delay"], capture_output=True, - text=True) + if len(apps) != len(expected_apps): + return 0 - # Check if the setting is set to "uint32 0" - if result.stdout.strip() == "uint32 0": - return 1 # Task successful + if set(apps) == set(expected_apps): + return 1 else: - return 0 # Task not successful - - -def evaluate_create_test_directory(): - try: - # Specify the path to the directory - path = '/test' - - # Check if the directory already exists - if os.path.exists(path): - return 0 # Directory already exists, task not successful - - # Check if the user has sufficient permissions to create the directory - if not os.access("/", os.W_OK | os.X_OK): - return 0 # Insufficient permissions to create the directory, task not successful - - return 1 # Task can be considered successful - except Exception as e: - print(f"Error: {str(e)}") - return 0 # Any other errors, task not successful - - -# TODO: create a file named test.txt and a directory dir1 at home dir before running this -def is_file_in_directory(): - # Specify the paths of the file and directory - file_path = "/test.txt" - directory_path = "/dir1" - - # Check if the file exists in the directory - if os.path.isfile(os.path.join(directory_path, os.path.basename(file_path))): - return 1 # Task successful - else: - return 0 # Task not successful + return 0 # TODO: log in to the system before running this @@ -64,47 +26,6 @@ def is_logout_successful(): return 1 # Task successful -def is_do_not_disturb_mode_enabled(): - try: - subprocess.run(["gsettings", "set", "org.gnome.desktop.notifications", "show-banners", "false"], check=True) - return 1 # Task successful - except subprocess.CalledProcessError: - return 0 # Task not successful - - -def get_default_browser(answer: str): - try: - result = subprocess.run(["xdg-settings", "get", "default-web-browser"], capture_output=True, text=True) - default_browser = result.stdout.strip() - return default_browser == answer - except subprocess.CalledProcessError: - return 0 - - -# TODO: should confirm initial state, i.e. initial list of favorite apps & the first app -first_app = "thunderbird.desktop" # to be changed - - -def is_first_favorite_app_removed(): - try: - result = subprocess.run(["gsettings", "get", "org.gnome.shell", "favorite-apps"], capture_output=True, - text=True) - output = result.stdout.strip() - - # Remove brackets and spaces from the output - favorites = output[1:-1].replace(" ", "") - - # Split output by comma to get individual favorite apps - favorite_apps = favorites.split(",") - - if favorite_apps[0] != first_app: - return 1 # First favorite app removed - else: - return 0 # First favorite app not removed - except subprocess.CalledProcessError: - return 0 # Task not successful - - def is_battery_percentage_displayed(): # GNOME schema and key for the setting schema = "org.gnome.desktop.interface" @@ -147,6 +68,3 @@ def check_auto_lock_settings(): except Exception as e: return 0. - - - diff --git a/evaluation_examples/examples/os/28cc3b7e-b194-4bc9-8353-d04c0f4d56d2.json b/evaluation_examples/examples/os/28cc3b7e-b194-4bc9-8353-d04c0f4d56d2.json index 0044e02..a88aaf3 100644 --- a/evaluation_examples/examples/os/28cc3b7e-b194-4bc9-8353-d04c0f4d56d2.json +++ b/evaluation_examples/examples/os/28cc3b7e-b194-4bc9-8353-d04c0f4d56d2.json @@ -1,17 +1,25 @@ { - "id": "28cc3b7e-b194-4bc9-8353-d04c0f4d56d2", - "snapshot": "os", - "instruction": "The volume of my system is too small. Can you help me turn up to the max volumn?", - "source": "https://help.ubuntu.com/lts/ubuntu-help/sound-volume.html.en", - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "", - "result": { - }, - "expected": { - } + "id": "28cc3b7e-b194-4bc9-8353-d04c0f4d56d2", + "snapshot": "os", + "instruction": "The volume of my system is too small. Can you help me turn up to the max volume?", + "source": "https://help.ubuntu.com/lts/ubuntu-help/sound-volume.html.en", + "trajectory": "trajectories/", + "config": [], + "related_apps": [ + "os" + ], + "evaluator": { + "func": "exact_match", + "result": { + "type": "vm_command_line", + "command": "pactl list sinks | grep '^[[:space:]]Volume:' | head -n 1 | awk '{print $5}' | sed 's/%//g'", + "shell": true + }, + "expected": { + "type": "rule", + "rules":{ + "expected": "100\n" + } } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/os/3ce045a0-877b-42aa-8d2c-b4a863336ab8.json b/evaluation_examples/examples/os/3ce045a0-877b-42aa-8d2c-b4a863336ab8.json index d8894a1..9c67040 100644 --- a/evaluation_examples/examples/os/3ce045a0-877b-42aa-8d2c-b4a863336ab8.json +++ b/evaluation_examples/examples/os/3ce045a0-877b-42aa-8d2c-b4a863336ab8.json @@ -1,17 +1,18 @@ { - "id": "3ce045a0-877b-42aa-8d2c-b4a863336ab8", - "snapshot": "os", - "instruction": "I want to make the text on the screen larger", - "source": "https://help.ubuntu.com/lts/ubuntu-help/a11y-font-size.html.en", - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "", - "result": { - }, - "expected": { - } + "id": "3ce045a0-877b-42aa-8d2c-b4a863336ab8", + "snapshot": "os", + "instruction": "I want to make the text on the screen larger", + "source": "https://help.ubuntu.com/lts/ubuntu-help/a11y-font-size.html.en", + "trajectory": "trajectories/", + "config": [], + "related_apps": [ + "os" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/os/43c2d64c-bab5-4dcb-a30c-b888321c319a.json b/evaluation_examples/examples/os/43c2d64c-bab5-4dcb-a30c-b888321c319a.json index 99299e8..e537a60 100644 --- a/evaluation_examples/examples/os/43c2d64c-bab5-4dcb-a30c-b888321c319a.json +++ b/evaluation_examples/examples/os/43c2d64c-bab5-4dcb-a30c-b888321c319a.json @@ -12,14 +12,13 @@ "func": "exact_match", "result": { "type": "vm_command_line", - "config": { - "command": "[ -d '/path/to/directory' ] && echo 'Directory exists.' || echo 'Directory does not exist.'" - } + "command": "[ -d '/home/user/Desktop/test' ] && echo 'Directory exists.' || echo 'Directory does not exist.'", + "shell": true }, "expected": { "type": "rule", "rules":{ - "expected": "Directory exists." + "expected": "Directory exists.\n" } } } diff --git a/evaluation_examples/examples/os/4d2b519e-e872-4100-8ea3-fe71ab0f9133.json b/evaluation_examples/examples/os/4d2b519e-e872-4100-8ea3-fe71ab0f9133.json index f09c9b0..4fbde78 100644 --- a/evaluation_examples/examples/os/4d2b519e-e872-4100-8ea3-fe71ab0f9133.json +++ b/evaluation_examples/examples/os/4d2b519e-e872-4100-8ea3-fe71ab0f9133.json @@ -1,17 +1,18 @@ { - "id": "4d2b519e-e872-4100-8ea3-fe71ab0f9133", - "snapshot": "os", - "instruction": "Could you please help me to add a new entry \"/home/david/pear/bin\" to the PATH variable in the ZSH terminal?", - "source": "https://stackoverflow.com/questions/11530090/adding-a-new-entry-to-the-path-variable-in-zsh", - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "", - "result": { - }, - "expected": { - } + "id": "4d2b519e-e872-4100-8ea3-fe71ab0f9133", + "snapshot": "os", + "instruction": "Could you please help me to add a new entry \"/home/david/pear/bin\" to the PATH variable in the ZSH terminal?", + "source": "https://stackoverflow.com/questions/11530090/adding-a-new-entry-to-the-path-variable-in-zsh", + "trajectory": "trajectories/", + "config": [], + "related_apps": [ + "os" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/os/5812b315-e7bd-4265-b51f-863c02174c28.json b/evaluation_examples/examples/os/5812b315-e7bd-4265-b51f-863c02174c28.json index ffbe51e..f9930e8 100644 --- a/evaluation_examples/examples/os/5812b315-e7bd-4265-b51f-863c02174c28.json +++ b/evaluation_examples/examples/os/5812b315-e7bd-4265-b51f-863c02174c28.json @@ -1,17 +1,18 @@ { - "id": "5812b315-e7bd-4265-b51f-863c02174c28", - "snapshot": "os", - "instruction": "Please create an SSH user on Ubuntu who is only allowed to access the folder \"test1\".", - "source": "https://superuser.com/questions/149404/create-an-ssh-user-who-only-has-permission-to-access-specific-folders", - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "", - "result": { - }, - "expected": { - } + "id": "5812b315-e7bd-4265-b51f-863c02174c28", + "snapshot": "os", + "instruction": "Please create an SSH user on Ubuntu who is only allowed to access the folder \"test1\".", + "source": "https://superuser.com/questions/149404/create-an-ssh-user-who-only-has-permission-to-access-specific-folders", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "os" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/os/5c433d22-ed9a-4e31-91f5-54cf3e8acd63.json b/evaluation_examples/examples/os/5c433d22-ed9a-4e31-91f5-54cf3e8acd63.json index c482bc7..5482ee2 100644 --- a/evaluation_examples/examples/os/5c433d22-ed9a-4e31-91f5-54cf3e8acd63.json +++ b/evaluation_examples/examples/os/5c433d22-ed9a-4e31-91f5-54cf3e8acd63.json @@ -1,17 +1,18 @@ { - "id": "5c433d22-ed9a-4e31-91f5-54cf3e8acd63", - "snapshot": "os", - "instruction": "I want to change my system language to Chinese(simplified). Can you help me?", - "source": "https://help.ubuntu.com/lts/ubuntu-help/session-language.html.zh-CN", - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "", - "result": { - }, - "expected": { - } + "id": "5c433d22-ed9a-4e31-91f5-54cf3e8acd63", + "snapshot": "os", + "instruction": "I want to change my system language to Chinese(simplified). Can you help me?", + "source": "https://help.ubuntu.com/lts/ubuntu-help/session-language.html.zh-CN", + "trajectory": "trajectories/", + "config": [], + "related_apps": [ + "os" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/os/5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57.json b/evaluation_examples/examples/os/5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57.json index 9111f9a..b6c00af 100644 --- a/evaluation_examples/examples/os/5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57.json +++ b/evaluation_examples/examples/os/5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57.json @@ -1,17 +1,18 @@ { - "id": "5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57", - "snapshot": "os", - "instruction": "I am currently using an Ubuntu system, and I have wrongly deleted a file named \"test\". Could you help me recover it from the Trash?", - "source": "https://help.ubuntu.com/lts/ubuntu-help/files-recover.html.en", - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "", - "result": { - }, - "expected": { - } + "id": "5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57", + "snapshot": "os", + "instruction": "I am currently using an Ubuntu system, and I have wrongly deleted a file named \"test\". Could you help me recover it from the Trash?", + "source": "https://help.ubuntu.com/lts/ubuntu-help/files-recover.html.en", + "trajectory": "trajectories/", + "config": [], + "related_apps": [ + "os" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/os/6ebbfb01-ea72-4226-a2a6-dc428e111ed2.json b/evaluation_examples/examples/os/6ebbfb01-ea72-4226-a2a6-dc428e111ed2.json index ac3de88..1d253f4 100644 --- a/evaluation_examples/examples/os/6ebbfb01-ea72-4226-a2a6-dc428e111ed2.json +++ b/evaluation_examples/examples/os/6ebbfb01-ea72-4226-a2a6-dc428e111ed2.json @@ -1,17 +1,18 @@ { - "id": "6ebbfb01-ea72-4226-a2a6-dc428e111ed2", - "snapshot": "os", - "instruction": "Could you please help me to set Bash as my default shell on the current Ubuntu system?", - "source": "https://superuser.com/questions/46748/how-do-i-make-bash-my-default-shell-on-ubuntu", - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "", - "result": { - }, - "expected": { - } + "id": "6ebbfb01-ea72-4226-a2a6-dc428e111ed2", + "snapshot": "os", + "instruction": "Could you please help me to set Bash as my default shell on the current Ubuntu system?", + "source": "https://superuser.com/questions/46748/how-do-i-make-bash-my-default-shell-on-ubuntu", + "trajectory": "trajectories/", + "config": [], + "related_apps": [ + "os" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/os/765d2b74-88a7-4d50-bf51-34e4106fd24a.json b/evaluation_examples/examples/os/765d2b74-88a7-4d50-bf51-34e4106fd24a.json index ebac87b..690cdc8 100644 --- a/evaluation_examples/examples/os/765d2b74-88a7-4d50-bf51-34e4106fd24a.json +++ b/evaluation_examples/examples/os/765d2b74-88a7-4d50-bf51-34e4106fd24a.json @@ -1,17 +1,18 @@ { - "id": "765d2b74-88a7-4d50-bf51-34e4106fd24a", - "snapshot": "os", - "instruction": "Can you help me delete the \"test\" file on my desktop?", - "source": "https://help.ubuntu.com/lts/ubuntu-help/files-delete.html.en", - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "", - "result": { - }, - "expected": { - } + "id": "765d2b74-88a7-4d50-bf51-34e4106fd24a", + "snapshot": "os", + "instruction": "Can you help me delete the \"test\" file on my desktop?", + "source": "https://help.ubuntu.com/lts/ubuntu-help/files-delete.html.en", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "os" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/os/7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82.json b/evaluation_examples/examples/os/7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82.json index 2692d14..a3b6035 100644 --- a/evaluation_examples/examples/os/7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82.json +++ b/evaluation_examples/examples/os/7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82.json @@ -1,13 +1,21 @@ { "id": "7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82", "snapshot": "os", - "instruction": "Can you move the file with the path '/test.txt' to the directory with the path '/dir1'?", + "instruction": "Can you move the file with the path 'todo.txt' to the directory with the path 'done'?", "source": "https://ubuntu.com/tutorials/command-line-for-beginners#5-moving-and-manipulating-files", "config": [ { "type": "execute", "parameters": { - "command": "touch /test.txt && mkdir /dir1" + "command": "echo 'password' | sudo -S touch ~/Desktop/todo.txt && sudo mkdir ~/Desktop/done", + "shell": true + } + }, + { + "type": "execute", + "parameters": { + "command": "echo 'password' | sudo -S chmod 777 ~/Desktop/todo.txt && sudo chmod 777 ~/Desktop/done", + "shell": true } } ], @@ -19,14 +27,13 @@ "func": "exact_match", "result": { "type": "vm_command_line", - "config": { - "command": "if [ -f '/dir/test.txt' ]; then echo 'File exists.'; else echo 'File does not exist.'; fi" - } + "command": "if [ -f ~/Desktop/done/todo.txt ]; then echo 'File exists.'; else echo 'File does not exist.'; fi", + "shell": true }, "expected": { "type": "rule", "rules":{ - "expected": "File exists." + "expected": "File exists.\n" } } } diff --git a/evaluation_examples/examples/os/94d95f96-9699-4208-98ba-3c3119edf9c2.json b/evaluation_examples/examples/os/94d95f96-9699-4208-98ba-3c3119edf9c2.json index eed2e83..90249d0 100644 --- a/evaluation_examples/examples/os/94d95f96-9699-4208-98ba-3c3119edf9c2.json +++ b/evaluation_examples/examples/os/94d95f96-9699-4208-98ba-3c3119edf9c2.json @@ -1,26 +1,29 @@ { - "id": "94d95f96-9699-4208-98ba-3c3119edf9c2", - "snapshot": "os", - "instruction": "I want to install Spotify on my current system. Could you please help me?", - "source": "https://help.ubuntu.com/lts/ubuntu-help/addremove-install.html.en", - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "check_include_exclude", - "result": { - "type": "vm_command_line", - "config": { - "command": "which spotify" - } - }, - "expected": { - "type": "rule", - "rules": { - "include": [], - "exclude": ["not found"] - } + "id": "94d95f96-9699-4208-98ba-3c3119edf9c2", + "snapshot": "os", + "instruction": "I want to install Spotify on my current system. Could you please help me?", + "source": "https://help.ubuntu.com/lts/ubuntu-help/addremove-install.html.en", + "trajectory": "trajectories/", + "config": [], + "related_apps": [ + "os" + ], + "evaluator": { + "func": "check_include_exclude", + "result": { + "type": "vm_command_line", + "command": "which spotify" + }, + "expected": { + "type": "rule", + "rules": { + "include": [ + "spotify" + ], + "exclude": [ + "not found" + ] } } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json b/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json index 5831cb0..72d0454 100644 --- a/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json +++ b/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json @@ -7,7 +7,8 @@ { "type": "execute", "parameters": { - "command": "echo password | sudo -S su - charles" + "command": "echo password | sudo -S su - charles", + "shell": true } } ], @@ -19,9 +20,8 @@ "func": "exact_match", "result": { "type": "vm_command_line", - "config": { - "command": "if [ '$(whoami)' = 'charles' ]; then echo 'Current user is charles.'; else echo 'Current user is not charles'; fi" - } + "command": "if [ '$(whoami)' = 'charles' ]; then echo 'Current user is charles.'; else echo 'Current user is not charles'; fi", + "shell": true }, "expected": { "type": "rule", diff --git a/evaluation_examples/examples/os/ae039631-2b12-4637-84f6-c67d51511be3.json b/evaluation_examples/examples/os/ae039631-2b12-4637-84f6-c67d51511be3.json index 3336cab..c82d0b0 100644 --- a/evaluation_examples/examples/os/ae039631-2b12-4637-84f6-c67d51511be3.json +++ b/evaluation_examples/examples/os/ae039631-2b12-4637-84f6-c67d51511be3.json @@ -19,9 +19,7 @@ "func": "exact_match", "result": { "type": "vm_command_line", - "config": { - "command": "xdg-settings get default-web-browser" - } + "command": "xdg-settings get default-web-browser" }, "expected": { "type": "rule", diff --git a/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json b/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json index 1c0d9b2..be03893 100644 --- a/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json +++ b/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json @@ -1,17 +1,18 @@ { - "id": "b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa", - "snapshot": "os", - "instruction": "I want to switch off the Bluetooth. Can you help me?", - "source": "https://help.ubuntu.com/lts/ubuntu-help/bluetooth-turn-on-off.html.en", - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "", - "result": { - }, - "expected": { - } + "id": "b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa", + "snapshot": "os", + "instruction": "I want to switch off the Bluetooth. Can you help me?", + "source": "https://help.ubuntu.com/lts/ubuntu-help/bluetooth-turn-on-off.html.en", + "trajectory": "trajectories/", + "config": [], + "related_apps": [ + "os" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/os/b6781586-6346-41cd-935a-a6b1487918fc.json b/evaluation_examples/examples/os/b6781586-6346-41cd-935a-a6b1487918fc.json index 7c892d7..7b43485 100644 --- a/evaluation_examples/examples/os/b6781586-6346-41cd-935a-a6b1487918fc.json +++ b/evaluation_examples/examples/os/b6781586-6346-41cd-935a-a6b1487918fc.json @@ -1,17 +1,18 @@ { - "id": "b6781586-6346-41cd-935a-a6b1487918fc", - "snapshot": "os", - "instruction": "I want to set my current time zone to UTC+0. Can you help me?", - "source": "https://help.ubuntu.com/lts/ubuntu-help/clock-timezone.html.en", - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "", - "result": { - }, - "expected": { - } + "id": "b6781586-6346-41cd-935a-a6b1487918fc", + "snapshot": "os", + "instruction": "I want to set my current time zone to UTC+0. Can you help me?", + "source": "https://help.ubuntu.com/lts/ubuntu-help/clock-timezone.html.en", + "trajectory": "trajectories/", + "config": [], + "related_apps": [ + "os" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/os/bedcedc4-4d72-425e-ad62-21960b11fe0d.json b/evaluation_examples/examples/os/bedcedc4-4d72-425e-ad62-21960b11fe0d.json index cd12726..3061ba6 100644 --- a/evaluation_examples/examples/os/bedcedc4-4d72-425e-ad62-21960b11fe0d.json +++ b/evaluation_examples/examples/os/bedcedc4-4d72-425e-ad62-21960b11fe0d.json @@ -1,25 +1,29 @@ { - "id": "bedcedc4-4d72-425e-ad62-21960b11fe0d", - "snapshot": "os", - "instruction": "Could you set the 'Dim screen when inactive' to on in setting?", - "source": "https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s", - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "exact_match", - "result": { - "type": "vm_command_line", - "config": { - "command": "gsettings get org.gnome.desktop.session idle-delay" - } - }, - "expected": { - "type": "rule", - "rules":{ - "expected": "0" - } + "id": "bedcedc4-4d72-425e-ad62-21960b11fe0d", + "snapshot": "os", + "instruction": "Could you set the 'Dim screen when inactive' to on in setting?", + "source": "https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s", + "trajectory": "trajectories/", + "config": [], + "related_apps": [ + "os" + ], + "evaluator": { + "func": "exact_match", + "result": { + "type": "vm_command_line", + "command": [ + "gsettings", + "get", + "org.gnome.desktop.session", + "idle-delay" + ] + }, + "expected": { + "type": "rule", + "rules": { + "expected": "uint32 0\n" } } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/os/ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json b/evaluation_examples/examples/os/ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json index dd398a0..1b4a330 100644 --- a/evaluation_examples/examples/os/ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json +++ b/evaluation_examples/examples/os/ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3.json @@ -1,33 +1,46 @@ { - "id": "ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3", - "snapshot": "os", - "instruction": "Can you remove the first favorite app from 'favorites'?", - "source": "https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s", - "config": [ - { - "type": "execute", - "parameters": { - "command": "gsettings set org.gnome.shell favorite-apps \"['thunderbird.desktop', 'firefox.desktop', 'libreoffice-writer.desktop']\"" - } + "id": "ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3", + "snapshot": "os", + "instruction": "Can you remove the first favorite app from 'favorites'?", + "source": "https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s", + "config": [ + { + "type": "execute", + "parameters": { + "command": "gsettings set org.gnome.shell favorite-apps \"['thunderbird.desktop']\"" } - ], - "trajectory": "trajectories/", - "related_apps": [ - "os" - ], - "evaluator": { - "func": "exact_match", - "result": { - "type": "vm_command_line", - "config": { - "command": "gsettings get org.gnome.shell favorite-apps" - } - }, - "expected": { - "type": "rule", - "rules":{ - "expected": "['firefox.desktop', 'libreoffice-writer.desktop']" - } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": "gsettings set org.gnome.shell favorite-apps \"['thunderbird.desktop', 'vim.desktop', 'google-chrome.desktop']\"" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "os" + ], + "evaluator": { + "func": "check_gnome_favorite_apps", + "result": { + "type": "vm_command_line", + "command": "gsettings get org.gnome.shell favorite-apps" + }, + "expected": { + "type": "rule", + "rules": { + "expected": [ + "google-chrome.desktop", + "thunderbird.desktop" + ] } } } +} diff --git a/evaluation_examples/examples/os/f9be0997-4b7c-45c5-b05c-4612b44a6118.json b/evaluation_examples/examples/os/f9be0997-4b7c-45c5-b05c-4612b44a6118.json index 55052ff..96632aa 100644 --- a/evaluation_examples/examples/os/f9be0997-4b7c-45c5-b05c-4612b44a6118.json +++ b/evaluation_examples/examples/os/f9be0997-4b7c-45c5-b05c-4612b44a6118.json @@ -7,7 +7,8 @@ { "type": "execute", "parameters": { - "command": "gsettings set org.gnome.desktop.notifications show-banners true" + "command": "gsettings set org.gnome.desktop.notifications show-banners true", + "shell": true } } ], @@ -19,14 +20,13 @@ "func": "exact_match", "result": { "type": "vm_command_line", - "config": { - "command": "gsettings get org.gnome.desktop.notifications show-banners" - } + "command": "gsettings get org.gnome.desktop.notifications show-banners", + "shell": true }, "expected": { "type": "rule", "rules":{ - "expected": "false" + "expected": "false\n" } } }