From 2aad4357b2d3966400e853e7c06f6a429e716cc6 Mon Sep 17 00:00:00 2001 From: tsuky_chen <91684733+chenjix@users.noreply.github.com> Date: Wed, 28 Feb 2024 21:40:53 +0800 Subject: [PATCH 01/13] Update slides.py --- desktop_env/evaluators/metrics/slides.py | 1 - 1 file changed, 1 deletion(-) diff --git a/desktop_env/evaluators/metrics/slides.py b/desktop_env/evaluators/metrics/slides.py index e83a4a7..9bbfb38 100644 --- a/desktop_env/evaluators/metrics/slides.py +++ b/desktop_env/evaluators/metrics/slides.py @@ -165,7 +165,6 @@ def compare_pptx_files(file1_path, file2_path, **options): # compare the content of each slide for slide1, slide2 in zip(prs1.slides, prs2.slides): slide_idx += 1 - print(slide_idx) def get_slide_background_color(slide): background = slide.background if background.fill.background(): From 33ace6937b051801adf58fc527b13a72ff53f527 Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 28 Feb 2024 22:35:04 +0800 Subject: [PATCH 02/13] ver Feb28th a new multi app task --- init a web extension project with web tool --- branch-config/filelist | 1 + desktop_env/evaluators/metrics/__init__.py | 3 +- desktop_env/evaluators/metrics/general.py | 24 +++- desktop_env/evaluators/metrics/utils.py | 4 +- .../74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json | 107 ++++++++++++++++++ quick_evaluate.py | 77 +++++++++++++ 6 files changed, 209 insertions(+), 7 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json create mode 100644 quick_evaluate.py diff --git a/branch-config/filelist b/branch-config/filelist index b386813..aec119b 100644 --- a/branch-config/filelist +++ b/branch-config/filelist @@ -12,3 +12,4 @@ experiment_screenshot_seeact.py experiment_screenshot_som.py quick_compare_table.py +quick_evaluate.py diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 5a7025c..ed0dd86 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ 
b/desktop_env/evaluators/metrics/__init__.py @@ -57,7 +57,8 @@ from .general import ( is_in_list, fuzzy_match, check_include_exclude, - check_direct_json_object + check_direct_json_object, + diff_text_file ) from .gimp import ( check_brightness_decrease_and_structure_sim, diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index 550c54b..4458a69 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -12,6 +12,7 @@ import lxml.etree from lxml.cssselect import CSSSelector from lxml.etree import _Element from rapidfuzz import fuzz +import difflib from .utils import _match_record, _match_value_to_rule @@ -45,7 +46,15 @@ def is_in_list(result, rules) -> float: else: return 0. +def diff_text_file(result: str, expect: str) -> float: + if result is None: + return 0. + with open(result) as f: + result_lines: List[str] = f.read().splitlines() + with open(expect) as f: + expected_lines: List[str] = f.read().splitlines() + return difflib.SequenceMatcher(a=result_lines, b=expected_lines).ratio() def fuzzy_match(result, rules) -> float: expect = rules["expected"] @@ -212,14 +221,21 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str for r in expect_rules: value = result for k in r["key"]: - value = value[k] + try: + value = value[k] + except KeyError: + return 0. metric = metric and _match_value_to_rule(value, r) for r in unexpect_rules: value = result for k in r["key"]: - value = value[k] + try: + value = value[k] + except KeyError: + value = None + break metric = metric and not _match_value_to_rule(value, r) - return metric + return float(metric) def check_direct_json_object(result, rules)->float: @@ -238,4 +254,4 @@ def check_direct_json_object(result, rules)->float: expected_value = expected_json.get(key) if expected_value != result.get(key): return 0. 
- return 1.0 \ No newline at end of file + return 1.0 diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 6f444f8..0cb8513 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -587,7 +587,7 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool: bool """ - if rule["method"].startswith("re"): + if rule["method"].startswith("re"): # re.FLAGs flags: List[str] = rule["method"].split(".")[1:] flags: Iterable[re.RegexFlag] = (getattr(re, fl) for fl in flags) flag: re.RegexFlag = functools.reduce(operator.or_, flags, re.RegexFlag(0)) @@ -600,7 +600,7 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool: , "ge", "gt" }: return getattr(operator, rule["method"])(value, rule["ref"]) - if rule["method"].startswith("approx"): + if rule["method"].startswith("approx"): # approx:THRESHOLD threshold: float = float(rule["method"].split(":")[1]) logger.debug("Approx: TH%f, REF%f, VAL%s", threshold, rule["ref"], repr(value)) try: diff --git a/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json b/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json new file mode 100644 index 0000000..731020a --- /dev/null +++ b/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json @@ -0,0 +1,107 @@ +{ + "id": "74d5859f-ed66-4d3e-aa0e-93d7a592ce41", + "snapshot": "chrome", + "instruction": "Help me to get an initial setup of web extension with help of https://webext.eu . Tag the extension as \"happy-extension v0.0.1\". Leave description as blank for now. A background script and browser action is needed, while other features are not. 
Place the auto-generated folder under folder \"~/Projects\".", + "source": "authors", + "config": [], + "trajectory": "trajectories/74d5859f-ed66-4d3e-aa0e-93d7a592ce41", + "related_apps": [ + "chrome", + "os" + ], + "evaluator": { + "func": [ + "check_json", + "diff_text_file", + "diff_text_file", + "diff_text_file", + "diff_text_file" + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/manifest.json", + "dest": "manifest.json" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/background_script.js", + "dest": "background_script.js" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/index.html", + "dest": "index.html" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/style.css", + "dest": "style.css" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/script.js", + "dest": "script.js" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "expect": [ + { + "key": ["name"], + "method": "eq", + "ref": "happy-extension" + }, + { + "key": ["version"], + "method": "eq", + "ref": "0.0.1" + }, + { + "key": ["background", "scripts"], + "method": "eq", + "ref": ["background_script.js"] + }, + { + "key": ["browser_action", "default_icon"], + "method": "eq", + "ref": {"64": "icons/icon.png"} + }, + { + "key": ["browser_action", "default_popup"], + "method": "eq", + "ref": "browserAction/index.html" + }, + { + "key": ["browser_action", "default_title"], + "method": "eq", + "ref": "happy-extension" + } + ] + } + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1t5Llhn6seDUXVs-eILu6CjwFEQL9Z5Qm&export=download", + "dest": "background_script.js" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=19fMAsWd6q4ElLdOceJ-otHbxRJA_pc_U&export=download", + "dest": "index.html" + }, + { + "type": "cloud_file", + "path": 
"https://drive.google.com/uc?id=1fwfiRPjdug8uh6z23RFO1JtlGH_L_Hl_&export=download", + "dest": "style.css" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=14YYnhCfRtHQNk8M4fBPaUQeteoFMGBsA&export=download", + "dest": "script.js" + } + ] + } +} diff --git a/quick_evaluate.py b/quick_evaluate.py new file mode 100644 index 0000000..99aafaa --- /dev/null +++ b/quick_evaluate.py @@ -0,0 +1,77 @@ +import datetime +import json +import logging +import os +import sys +import time +import argparse +from desktop_env.envs.desktop_env import DesktopEnv + +# Logger Configs {{{ # +logger = logging.getLogger() +logger.setLevel(logging.DEBUG) + +datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") + +file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8") +debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8") +stdout_handler = logging.StreamHandler(sys.stdout) +sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8") + +file_handler.setLevel(logging.INFO) +debug_handler.setLevel(logging.DEBUG) +stdout_handler.setLevel(logging.INFO) +sdebug_handler.setLevel(logging.DEBUG) + +formatter = logging.Formatter( + fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") +file_handler.setFormatter(formatter) +debug_handler.setFormatter(formatter) +stdout_handler.setFormatter(formatter) +sdebug_handler.setFormatter(formatter) + +stdout_handler.addFilter(logging.Filter("desktopenv")) +sdebug_handler.addFilter(logging.Filter("desktopenv")) + +logger.addHandler(file_handler) +logger.addHandler(debug_handler) +logger.addHandler(stdout_handler) +logger.addHandler(sdebug_handler) +# }}} Logger Configs # + +logger = logging.getLogger("desktopenv.main") + + +def human_agent(): + """ + Runs the Gym environment with 
human input. + """ + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--path', type=str, required=True, help="Path to the virtual machine .vmx file.") + parser.add_argument('-s', '--snapshot', type=str, help="Name of the snapshot to restore.") + parser.add_argument('-e', '--example', type=str, help="Path to the example json file.") + args = parser.parse_args(sys.argv[1:]) + + example_path = args.example if args.example is not None and os.path.exists(args.example) else \ + 'evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json' + with open(example_path, "r") as f: + example = json.load(f) + # change to your customized snapshot + if args.snapshot is not None: example["snapshot"] = args.snapshot + + assert os.path.exists(args.path), "The specified path to the .vmx file does not exist." + env = DesktopEnv( + path_to_vm=args.path, + action_space="computer_13", + task_config=example + ) + + result = env.evaluate() + logger.info("Result: %.2f", result) + + # env.close() + logger.info("Environment closed.") + + +if __name__ == "__main__": + human_agent() From f95458dfd33a12111e1dd452b620adeb3022dec7 Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 29 Feb 2024 12:07:26 +0800 Subject: [PATCH 03/13] ver Feb29th fixed a bug in load_charts --- desktop_env/evaluators/metrics/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 0cb8513..4515cd5 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -124,10 +124,14 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An value_str: str = ser.val.numRef.f elif hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f"): value_str: str = ser.val.strRef.f + else: + value_str: str = "" if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f"): categ_str: str = ser.cat.numRef.f elif hasattr(ser.cat, 
"strRef") and hasattr(ser.cat.strRef, "f"): categ_str: str = ser.cat.strRef.f + else: + categ_str: str = "" series.append("{:},{:}".format(value_str, categ_str)) series: str = ";".join(series) From a6c43cc96a9c6050a23d93eb5b6ffda009cbc00b Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 29 Feb 2024 12:07:26 +0800 Subject: [PATCH 04/13] ver Feb29th fixed a bug in load_charts --- desktop_env/evaluators/metrics/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 6f444f8..7036080 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -124,10 +124,14 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An value_str: str = ser.val.numRef.f elif hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f"): value_str: str = ser.val.strRef.f + else: + value_str: str = "" if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f"): categ_str: str = ser.cat.numRef.f elif hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f"): categ_str: str = ser.cat.strRef.f + else: + categ_str: str = "" series.append("{:},{:}".format(value_str, categ_str)) series: str = ";".join(series) From 6fef1cfb8fd957c42fd852ab22322174da618afc Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 29 Feb 2024 20:05:03 +0800 Subject: [PATCH 05/13] ver Feb29thv2 updated a task config --- .../sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json index ad9c17f..4c2e0a1 100644 --- a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json +++ b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json @@ -1,7 +1,7 @@ { "id": 
"30e3e107-1cfb-46ee-a755-2cd080d7ba6a", "snapshot": "libreoffice_calc", - "instruction": "Please create a new sheet. Merge cells A1:C1 in the new sheet and write \"Demographic Profile\" with blue fill and bold white text. Then I want to create three pivot tables to show the percentage of Sex, Civil Status , and Highest Educational Attainment.", + "instruction": "Please create a new sheet. Merge cells A1:C1 in the new sheet and write \"Demographic Profile\" with blue (#0000ff) fill and bold white text. Then I want to create three pivot tables to show the percentage of Sex, Civil Status , and Highest Educational Attainment. They should be stacked one by one in the new sheet, each separated with a blank line.", "source": "SheetCopilot@9", "config": [ { From ac3471b847c59d040435fb182991c4b50c351b15 Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 29 Feb 2024 20:05:03 +0800 Subject: [PATCH 06/13] ver Feb29thv2 updated a task config --- .../sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json index ad9c17f..4c2e0a1 100644 --- a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json +++ b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json @@ -1,7 +1,7 @@ { "id": "30e3e107-1cfb-46ee-a755-2cd080d7ba6a", "snapshot": "libreoffice_calc", - "instruction": "Please create a new sheet. Merge cells A1:C1 in the new sheet and write \"Demographic Profile\" with blue fill and bold white text. Then I want to create three pivot tables to show the percentage of Sex, Civil Status , and Highest Educational Attainment.", + "instruction": "Please create a new sheet. Merge cells A1:C1 in the new sheet and write \"Demographic Profile\" with blue (#0000ff) fill and bold white text. 
Then I want to create three pivot tables to show the percentage of Sex, Civil Status , and Highest Educational Attainment. They should be stacked one by one in the new sheet, each separated with a blank line.", "source": "SheetCopilot@9", "config": [ { From 7566555eb32d9d230ca9221048050c25bffed1e1 Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 29 Feb 2024 22:56:40 +0800 Subject: [PATCH 07/13] ver Feb29thv3 updated evaluation for a sheetcopilot task --- .../30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json index 4c2e0a1..8536295 100644 --- a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json +++ b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json @@ -71,11 +71,6 @@ }, "options": { "rules": [ - { - "type": "sheet_data", - "sheet_idx0": "RNSheet2", - "sheet_idx1": "ENSheet2" - }, { "type": "pivot_table", "sheet_idx0": "RNSheet2", @@ -103,6 +98,10 @@ "font_bold": { "method": "eq", "ref": true + }, + "value": { + "method": "eq", + "ref": "Demographic Profile" } } } From f0a20cdbd141eba822742e28fac1d714ce182744 Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 29 Feb 2024 22:56:40 +0800 Subject: [PATCH 08/13] ver Feb29thv3 updated evaluation for a sheetcopilot task --- .../30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json index 4c2e0a1..8536295 100644 --- a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json +++ b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json 
@@ -71,11 +71,6 @@ }, "options": { "rules": [ - { - "type": "sheet_data", - "sheet_idx0": "RNSheet2", - "sheet_idx1": "ENSheet2" - }, { "type": "pivot_table", "sheet_idx0": "RNSheet2", @@ -103,6 +98,10 @@ "font_bold": { "method": "eq", "ref": true + }, + "value": { + "method": "eq", + "ref": "Demographic Profile" } } } From 58e21ab87171ff1595c5ecf47d9c7200aed020e7 Mon Sep 17 00:00:00 2001 From: David Chang Date: Fri, 1 Mar 2024 13:35:52 +0800 Subject: [PATCH 09/13] ver Mar1st the multi_app task proposed by tao --- desktop_env/evaluators/metrics/table.py | 1 - .../7e287123-70ca-47b9-8521-47db09b69b14.json | 96 +++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index 24e098e..f593524 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -122,7 +122,6 @@ def compare_table(result: str, expected: str = None, **options) -> float: worksheetr_names: List[str] = pdworkbookr.sheet_names if expected is not None: - xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected) pdworkbooke = pd.ExcelFile(expected) worksheete_names: List[str] = pdworkbooke.sheet_names diff --git a/evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json b/evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json new file mode 100644 index 0000000..a398bd4 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json @@ -0,0 +1,96 @@ +{ + "id": "7e287123-70ca-47b9-8521-47db09b69b14", + "snapshot": "libreoffice_calc", + "instruction": "I am an assistant professor of CS at HKU, I want to apply for the General Research Fund next year, I need to get some insights, so I need you to help me to organise the data. 
First please help me to organise the pass rate of the GRF applications of the CS departments of each school for the past five years in percentage form with 2 decimal digits in a table, which I can use subsequently. Set the headers as \"Year\", \"#Applied\", \"#Supported\", and \"Success Rate\". The materials are saved under Documents/Fundings. And please save the result table as \"GRF-p5y.xlsx\" on my desktop.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/Fundings/ecs", "/home/user/Documents/Fundings/grf"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/Documents/Fundings/ecs/ecs15.pdf", "url": "https://drive.google.com/uc?id=1FTiT3mLlkehe2yWVdSMWr1w4ltLtQZUy&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs16.pdf", "url": "https://drive.google.com/uc?id=1DMzZyhDey3lDsQ7fcPiJm9AYGECZms3q&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs17.pdf", "url": "https://drive.google.com/uc?id=1TgAWk7FiV8fNrG2L3-Eu7BDccqtXebXY&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs23.pdf", "url": "https://drive.google.com/uc?id=11DVxH4eRjECUxZNbUhYNJhgD0Y5WoN8r&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs22.pdf", "url": "https://drive.google.com/uc?id=17IgyJADA65F40kH79S90QgEzPa7IERXx&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs21.pdf", "url": "https://drive.google.com/uc?id=1kB4eFfLisPXKOirGUHbbcOyf73t7MVqL&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs20.pdf", "url": "https://drive.google.com/uc?id=179j9tD1xRSgd9COM7rzErO6FLYO2sc_a&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs19.pdf", "url": "https://drive.google.com/uc?id=1ogZUTfKjvQhg58GXtVUxe1U8VvHU-3ap&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs18.pdf", "url": 
"https://drive.google.com/uc?id=1MfuvRhAnhMEMbxn5js2ffWqMHJx2fsd2&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/customer-information-sheet-for-inward-payments-to-hong-kong.pdf", "url": "https://drive.google.com/uc?id=1s-H3an7HLBM9ku6d6Hcdj1qkSwKAHngU&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf15.pdf", "url": "https://drive.google.com/uc?id=1rRQXo9XHnCVTG8XqNAv0SJwPTW36MMbm&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf16.pdf", "url": "https://drive.google.com/uc?id=18ljRqkdyXEZ464E0dpKjaEa2NFexyw3I&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf17.pdf", "url": "https://drive.google.com/uc?id=1VrqOnyhpOkMpyIJ6YMrAhixpahjYonOd&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf18.pdf", "url": "https://drive.google.com/uc?id=182CLDUr372-jpAiY4YvSbGNXF9TsWxzA&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf19.pdf", "url": "https://drive.google.com/uc?id=1YkJtjlklKN0NmLiI2Hi4f_dKtTm5SPxT&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf20.pdf", "url": "https://drive.google.com/uc?id=1a7Uc7VCMlEX6fy-5oqE6i1YLitBe7gaf&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf21.pdf", "url": "https://drive.google.com/uc?id=1s8km4Wle4lc5PkbUQfivBFK0IJQgxMiB&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf22.pdf", "url": "https://drive.google.com/uc?id=1HTEb1uK7LNvbVyeXgO8WemCPPgiASKiy&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf23.pdf", "url": "https://drive.google.com/uc?id=1XM-jZlfu_i4waDZHb8Z6Vr5b3LgULTtP&export=download"} + ] + } + } + ], + "trajectory": "trajectories/7e287123-70ca-47b9-8521-47db09b69b14", + "related_apps": [ + "libreoffice_calc", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": [ + "libreoffice", + "--convert-to", + "csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1", + 
"--outdir", + "/home/user/Desktop", + "/home/user/Desktop/GRF-p5y.xlsx" + ] + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/GRF-p5y.xlsx", + "/home/user/Desktop/GRF-p5y-Sheet1.csv" + ], + "dest": [ + "GRF-p5y.xlsx", + "GRF-p5y-Sheet1.csv" + ], + "multi": true + }, + "expected": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1fDM4Y-WuFCnfksPLgynj-WSmzbqn2TcV&export=download", + "https://drive.google.com/uc?id=1waThupubGOJop0FU0b0yhT6QnjNYkLGy&export=download" + ], + "dest": [ + "GRF-p5y-gt.xlsx", + "GRF-p5y-gt-Sheet1.csv" + ], + "multi": true + }, + "options": { + "rules": [ + { + "type": "sheet_print", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1" + } + ] + } + } +} From afba204912ad99d4e3d8cbafbed2fc9f8ecbd8d1 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Fri, 1 Mar 2024 18:02:00 +0800 Subject: [PATCH 10/13] Initialize new multiple-apps examples --- .../0c825995-5b70-4526-b663-113f4c999dd2.json | 28 ++++++++++++++++++ .../26150609-0da3-4a7d-8868-0faf9c5f01bb.json | 26 +++++++++++++++++ .../767a3271-56db-4745-ac5d-846ef05e6fe5.json | 27 +++++++++++++++++ .../869de13e-bef9-4b91-ba51-f6708c40b096.json | 29 +++++++++++++++++++ .../9219480b-3aed-47fc-8bac-d2cffc5849f7.json | 26 +++++++++++++++++ .../d1acdb87-bb67-4f30-84aa-990e56a09c92.json | 26 +++++++++++++++++ .../0a2e43bf-b26c-4631-a966-af9dfa12c9e5.json | 2 +- 7 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 evaluation_examples/examples/multi_apps/0c825995-5b70-4526-b663-113f4c999dd2.json create mode 100644 evaluation_examples/examples/multi_apps/26150609-0da3-4a7d-8868-0faf9c5f01bb.json create mode 100644 evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json create mode 100644 evaluation_examples/examples/multi_apps/869de13e-bef9-4b91-ba51-f6708c40b096.json create mode 100644 
evaluation_examples/examples/multi_apps/9219480b-3aed-47fc-8bac-d2cffc5849f7.json create mode 100644 evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json diff --git a/evaluation_examples/examples/multi_apps/0c825995-5b70-4526-b663-113f4c999dd2.json b/evaluation_examples/examples/multi_apps/0c825995-5b70-4526-b663-113f4c999dd2.json new file mode 100644 index 0000000..a272e3e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/0c825995-5b70-4526-b663-113f4c999dd2.json @@ -0,0 +1,28 @@ +{ + "id": "0c825995-5b70-4526-b663-113f4c999dd2", + "snapshot": "libreoffice_calc", + "instruction": "Could you do me a favor? I need to find a Word document called 'ParkProposal.docx' on my computer and copy the 'Budget Estimates' and 'Design Concepts' sections into a new Google Doc. We're trying to collaborate more efficiently on this community park project. Thanks!", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/0c825995-5b70-4526-b663-113f4c999dd2", + "related_apps": [ + "libreoffice_calc", + "chrome", + "os" + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/examples/multi_apps/26150609-0da3-4a7d-8868-0faf9c5f01bb.json b/evaluation_examples/examples/multi_apps/26150609-0da3-4a7d-8868-0faf9c5f01bb.json new file mode 100644 index 0000000..4a1af8e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/26150609-0da3-4a7d-8868-0faf9c5f01bb.json @@ -0,0 +1,26 @@ +{ + "id": "26150609-0da3-4a7d-8868-0faf9c5f01bb", + "snapshot": "libreoffice_calc", + "instruction": "So, I've been dabbling with coding a Snake game in Python, and I finally got it up and running. It's pretty cool, but it's not without its quirks. The biggest issue I'm facing right now is that the snake can't seem to eat the food, no matter what. Could you help me tweak the code so the snake can actually eat the food? 
Thanks a bunch!", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/26150609-0da3-4a7d-8868-0faf9c5f01bb", + "related_apps": [ + + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json b/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json new file mode 100644 index 0000000..9c7a2b7 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json @@ -0,0 +1,27 @@ +{ + "id": "767a3271-56db-4745-ac5d-846ef05e6fe5", + "snapshot": "libreoffice_calc", + "instruction": "Hey there! I've been swamped with emails lately, and I'm trying to get organized. I'm part of a local community group, and we've been receiving a lot of emails about different events and volunteer opportunities. I need to sort through these emails and pull out specific information to keep track of everything. Could you help me extract details from emails that mention 'volunteer opportunities' and organize them into a spreadsheet? I'm looking for the event name, date, location, and contact person's email. It would be a huge help if we could have this info neatly laid out so we can easily see what's coming up and who to contact. 
Thanks a bunch!", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/767a3271-56db-4745-ac5d-846ef05e6fe5", + "related_apps": [ + "thunderbird", + "libreoffice_calc" + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/examples/multi_apps/869de13e-bef9-4b91-ba51-f6708c40b096.json b/evaluation_examples/examples/multi_apps/869de13e-bef9-4b91-ba51-f6708c40b096.json new file mode 100644 index 0000000..755471a --- /dev/null +++ b/evaluation_examples/examples/multi_apps/869de13e-bef9-4b91-ba51-f6708c40b096.json @@ -0,0 +1,29 @@ +{ + "id": "869de13e-bef9-4b91-ba51-f6708c40b096", + "snapshot": "libreoffice_calc", + "instruction": "Can you organize my desktop by identifying work-related files, personal projects, and random downloads, ensuring no file is misplaced? Specifically, work files should end up in the 'Work' folder, personal projects in 'Personal Projects', and everything else in 'Miscellaneous'. Use content analysis for files without clear extensions or names.", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/869de13e-bef9-4b91-ba51-f6708c40b096", + "related_apps": [ + "libreoffice_writer", + "libreoffice_calc", + "os", + "pdf" + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/examples/multi_apps/9219480b-3aed-47fc-8bac-d2cffc5849f7.json b/evaluation_examples/examples/multi_apps/9219480b-3aed-47fc-8bac-d2cffc5849f7.json new file mode 100644 index 0000000..30c42ae --- /dev/null +++ b/evaluation_examples/examples/multi_apps/9219480b-3aed-47fc-8bac-d2cffc5849f7.json @@ -0,0 +1,26 @@ +{ + "id": "9219480b-3aed-47fc-8bac-d2cffc5849f7", + "snapshot": "libreoffice_calc", + "instruction": "Hi, I recently playing with developing a small python-based teris game. 
While I have finished most of the part, something is wrong under some cases when I press up to rotate, the whole program will crash, please run the code for me and fix the bugs of code.", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/9219480b-3aed-47fc-8bac-d2cffc5849f7", + "related_apps": [ + + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json b/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json new file mode 100644 index 0000000..d6e3d96 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json @@ -0,0 +1,26 @@ +{ + "id": "d1acdb87-bb67-4f30-84aa-990e56a09c92", + "snapshot": "libreoffice_calc", + "instruction": "Can you help me find these restaurants on Google Maps? I have a list of places I've been wanting to visit, and I need to gather some detailed information about them, including the address, what people have to say about them (ratings), and the hours of operation. 
It would be great if you could search for each of them on Google Maps and help me jot down this information into my sheet file, thanks!", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/d1acdb87-bb67-4f30-84aa-990e56a09c92", + "related_apps": [ + + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/examples/sheetcopilot/0a2e43bf-b26c-4631-a966-af9dfa12c9e5.json b/evaluation_examples/examples/sheetcopilot/0a2e43bf-b26c-4631-a966-af9dfa12c9e5.json index c8a910f..fbe0152 100644 --- a/evaluation_examples/examples/sheetcopilot/0a2e43bf-b26c-4631-a966-af9dfa12c9e5.json +++ b/evaluation_examples/examples/sheetcopilot/0a2e43bf-b26c-4631-a966-af9dfa12c9e5.json @@ -1,7 +1,7 @@ { "id": "0a2e43bf-b26c-4631-a966-af9dfa12c9e5", "snapshot": "libreoffice_calc", - "instruction": "Work out the monthly total sales in a new row called \"Total\" and then create a line chart to show the results.", + "instruction": "Work out the monthly total sales in a new row called \"Total\" and then create a line chart to show the results (x-axis be Months).", "source": "SheetCopilot@154", "config": [ { From 273bd16c3a7d1dc1b56a2c3d08370e642200a181 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Fri, 1 Mar 2024 23:13:03 +0800 Subject: [PATCH 11/13] Update new multiple-apps examples setup parts --- .../0c825995-5b70-4526-b663-113f4c999dd2.json | 23 ++++- .../26150609-0da3-4a7d-8868-0faf9c5f01bb.json | 85 ++++++++++++------ .../9219480b-3aed-47fc-8bac-d2cffc5849f7.json | 43 +++++++++- .../d1acdb87-bb67-4f30-84aa-990e56a09c92.json | 86 +++++++++++++------ 4 files changed, 185 insertions(+), 52 deletions(-) diff --git a/evaluation_examples/examples/multi_apps/0c825995-5b70-4526-b663-113f4c999dd2.json b/evaluation_examples/examples/multi_apps/0c825995-5b70-4526-b663-113f4c999dd2.json index a272e3e..a113665 100644 --- 
a/evaluation_examples/examples/multi_apps/0c825995-5b70-4526-b663-113f4c999dd2.json +++ b/evaluation_examples/examples/multi_apps/0c825995-5b70-4526-b663-113f4c999dd2.json @@ -1,11 +1,28 @@ { "id": "0c825995-5b70-4526-b663-113f4c999dd2", "snapshot": "libreoffice_calc", - "instruction": "Could you do me a favor? I need to find a Word document called 'ParkProposal.docx' on my computer and copy the 'Budget Estimates' and 'Design Concepts' sections into a new Google Doc. We're trying to collaborate more efficiently on this community park project. Thanks!", + "instruction": "I'm working on a comprehensive report for our environmental policy review meeting next week. I need to integrate key insights from an important document, which is a guidebook on the Green Economy, where I'm particularly interested in the 'Introduction' section. Could you extract this section and compile them into a new Google Doc named 'environment_policy_report (draft)'? This will significantly aid in our discussion on aligning our environmental policies with sustainable and green economic practices. 
Thanks!", "source": "authors", "config": [ - - ], + { + "type": "command", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Desktop/wwf"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/Desktop/wwf/lpr_living_planet_report_2016.pdf", "url": "https://drive.google.com/uc?id=19NCdw_MVP6nH5nC6okYYe8U1mJABfTRK&export=download"}, + {"path": "/home/user/Desktop/wwf/279c656a32_ENGLISH_FULL.pdf", "url": "https://drive.google.com/uc?id=1ckH1NetfImQ9EyONTO-ZFWA8m8VIUFvD&export=download"}, + {"path": "/home/user/Desktop/wwf/7g37j96psg_WWF_AR2021_spreads.pdf", "url": "https://drive.google.com/uc?id=1cxLTzmqDKMomOyvho29lvFvhRnb0Y8__&export=download"}, + {"path": "/home/user/Desktop/GE Guidebook.pdf", "url": "https://drive.google.com/uc?id=1KzC_R3eI3Rmgwz5bkcI8Ohv7ebOrU-Is&export=download"}, + {"path": "/home/user/Desktop/assessing_and_reporting_water_quality(q&a).pdf", "url": "https://drive.google.com/uc?id=1LFojf3Weflv3fVdrZrgTY1iUaRdbT9kG&export=download"} + ] + } + } + ], "trajectory": "trajectories/0c825995-5b70-4526-b663-113f4c999dd2", "related_apps": [ "libreoffice_calc", diff --git a/evaluation_examples/examples/multi_apps/26150609-0da3-4a7d-8868-0faf9c5f01bb.json b/evaluation_examples/examples/multi_apps/26150609-0da3-4a7d-8868-0faf9c5f01bb.json index 4a1af8e..1317148 100644 --- a/evaluation_examples/examples/multi_apps/26150609-0da3-4a7d-8868-0faf9c5f01bb.json +++ b/evaluation_examples/examples/multi_apps/26150609-0da3-4a7d-8868-0faf9c5f01bb.json @@ -1,26 +1,63 @@ { - "id": "26150609-0da3-4a7d-8868-0faf9c5f01bb", - "snapshot": "libreoffice_calc", - "instruction": "So, I've been dabbling with coding a Snake game in Python, and I finally got it up and running. It's pretty cool, but it's not without its quirks. The biggest issue I'm facing right now is that the snake can't seem to eat the food, no matter what. Could you help me tweak the code so the snake can actually eat the food? 
Thanks a bunch!", - "source": "authors", - "config": [ - - ], - "trajectory": "trajectories/26150609-0da3-4a7d-8868-0faf9c5f01bb", - "related_apps": [ - - ], - "evaluator": { - "postconfig": [], - "func": "", - "result": { - - }, - "expected": { - - }, - "options": { - - } - } + "id": "26150609-0da3-4a7d-8868-0faf9c5f01bb", + "snapshot": "libreoffice_calc", + "instruction": "So, I've been dabbling with coding a Snake game in Python, and I finally got it up and running. It's pretty cool, but it's not without its quirks. The biggest issue I'm facing right now is that the snake can't seem to eat the food, no matter what. Could you help me tweak the code so the snake can actually eat the food? Thanks a bunch!", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/snake" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/snake/food.py", + "url": "https://drive.usercontent.google.com/download?id=1Xkza2CaWyMPwnWiDmlP2lh1mvp6LuIJp&export=download&authuser=0&confirm=t&uuid=4e57edc7-5a7e-4a31-8436-656f3b398804&at=APZUnTWyVtrAaUST2_wI2GSeQrgV:1709303403572" + }, + { + "path": "/home/user/Desktop/snake/main.py", + "url": "https://drive.usercontent.google.com/download?id=1jcYCRW9quS2aoO4MNbF6UFvca6Z6jO3P&export=download&authuser=0&confirm=t&uuid=a98c6bdc-4acb-43fe-864a-3461276c5ebb&at=APZUnTWQTXYHkJW9XiNgew5Qe3PI:1709303406766" + }, + { + "path": "/home/user/Desktop/snake/settings.py", + "url": "https://drive.usercontent.google.com/download?id=1aGis8eNKqLM2ZFMIe0sgzaGA-IfTdM_H&export=download&authuser=0&confirm=t&uuid=178f1635-9538-467a-b191-1ecb8e76bc78&at=APZUnTVouOz4zz4wEaYp06nf31_4:1709303409591" + }, + { + "path": "/home/user/Desktop/snake/snake.py", + "url": 
"https://drive.usercontent.google.com/download?id=1-EnaUyWeWmioOrd_S72_JN6xk5qdvJiK&export=download&authuser=0&confirm=t&uuid=6619d298-15a4-42f5-b3df-36f849e17aad&at=APZUnTXT-ko0Bc0uWau_tH3vkFnT:1709303412395" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "code", + "/home/user/Desktop/snake" + ] + } + } + ], + "trajectory": "trajectories/26150609-0da3-4a7d-8868-0faf9c5f01bb", + "related_apps": [ + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } } diff --git a/evaluation_examples/examples/multi_apps/9219480b-3aed-47fc-8bac-d2cffc5849f7.json b/evaluation_examples/examples/multi_apps/9219480b-3aed-47fc-8bac-d2cffc5849f7.json index 30c42ae..d45d4df 100644 --- a/evaluation_examples/examples/multi_apps/9219480b-3aed-47fc-8bac-d2cffc5849f7.json +++ b/evaluation_examples/examples/multi_apps/9219480b-3aed-47fc-8bac-d2cffc5849f7.json @@ -4,7 +4,48 @@ "instruction": "Hi, I recently playing with developing a small python-based teris game. 
While I have finished most of the part, something is wrong under some cases when I press up to rotate, the whole program will crash, please run the code for me and fix the bugs of code.", "source": "authors", "config": [ - + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/teris" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/teris/block.py", + "url": "https://drive.usercontent.google.com/download?id=1txPwaWML0e8cjpDW-nw5N6HOC6fYwfI1&export=download&authuser=0&confirm=t&uuid=0b69795a-3600-4ec4-be9b-533deeb76e94&at=APZUnTVK-VGsZfTufLSh_3eRigYb:1709303077428" + }, + { + "path": "/home/user/Desktop/teris/main.py", + "url": "https://drive.usercontent.google.com/download?id=1vJ2FEw7RNfRr0KBynHM36_nNAc4jTArl&export=download&authuser=0&confirm=t&uuid=11f697bc-1414-46a0-bc2d-b2c49686d85e&at=APZUnTXCtKDpqTAOstIioRrJzx67:1709303181059" + }, + { + "path": "/home/user/Desktop/teris/settings.py", + "url": "https://drive.usercontent.google.com/download?id=1tYI8QZz-T-sNFRAMaYnzFv80upIveUT-&export=download&authuser=0&confirm=t&uuid=50c64eed-ab7e-4dbe-82ce-d01b25364556&at=APZUnTXUok8Cos6rUo-fnKLwnPmn:1709303184763" + }, + { + "path": "/home/user/Desktop/teris/teris.py", + "url": "https://drive.usercontent.google.com/download?id=1UOJp5Y6JLmaXmZXHc_GkM3nMfKbS6Dyu&export=download&authuser=0&confirm=t&uuid=3189e5d6-5984-45c7-9dd8-6f8ec3a0845e&at=APZUnTWYTtsu27Ds3aHrZpIAnNcA:1709303187967" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "code", + "/home/user/Desktop/teris" + ] + } + } ], "trajectory": "trajectories/9219480b-3aed-47fc-8bac-d2cffc5849f7", "related_apps": [ diff --git a/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json b/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json index d6e3d96..5161462 100644 --- 
a/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json +++ b/evaluation_examples/examples/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92.json @@ -1,26 +1,64 @@ { - "id": "d1acdb87-bb67-4f30-84aa-990e56a09c92", - "snapshot": "libreoffice_calc", - "instruction": "Can you help me find these restaurants on Google Maps? I have a list of places I've been wanting to visit, and I need to gather some detailed information about them, including the address, what people have to say about them (ratings), and the hours of operation. It would be great if you could search for each of them on Google Maps and help me jot down this information into my sheet file, thanks!", - "source": "authors", - "config": [ - - ], - "trajectory": "trajectories/d1acdb87-bb67-4f30-84aa-990e56a09c92", - "related_apps": [ - - ], - "evaluator": { - "postconfig": [], - "func": "", - "result": { - - }, - "expected": { - - }, - "options": { - - } - } + "id": "d1acdb87-bb67-4f30-84aa-990e56a09c92", + "snapshot": "libreoffice_calc", + "instruction": "Can you help me find these restaurants on Google Maps? I have a list of places I've been wanting to visit, and I need to gather some detailed information about them, including the address, what people have to say about them (ratings), and the hours of operation. 
It would be great if you could search for each of them on Google Maps and help me jot down this information into my sheet file, thanks!", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/restaurants.txt", + "url": "https://drive.google.com/uc?id=1IehFLJPZcFv8Ujk31ExbyGLji9AylmmJ&export=download" + }, + { + "path": "/home/user/Desktop/MUST_VISIT.xlsx", + "url": "https://drive.google.com/uc?id=1fXmjvZcwkIcckMIAXi3Hv_JAbVWpgs_l&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/MUST_VISIT.xlsx" + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/restaurants.txt" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 5 + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "restaurants.txt (~/Desktop) - gedit", + "strict": true + } + } + ], + "trajectory": "trajectories/d1acdb87-bb67-4f30-84aa-990e56a09c92", + "related_apps": [ + "os", + "chrome", + "libreoffice_calc" + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } } From 53ed5588e38ffa8a97e12a95f53ea505c5675080 Mon Sep 17 00:00:00 2001 From: David Chang Date: Fri, 1 Mar 2024 23:14:42 +0800 Subject: [PATCH 12/13] ver Mar1stv2 two new tasks --- .../00fa164e-2612-4439-992e-157d019a8436.json | 85 +++++++++++++++++ .../7e287123-70ca-47b9-8521-47db09b69b14.json | 2 +- .../b5062e3e-641c-4e3a-907b-ac864d2e7652.json | 94 +++++++++++++++++++ 3 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 evaluation_examples/examples/multi_apps/00fa164e-2612-4439-992e-157d019a8436.json create mode 100644 evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json diff --git a/evaluation_examples/examples/multi_apps/00fa164e-2612-4439-992e-157d019a8436.json b/evaluation_examples/examples/multi_apps/00fa164e-2612-4439-992e-157d019a8436.json new file 
mode 100644 index 0000000..3baf77f --- /dev/null +++ b/evaluation_examples/examples/multi_apps/00fa164e-2612-4439-992e-157d019a8436.json @@ -0,0 +1,85 @@ +{ + "id": "00fa164e-2612-4439-992e-157d019a8436", + "snapshot": "libreoffice_writer", + "instruction": "I'm writing a report in Writer. A little data are recorded in \"~/Documents/awesome-desktop/expe-results.xlsx\". Help me to extract the results of LLM-based models and insert a table in the \"Main Results\" section of my report.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/awesome-desktop/"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Documents/awesome-desktop/awe_desk_env.docx", + "url": "https://drive.google.com/uc?id=1oXro9_S7nZs_h_AZq5TJWEBofbrZM3oG&export=download" + }, + { + "path": "/home/user/Documents/awesome-desktop/expe-results.xlsx", + "url": "https://drive.google.com/uc?id=1DwOikfnAumkZfvVcjIncgKZMITY3ruku&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Documents/awesome-desktop/awe_desk_env.docx" + } + } + ], + "trajectory": "trajectories/00fa164e-2612-4439-992e-157d019a8436", + "related_apps": [ + "libreoffice_writer", + "libreoffice_calc", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "awe_desk_env.docx - LibreOffice Writer", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_docx_tables", + "result": { + "type": "vm_file", + "path": "/home/user/Documents/awesome-desktop/awe_desk_env.docx", + "dest": "awe_desk_env.docx" + }, + "expected": { + "type": "cloud_file", + 
"path": "https://drive.google.com/uc?id=1hMZKSF74qBUvBDAu_zF8vHm6MKu96VBN&export=download", + "dest": "awe_desk_env_gt.docx" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json b/evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json index a398bd4..95d1eec 100644 --- a/evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json +++ b/evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json @@ -1,7 +1,7 @@ { "id": "7e287123-70ca-47b9-8521-47db09b69b14", "snapshot": "libreoffice_calc", - "instruction": "I am an assistant professor of CS at HKU, I want to apply for the General Research Fund next year, I need to get some insights, so I need you to help me to organise the data. First please help me to organise the pass rate of the GRF applications of the CS departments of each school for the past five years in percentage form with 2 decimal digits in a table, which I can use subsequently. Set the headers as \"Year\", \"#Applied\", \"#Supported\", and \"Success Rate\". The materials are saved under Documents/Fundings. And please save the result table as \"GRF-p5y.xlsx\" on my desktop.", + "instruction": "I am an assistant professor of CS at HKU, I want to apply for the General Research Fund next year, I need to get some insights, so I need you to help me to organise the data. First please help me to organise the pass rate of the GRF applications of the CS departments of each school for 2019~2023 in percentage form with 2 decimal digits in a table, which I can use subsequently. Set the headers as \"Year\", \"#Applied\", \"#Supported\", and \"Success Rate\". The materials are saved under Documents/Fundings. 
And please save the result table as \"GRF-p5y.xlsx\" on my desktop.", "source": "authors", "config": [ { diff --git a/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json b/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json new file mode 100644 index 0000000..0b797a7 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json @@ -0,0 +1,94 @@ +{ + "id": "b5062e3e-641c-4e3a-907b-ac864d2e7652", + "snapshot": "libreoffice_calc", + "instruction": "I've got a mass of different categories of paper in PDF under folder \"~/Documents/Papers\". Please help me to extract the name, mail, and affiliation of the first author of all the papers and note them in an Excel table. The corresponding headers should be added in the table. Simply sort the authors by their full names ascendingly. The summary file should be saved as \"~/authors.xlsx\".", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/Papers"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Documents/Papers/zhang_appagent.pdf", + "url": "https://arxiv.org/pdf/2312.13771.pdf" + }, + { + "path": "/home/user/Documents/Papers/niu_screenagent.pdf", + "url": "https://arxiv.org/pdf/2402.07945.pdf" + }, + { + "path": "/home/user/Documents/Papers/koh_visualwebarena.pdf", + "url": "https://arxiv.org/pdf/2401.13649.pdf" + }, + { + "path": "/home/user/Documents/Papers/deng_mind2web.pdf", + "url": "https://papers.nips.cc/paper_files/paper/2023/file/5950bf290a1570ea401bf98882128160-Paper-Datasets_and_Benchmarks.pdf" + } + ] + } + } + ], + "trajectory": "trajectories/b5062e3e-641c-4e3a-907b-ac864d2e7652", + "related_apps": [ + "libreoffice_calc", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": [ + "libreoffice", + "--convert-to", + "csv:Text - txt - csv 
(StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1", + "--outdir", + "/home/user", + "/home/user/authors.xlsx" + ] + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": [ + "/home/user/authors.xlsx", + "/home/user/authors-Sheet1.csv" + ], + "dest": [ + "authors.xlsx", + "authors-Sheet1.csv" + ], + "multi": true + }, + "expected": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1fttbvfHuoQfsQUk3fVXkJsCu231jhnQj&export=download", + "https://drive.google.com/uc?id=1fq4hbk1g9R_SjknzwFAqvyF1ICyNYfok&export=download" + ], + "dest": [ + "authors-gt.xlsx", + "authors-gt-Sheet1.csv" + ], + "multi": true + }, + "options": { + "rules": [ + { + "type": "sheet_print", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "ignore_case": true + } + ] + } + } +} From 79c345ee9dfc3876d8255a4b48e010ca1ca8e3c0 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Sat, 2 Mar 2024 13:06:35 +0800 Subject: [PATCH 13/13] Update infeasible examples --- .../045bf3ff-9077-4b86-b483-a1040a949cff.json | 6 +----- .../2e6f678f-472d-4c55-99cc-8e7c5c402a71.json | 6 +----- .../58d3eeeb-e9d0-499f-962e-fd0db2a744d8.json | 6 +----- .../8ea73f6f-9689-42ad-8c60-195bbf06a7ba.json | 6 +----- .../dbbf4b99-2253-4b10-9274-45f246af2466.json | 6 +----- .../6d72aad6-187a-4392-a4c4-ed87269c51cf.json | 18 ++++++++++++++++++ 6 files changed, 23 insertions(+), 25 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/6d72aad6-187a-4392-a4c4-ed87269c51cf.json diff --git a/evaluation_examples/examples/gimp/045bf3ff-9077-4b86-b483-a1040a949cff.json b/evaluation_examples/examples/gimp/045bf3ff-9077-4b86-b483-a1040a949cff.json index 54e5534..cfe57ee 100644 --- a/evaluation_examples/examples/gimp/045bf3ff-9077-4b86-b483-a1040a949cff.json +++ b/evaluation_examples/examples/gimp/045bf3ff-9077-4b86-b483-a1040a949cff.json @@ -30,10 +30,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "expected": { - }, - "result": { - } + 
"func": "infeasible" } } \ No newline at end of file diff --git a/evaluation_examples/examples/gimp/2e6f678f-472d-4c55-99cc-8e7c5c402a71.json b/evaluation_examples/examples/gimp/2e6f678f-472d-4c55-99cc-8e7c5c402a71.json index 6f7b55e..8cbd02d 100644 --- a/evaluation_examples/examples/gimp/2e6f678f-472d-4c55-99cc-8e7c5c402a71.json +++ b/evaluation_examples/examples/gimp/2e6f678f-472d-4c55-99cc-8e7c5c402a71.json @@ -37,10 +37,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "expected": { - }, - "result": { - } + "func": "infeasible" } } \ No newline at end of file diff --git a/evaluation_examples/examples/gimp/58d3eeeb-e9d0-499f-962e-fd0db2a744d8.json b/evaluation_examples/examples/gimp/58d3eeeb-e9d0-499f-962e-fd0db2a744d8.json index 5fe5537..78cb3a5 100644 --- a/evaluation_examples/examples/gimp/58d3eeeb-e9d0-499f-962e-fd0db2a744d8.json +++ b/evaluation_examples/examples/gimp/58d3eeeb-e9d0-499f-962e-fd0db2a744d8.json @@ -30,10 +30,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "expected": { - }, - "result": { - } + "func": "infeasible" } } \ No newline at end of file diff --git a/evaluation_examples/examples/gimp/8ea73f6f-9689-42ad-8c60-195bbf06a7ba.json b/evaluation_examples/examples/gimp/8ea73f6f-9689-42ad-8c60-195bbf06a7ba.json index a19c2a5..582ff91 100644 --- a/evaluation_examples/examples/gimp/8ea73f6f-9689-42ad-8c60-195bbf06a7ba.json +++ b/evaluation_examples/examples/gimp/8ea73f6f-9689-42ad-8c60-195bbf06a7ba.json @@ -30,10 +30,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "expected": { - }, - "result": { - } + "func": "infeasible" } } \ No newline at end of file diff --git a/evaluation_examples/examples/gimp/dbbf4b99-2253-4b10-9274-45f246af2466.json b/evaluation_examples/examples/gimp/dbbf4b99-2253-4b10-9274-45f246af2466.json index f9087f3..292c9fe 100644 --- a/evaluation_examples/examples/gimp/dbbf4b99-2253-4b10-9274-45f246af2466.json +++ b/evaluation_examples/examples/gimp/dbbf4b99-2253-4b10-9274-45f246af2466.json @@ -30,10 +30,6 
@@ "gimp" ], "evaluator": { - "func": "infeasible", - "expected": { - }, - "result": { - } + "func": "infeasible" } } \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/6d72aad6-187a-4392-a4c4-ed87269c51cf.json b/evaluation_examples/examples/multi_apps/6d72aad6-187a-4392-a4c4-ed87269c51cf.json new file mode 100644 index 0000000..de4a9f4 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/6d72aad6-187a-4392-a4c4-ed87269c51cf.json @@ -0,0 +1,18 @@ +{ + "id": "6d72aad6-187a-4392-a4c4-ed87269c51cf", + "snapshot": "libreoffice_calc", + "instruction": "Could you please convert an OpenOffice Impress presentation to video and play it with VLC?", + "source": "https://superuser.com/questions/923171/converting-openoffice-impress-presentation-to-video-without-screen-recording", + "config": [ + ], + "trajectory": "trajectories/6d72aad6-187a-4392-a4c4-ed87269c51cf", + "related_apps": [ + "libreoffice_calc", + "libreoffice_impress", + "libreoffice_writer", + "vlc" + ], + "evaluator": { + "func": "infeasible" + } +}